add files
Co-authored-by: Hila <[email protected]>
This view is limited to 50 files because it contains too many changes.
- README.md +2 -2
- Transformer-Explainability/BERT_explainability.ipynb +581 -0
- Transformer-Explainability/BERT_explainability/modules/BERT/BERT.py +748 -0
- Transformer-Explainability/BERT_explainability/modules/BERT/BERT_cls_lrp.py +240 -0
- Transformer-Explainability/BERT_explainability/modules/BERT/BERT_orig_lrp.py +748 -0
- Transformer-Explainability/BERT_explainability/modules/BERT/BertForSequenceClassification.py +241 -0
- Transformer-Explainability/BERT_explainability/modules/BERT/ExplanationGenerator.py +165 -0
- Transformer-Explainability/BERT_explainability/modules/__init__.py +0 -0
- Transformer-Explainability/BERT_explainability/modules/layers_lrp.py +352 -0
- Transformer-Explainability/BERT_explainability/modules/layers_ours.py +373 -0
- Transformer-Explainability/BERT_params/boolq.json +26 -0
- Transformer-Explainability/BERT_params/boolq_baas.json +26 -0
- Transformer-Explainability/BERT_params/boolq_bert.json +32 -0
- Transformer-Explainability/BERT_params/boolq_soft.json +21 -0
- Transformer-Explainability/BERT_params/cose_bert.json +30 -0
- Transformer-Explainability/BERT_params/cose_multiclass.json +35 -0
- Transformer-Explainability/BERT_params/esnli_bert.json +28 -0
- Transformer-Explainability/BERT_params/evidence_inference.json +26 -0
- Transformer-Explainability/BERT_params/evidence_inference_bert.json +33 -0
- Transformer-Explainability/BERT_params/evidence_inference_soft.json +22 -0
- Transformer-Explainability/BERT_params/fever.json +26 -0
- Transformer-Explainability/BERT_params/fever_baas.json +25 -0
- Transformer-Explainability/BERT_params/fever_bert.json +32 -0
- Transformer-Explainability/BERT_params/fever_soft.json +21 -0
- Transformer-Explainability/BERT_params/movies.json +26 -0
- Transformer-Explainability/BERT_params/movies_baas.json +26 -0
- Transformer-Explainability/BERT_params/movies_bert.json +32 -0
- Transformer-Explainability/BERT_params/movies_soft.json +21 -0
- Transformer-Explainability/BERT_params/multirc.json +26 -0
- Transformer-Explainability/BERT_params/multirc_baas.json +26 -0
- Transformer-Explainability/BERT_params/multirc_bert.json +32 -0
- Transformer-Explainability/BERT_params/multirc_soft.json +21 -0
- Transformer-Explainability/BERT_rationale_benchmark/__init__.py +0 -0
- Transformer-Explainability/BERT_rationale_benchmark/metrics.py +1007 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/model_utils.py +186 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/__init__.py +0 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/bert_pipeline.py +852 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/pipeline_train.py +235 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/pipeline_utils.py +1045 -0
- Transformer-Explainability/BERT_rationale_benchmark/models/sequence_taggers.py +78 -0
- Transformer-Explainability/BERT_rationale_benchmark/utils.py +251 -0
- Transformer-Explainability/DeiT.PNG +0 -0
- Transformer-Explainability/DeiT_example.ipynb +0 -0
- Transformer-Explainability/LICENSE +21 -0
- Transformer-Explainability/README.md +153 -0
- Transformer-Explainability/Transformer_explainability.ipynb +0 -0
- Transformer-Explainability/baselines/ViT/ViT_LRP.py +535 -0
- Transformer-Explainability/baselines/ViT/ViT_explanation_generator.py +107 -0
- Transformer-Explainability/baselines/ViT/ViT_new.py +329 -0
- Transformer-Explainability/baselines/ViT/ViT_orig_LRP.py +508 -0
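
The bulk of the commit vendors the hila-chefer/Transformer-Explainability code together with the BERT_explainability.ipynb notebook shown below. As a quick orientation, the added BERT modules are wired together roughly as follows — a minimal sketch distilled from the notebook's own cells, assuming a CUDA device and the pinned environment from the repository's requirements.txt (transformers==3.5.1, torch==1.7.0):

import torch
from transformers import AutoTokenizer
from BERT_explainability.modules.BERT.BertForSequenceClassification import BertForSequenceClassification
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator

# Load the SST-2 sentiment classifier through the repository's LRP-aware BERT wrapper.
model = BertForSequenceClassification.from_pretrained("textattack/bert-base-uncased-SST-2").to("cuda")
model.eval()
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-SST-2")
explanations = Generator(model)

# Per-token relevance scores for the predicted class (start_layer=0, as in the notebook).
encoding = tokenizer(["some scenes were ridiculous, but acting was great."], return_tensors="pt")
input_ids = encoding["input_ids"].to("cuda")
attention_mask = encoding["attention_mask"].to("cuda")
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=0)[0]
expl = (expl - expl.min()) / (expl.max() - expl.min())  # normalize to [0, 1]
print(list(zip(tokenizer.convert_ids_to_tokens(input_ids.flatten()), expl.tolist())))
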
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
 title: Comparative Explainability
-emoji:
+emoji: 🏆
 colorFrom: red
-colorTo:
+colorTo: gray
 sdk: gradio
 sdk_version: 3.34.0
 app_file: app.py
Transformer-Explainability/BERT_explainability.ipynb
ADDED
@@ -0,0 +1,581 @@
The notebook's metadata records nbformat 4, a Colab provenance block ("BERT-explainability.ipynb"), a Python 3 kernelspec, and the GPU accelerator. Its cells are as follows.

Markdown cell — "Open In Colab" badge linking to https://colab.research.google.com/github/hila-chefer/Transformer-Explainability/blob/main/BERT_explainability.ipynb

Code cell 1 (execution_count 1) — clone the repository and install dependencies:

!git clone https://github.com/hila-chefer/Transformer-Explainability.git

import os
os.chdir(f'./Transformer-Explainability')

!pip install -r requirements.txt
!pip install captum

Recorded output: the pip log. The pinned requirements (Pillow>=8.1.1, einops==0.3.0, h5py==2.8.0, imageio==2.9.0, matplotlib==3.3.2, opencv_python, scikit_image==0.17.2, scipy==1.5.2, sklearn, torch==1.7.0, torchvision==0.8.1, tqdm==4.51.0, transformers==3.5.1, utils==1.0.1, Pygments>=2.7.4) are already satisfied or installed, matplotlib 3.6.3 is replaced by 3.3.2, pip warns that fastai 2.7.10 requires torchvision>=0.8.2 while torchvision 0.8.1 is installed, and captum 0.6.0 is already satisfied.

Code cell 2 (execution_count 9) — pin captum and matplotlib explicitly:

!pip install captum==0.6.0
!pip install matplotlib==3.3.2

Recorded output: captum 0.6.0 already satisfied; matplotlib 3.6.3 is uninstalled and matplotlib 3.3.2 installed, again with the fastai/torchvision warning.

Code cell 3 (execution_count 10) — imports:

from transformers import BertTokenizer
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator
from BERT_explainability.modules.BERT.BertForSequenceClassification import BertForSequenceClassification
from transformers import BertTokenizer
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator
from transformers import AutoTokenizer

from captum.attr import visualization
import torch

Code cell 4 (execution_count 11) — load the SST-2 model and the explanation generator:

model = BertForSequenceClassification.from_pretrained("textattack/bert-base-uncased-SST-2").to("cuda")
model.eval()
tokenizer = AutoTokenizer.from_pretrained("textattack/bert-base-uncased-SST-2")
# initialize the explanations generator
explanations = Generator(model)

classifications = ["NEGATIVE", "POSITIVE"]

Markdown cell — Positive sentiment example

Code cell 5 (execution_count 12) — explain a positive review with respect to the predicted class:

# encode a sentence
text_batch = ["This movie was the best movie I have ever seen! some scenes were ridiculous, but acting was great."]
encoding = tokenizer(text_batch, return_tensors='pt')
input_ids = encoding['input_ids'].to("cuda")
attention_mask = encoding['attention_mask'].to("cuda")

# true class is positive - 1
true_class = 1

# generate an explanation for the input
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=0)[0]
# normalize scores
expl = (expl - expl.min()) / (expl.max() - expl.min())

# get the model classification
output = torch.nn.functional.softmax(model(input_ids=input_ids, attention_mask=attention_mask)[0], dim=-1)
classification = output.argmax(dim=-1).item()
# get class name
class_name = classifications[classification]
# if the classification is negative, higher explanation scores are more negative
# flip for visualization
if class_name == "NEGATIVE":
    expl *= (-1)

tokens = tokenizer.convert_ids_to_tokens(input_ids.flatten())
print([(tokens[i], expl[i].item()) for i in range(len(tokens))])
vis_data_records = [visualization.VisualizationDataRecord(
                        expl,
                        output[0][classification],
                        classification,
                        true_class,
                        true_class,
                        1,
                        tokens,
                        1)]
visualization.visualize_text(vis_data_records)

Printed token relevances:
[('[CLS]', 0.0), ('this', 0.4267549514770508), ('movie', 0.30920878052711487), ('was', 0.2684089243412018), ('the', 0.33637329936027527), ('best', 0.6280889511108398), ('movie', 0.28546375036239624), ('i', 0.1863601952791214), ('have', 0.10115814208984375), ('ever', 0.1419338583946228), ('seen', 0.1898290067911148), ('!', 0.5944811105728149), ('some', 0.003896803595125675), ('scenes', 0.033401958644390106), ('were', 0.018588582053780556), ('ridiculous', 0.018908796831965446), (',', 0.0), ('but', 0.42920616269111633), ('acting', 0.43855082988739014), ('was', 0.500239372253418), ('great', 1.0), ('.', 0.014817383140325546), ('[SEP]', 0.0868983045220375)]

Rendered output: the Captum word-importance table (True Label 1, Predicted Label 1 (1.00), Attribution Label 1, Attribution Score 1.00), with "great" shaded most strongly positive, followed by "best" and "!".

Markdown cell — Negative sentiment example

Code cell 6 (execution_count 13) — the same pipeline applied to a negative review; identical to cell 5 except for the input sentence, the absence of the true_class variable, and the true/attribution labels passed to VisualizationDataRecord being hard-coded to 1:

text_batch = ["I really didn't like this movie. Some of the actors were good, but overall the movie was boring."]

Printed token relevances:
[('[CLS]', -0.0), ('i', -0.19109757244586945), ('really', -0.1888734996318817), ('didn', -0.2894313633441925), ("'", -0.006574898026883602), ('t', -0.36788827180862427), ('like', -0.15249046683311462), ('this', -0.18922168016433716), ('movie', -0.0404353104531765), ('.', -0.019592661410570145), ('some', -0.02311306819319725), ('of', -0.0), ('the', -0.02295113168656826), ('actors', -0.09577538073062897), ('were', -0.013370633125305176), ('good', -0.0323222391307354), (',', -0.004366681911051273), ('but', -0.05878860130906105), ('overall', -0.33596664667129517), ('the', -0.21820111572742462), ('movie', -0.05482065677642822), ('was', -0.6248231530189514), ('boring', -1.0), ('.', -0.031107747927308083), ('[SEP]', -0.052539654076099396)]

Rendered output: the Captum table (True Label 1, Predicted Label 0 (1.00)), with "boring" shaded most strongly negative, followed by "was".

Markdown cell — Choosing class for visualization example

Code cell 7 (execution_count 14) — the sentence "I hate that I love you." explained with respect to a chosen class rather than the predicted one; the pipeline matches cell 5 except for these lines (the cell also omits the classification = output.argmax(...) line, so VisualizationDataRecord reuses the classification value left over from the earlier cells):

text_batch = ["I hate that I love you."]
target_class = 0
expl = explanations.generate_LRP(input_ids=input_ids, attention_mask=attention_mask, start_layer=11, index=target_class)[0]
class_name = classifications[target_class]

Printed token relevances:
[('[CLS]', -0.0), ('i', -0.19790242612361908), ('hate', -1.0), ('that', -0.40287283062934875), ('i', -0.12505637109279633), ('love', -0.1307140290737152), ('you', -0.05467141419649124), ('.', -6.108225989009952e-06), ('[SEP]', -0.0)]

Rendered output: the Captum table (True Label 1, Predicted Label 0 (0.91)), with "hate" shaded most strongly negative.

Code cell 8 (execution_count 15) — identical to cell 7 except target_class = 1 (the POSITIVE class).

Printed token relevances:
[('[CLS]', 0.0), ('i', 0.2725590765476227), ('hate', 0.17270179092884064), ('that', 0.23211266100406647), ('i', 0.17642731964588165), ('love', 1.0), ('you', 0.2465524971485138), ('.', 0.0), ('[SEP]', 0.00015733683540020138)]

Rendered output: the Captum table (True Label 1, Predicted Label 0 (0.91)), with "love" shaded most strongly positive.

… (the remaining lines of the notebook are not shown in this view)
|
573 |
+
]
|
574 |
+
},
|
575 |
+
"metadata": {},
|
576 |
+
"execution_count": 15
|
577 |
+
}
|
578 |
+
]
|
579 |
+
}
|
580 |
+
]
|
581 |
+
}
|
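One note on the last cell above: it reads `classification` (the predicted class index) from an earlier cell that is not shown here. Below is a minimal sketch, assuming the notebook's `output` and `classifications` variables, of how that index and the displayed probability are typically derived; it is illustrative only and not part of the notebook.

# sketch only -- `output` is the softmax over the logits computed in the cell above,
# and `classifications` maps class indices to names (defined earlier in the notebook)
classification = output.argmax(dim=-1).item()  # predicted class index; 0 ("NEGATIVE", 0.91) in the run above
pred_prob = output[0][classification]          # shown as the "Predicted Label" probability in the HTML table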
Transformer-Explainability/BERT_explainability/modules/BERT/BERT.py
ADDED
@@ -0,0 +1,748 @@
from __future__ import absolute_import

import math

import torch
import torch.nn.functional as F
from BERT_explainability.modules.layers_ours import *
from torch import nn
from transformers import BertConfig, BertPreTrainedModel, PreTrainedModel
from transformers.modeling_outputs import (BaseModelOutput,
                                           BaseModelOutputWithPooling)

ACT2FN = {
    "relu": ReLU,
    "tanh": Tanh,
    "gelu": GELU,
}


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(
            "function {} not found in ACT2FN mapping {}".format(
                activation_string, list(ACT2FN.keys())
            )
        )


def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    all_layer_matrices = [
        all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
        for i in range(len(all_layer_matrices))
    ]
    joint_attention = all_layer_matrices[start_layer]
    for i in range(start_layer + 1, len(all_layer_matrices)):
        joint_attention = all_layer_matrices[i].bmm(joint_attention)
    return joint_attention


class BertEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(
            config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
        )
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size
        )
        self.token_type_embeddings = nn.Embedding(
            config.type_vocab_size, config.hidden_size
        )

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)

        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1))
        )

        self.add1 = Add()
        self.add2 = Add()

    def forward(
        self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None
    ):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if token_type_ids is None:
            token_type_ids = torch.zeros(
                input_shape, dtype=torch.long, device=self.position_ids.device
            )

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        # embeddings = inputs_embeds + position_embeddings + token_type_embeddings
        embeddings = self.add1([token_type_embeddings, position_embeddings])
        embeddings = self.add2([embeddings, inputs_embeds])
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def relprop(self, cam, **kwargs):
        cam = self.dropout.relprop(cam, **kwargs)
        cam = self.LayerNorm.relprop(cam, **kwargs)

        # [inputs_embeds, position_embeddings, token_type_embeddings]
        (cam) = self.add2.relprop(cam, **kwargs)

        return cam


class BertEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList(
            [BertLayer(config) for _ in range(config.num_hidden_layers)]
        )

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None

            if getattr(self.config, "gradient_checkpointing", False):

                def create_custom_forward(module):
                    def custom_forward(*inputs):
                        return module(*inputs, output_attentions)

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(layer_module),
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    output_attentions,
                )
            hidden_states = layer_outputs[0]
            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [hidden_states, all_hidden_states, all_attentions]
                if v is not None
            )
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_attentions,
        )

    def relprop(self, cam, **kwargs):
        # assuming output_hidden_states is False
        for layer_module in reversed(self.layer):
            cam = layer_module.relprop(cam, **kwargs)
        return cam


# not adding relprop since this is only pooling at the end of the network, does not impact tokens importance
class BertPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.hidden_size)
        self.activation = Tanh()
        self.pool = IndexSelect()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        self._seq_size = hidden_states.shape[1]

        # first_token_tensor = hidden_states[:, 0]
        first_token_tensor = self.pool(
            hidden_states, 1, torch.tensor(0, device=hidden_states.device)
        )
        first_token_tensor = first_token_tensor.squeeze(1)
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

    def relprop(self, cam, **kwargs):
        cam = self.activation.relprop(cam, **kwargs)
        # print(cam.sum())
        cam = self.dense.relprop(cam, **kwargs)
        # print(cam.sum())
        cam = cam.unsqueeze(1)
        cam = self.pool.relprop(cam, **kwargs)
        # print(cam.sum())

        return cam


class BertAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = BertSelfAttention(config)
        self.output = BertSelfOutput(config)
        self.pruned_heads = set()
        self.clone = Clone()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads,
            self.self.num_attention_heads,
            self.self.attention_head_size,
            self.pruned_heads,
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = (
            self.self.attention_head_size * self.self.num_attention_heads
        )
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        h1, h2 = self.clone(hidden_states, 2)
        self_outputs = self.self(
            h1,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            output_attentions,
        )
        attention_output = self.output(self_outputs[0], h2)
        outputs = (attention_output,) + self_outputs[
            1:
        ]  # add attentions if we output them
        return outputs

    def relprop(self, cam, **kwargs):
        # assuming that we don't ouput the attentions (outputs = (attention_output,)), self_outputs=(context_layer,)
        (cam1, cam2) = self.output.relprop(cam, **kwargs)
        # print(cam1.sum(), cam2.sum(), (cam1 + cam2).sum())
        cam1 = self.self.relprop(cam1, **kwargs)
        # print(cam1.sum(), cam2.sum(), (cam1 + cam2).sum())

        return self.clone.relprop((cam1, cam2), **kwargs)


class BertSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
            config, "embedding_size"
        ):
            raise ValueError(
                "The hidden size (%d) is not a multiple of the number of attention "
                "heads (%d)" % (config.hidden_size, config.num_attention_heads)
            )

        self.num_attention_heads = config.num_attention_heads
        self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
        self.all_head_size = self.num_attention_heads * self.attention_head_size

        self.query = Linear(config.hidden_size, self.all_head_size)
        self.key = Linear(config.hidden_size, self.all_head_size)
        self.value = Linear(config.hidden_size, self.all_head_size)

        self.dropout = Dropout(config.attention_probs_dropout_prob)

        self.matmul1 = MatMul()
        self.matmul2 = MatMul()
        self.softmax = Softmax(dim=-1)
        self.add = Add()
        self.mul = Mul()
        self.head_mask = None
        self.attention_mask = None
        self.clone = Clone()

        self.attn_cam = None
        self.attn = None
        self.attn_gradients = None

    def get_attn(self):
        return self.attn

    def save_attn(self, attn):
        self.attn = attn

    def save_attn_cam(self, cam):
        self.attn_cam = cam

    def get_attn_cam(self):
        return self.attn_cam

    def save_attn_gradients(self, attn_gradients):
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        return self.attn_gradients

    def transpose_for_scores(self, x):
        new_x_shape = x.size()[:-1] + (
            self.num_attention_heads,
            self.attention_head_size,
        )
        x = x.view(*new_x_shape)
        return x.permute(0, 2, 1, 3)

    def transpose_for_scores_relprop(self, x):
        return x.permute(0, 2, 1, 3).flatten(2)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        self.head_mask = head_mask
        self.attention_mask = attention_mask

        h1, h2, h3 = self.clone(hidden_states, 3)
        mixed_query_layer = self.query(h1)

        # If this is instantiated as a cross-attention module, the keys
        # and values come from an encoder; the attention mask needs to be
        # such that the encoder's padding tokens are not attended to.
        if encoder_hidden_states is not None:
            mixed_key_layer = self.key(encoder_hidden_states)
            mixed_value_layer = self.value(encoder_hidden_states)
            attention_mask = encoder_attention_mask
        else:
            mixed_key_layer = self.key(h2)
            mixed_value_layer = self.value(h3)

        query_layer = self.transpose_for_scores(mixed_query_layer)
        key_layer = self.transpose_for_scores(mixed_key_layer)
        value_layer = self.transpose_for_scores(mixed_value_layer)

        # Take the dot product between "query" and "key" to get the raw attention scores.
        attention_scores = self.matmul1([query_layer, key_layer.transpose(-1, -2)])
        attention_scores = attention_scores / math.sqrt(self.attention_head_size)
        if attention_mask is not None:
            # Apply the attention mask is (precomputed for all layers in BertModel forward() function)
            attention_scores = self.add([attention_scores, attention_mask])

        # Normalize the attention scores to probabilities.
        attention_probs = self.softmax(attention_scores)

        self.save_attn(attention_probs)
        attention_probs.register_hook(self.save_attn_gradients)

        # This is actually dropping out entire tokens to attend to, which might
        # seem a bit unusual, but is taken from the original Transformer paper.
        attention_probs = self.dropout(attention_probs)

        # Mask heads if we want to
        if head_mask is not None:
            attention_probs = attention_probs * head_mask

        context_layer = self.matmul2([attention_probs, value_layer])

        context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
        new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
        context_layer = context_layer.view(*new_context_layer_shape)

        outputs = (
            (context_layer, attention_probs) if output_attentions else (context_layer,)
        )
        return outputs

    def relprop(self, cam, **kwargs):
        # Assume output_attentions == False
        cam = self.transpose_for_scores(cam)

        # [attention_probs, value_layer]
        (cam1, cam2) = self.matmul2.relprop(cam, **kwargs)
        cam1 /= 2
        cam2 /= 2
        if self.head_mask is not None:
            # [attention_probs, head_mask]
            (cam1, _) = self.mul.relprop(cam1, **kwargs)

        self.save_attn_cam(cam1)

        cam1 = self.dropout.relprop(cam1, **kwargs)

        cam1 = self.softmax.relprop(cam1, **kwargs)

        if self.attention_mask is not None:
            # [attention_scores, attention_mask]
            (cam1, _) = self.add.relprop(cam1, **kwargs)

        # [query_layer, key_layer.transpose(-1, -2)]
        (cam1_1, cam1_2) = self.matmul1.relprop(cam1, **kwargs)
        cam1_1 /= 2
        cam1_2 /= 2

        # query
        cam1_1 = self.transpose_for_scores_relprop(cam1_1)
        cam1_1 = self.query.relprop(cam1_1, **kwargs)

        # key
        cam1_2 = self.transpose_for_scores_relprop(cam1_2.transpose(-1, -2))
        cam1_2 = self.key.relprop(cam1_2, **kwargs)

        # value
        cam2 = self.transpose_for_scores_relprop(cam2)
        cam2 = self.value.relprop(cam2, **kwargs)

        cam = self.clone.relprop((cam1_1, cam1_2, cam2), **kwargs)

        return cam


class BertSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.add = Add()

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        add = self.add([hidden_states, input_tensor])
        hidden_states = self.LayerNorm(add)
        return hidden_states

    def relprop(self, cam, **kwargs):
        cam = self.LayerNorm.relprop(cam, **kwargs)
        # [hidden_states, input_tensor]
        (cam1, cam2) = self.add.relprop(cam, **kwargs)
        cam1 = self.dropout.relprop(cam1, **kwargs)
        cam1 = self.dense.relprop(cam1, **kwargs)

        return (cam1, cam2)


class BertIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]()
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states

    def relprop(self, cam, **kwargs):
        cam = self.intermediate_act_fn.relprop(cam, **kwargs)  # FIXME only ReLU
        # print(cam.sum())
        cam = self.dense.relprop(cam, **kwargs)
        # print(cam.sum())
        return cam


class BertOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.add = Add()

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        add = self.add([hidden_states, input_tensor])
        hidden_states = self.LayerNorm(add)
        return hidden_states

    def relprop(self, cam, **kwargs):
        # print("in", cam.sum())
        cam = self.LayerNorm.relprop(cam, **kwargs)
        # print(cam.sum())
        # [hidden_states, input_tensor]
        (cam1, cam2) = self.add.relprop(cam, **kwargs)
        # print("add", cam1.sum(), cam2.sum(), cam1.sum() + cam2.sum())
        cam1 = self.dropout.relprop(cam1, **kwargs)
        # print(cam1.sum())
        cam1 = self.dense.relprop(cam1, **kwargs)
        # print("dense", cam1.sum())

        # print("out", cam1.sum() + cam2.sum(), cam1.sum(), cam2.sum())
        return (cam1, cam2)


class BertLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.attention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
        self.clone = Clone()

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        output_attentions=False,
    ):
        self_attention_outputs = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]
        outputs = self_attention_outputs[
            1:
        ]  # add self attentions if we output attention weights

        ao1, ao2 = self.clone(attention_output, 2)
        intermediate_output = self.intermediate(ao1)
        layer_output = self.output(intermediate_output, ao2)

        outputs = (layer_output,) + outputs
        return outputs

    def relprop(self, cam, **kwargs):
        (cam1, cam2) = self.output.relprop(cam, **kwargs)
        # print("output", cam1.sum(), cam2.sum(), cam1.sum() + cam2.sum())
        cam1 = self.intermediate.relprop(cam1, **kwargs)
        # print("intermediate", cam1.sum())
        cam = self.clone.relprop((cam1, cam2), **kwargs)
        # print("clone", cam.sum())
        cam = self.attention.relprop(cam, **kwargs)
        # print("attention", cam.sum())
        return cam


class BertModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
            if the model is configured as a decoder.
        encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask
            is used in the cross-attention if the model is configured as a decoder.
            Mask values selected in ``[0, 1]``:
            ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
        """
        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(
            attention_mask, input_shape, device
        )

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.config.is_decoder and encoder_hidden_states is not None:
            (
                encoder_batch_size,
                encoder_sequence_length,
                _,
            ) = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
            encoder_extended_attention_mask = self.invert_attention_mask(
                encoder_attention_mask
            )
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
        )

        encoder_outputs = self.encoder(
            embedding_output,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output)

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )

    def relprop(self, cam, **kwargs):
        cam = self.pooler.relprop(cam, **kwargs)
        # print("111111111111",cam.sum())
        cam = self.encoder.relprop(cam, **kwargs)
        # print("222222222222222", cam.sum())
        # print("conservation: ", cam.sum())
        return cam


if __name__ == "__main__":

    class Config:
        def __init__(
            self, hidden_size, num_attention_heads, attention_probs_dropout_prob
        ):
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.attention_probs_dropout_prob = attention_probs_dropout_prob

    model = BertSelfAttention(Config(1024, 4, 0.1))
    x = torch.rand(2, 20, 1024)
    x.requires_grad_()

    model.eval()

    y = model.forward(x)

    relprop = model.relprop(torch.rand(2, 20, 1024), (torch.rand(2, 20, 1024),))

    print(relprop[1][0].shape)
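The module above saves each layer's attention map and its gradient (save_attn / save_attn_gradients) and defines compute_rollout_attention, but nothing in this file shows them being wired together. What follows is a minimal sketch, not the repository's ExplanationGenerator, of one way to turn those hooks into per-token relevance; it assumes a sequence-classification wrapper `model` that exposes the BertModel defined above as `model.bert` and returns logits as its first output.

import torch

def rollout_relevance(model, input_ids, attention_mask, index=None, start_layer=0):
    # forward pass: BertSelfAttention.save_attn() stores each layer's attention map
    logits = model(input_ids=input_ids, attention_mask=attention_mask)[0]
    if index is None:
        index = logits.argmax(dim=-1).item()

    # backprop a one-hot of the chosen class so the register_hook() call above
    # records attention gradients in every layer
    one_hot = torch.zeros_like(logits)
    one_hot[0, index] = 1.0
    model.zero_grad()
    (one_hot * logits).sum().backward(retain_graph=True)

    # gradient-weighted attention per layer, averaged over heads, then rolled out
    cams = []
    for layer in model.bert.encoder.layer:
        attn = layer.attention.self.get_attn()            # (batch, heads, tokens, tokens)
        grad = layer.attention.self.get_attn_gradients()
        cams.append((grad * attn).clamp(min=0).mean(dim=1))
    rollout = compute_rollout_attention(cams, start_layer=start_layer)
    return rollout[:, 0]  # relevance of every token with respect to [CLS]

The repository's own explanation generators additionally run relprop() and use the stored attention cams; the gradient-weighted variant above is only the simplest instantiation of the same hooks.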
Transformer-Explainability/BERT_explainability/modules/BERT/BERT_cls_lrp.py
ADDED
@@ -0,0 +1,240 @@
from typing import Any, List

import torch
import torch.nn as nn
from BERT_explainability.modules.BERT.BERT_orig_lrp import BertModel
from BERT_explainability.modules.layers_lrp import *
from BERT_rationale_benchmark.models.model_utils import PaddedSequence
from torch.nn import CrossEntropyLoss, MSELoss
from transformers import BertPreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput  # needed for the return_dict branch below
from transformers.utils import logging


class BertForSequenceClassification(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.classifier = Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss.
            Indices should be in :obj:`[0, ..., config.num_labels - 1]`.
            If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                # We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def relprop(self, cam=None, **kwargs):
        cam = self.classifier.relprop(cam, **kwargs)
        cam = self.dropout.relprop(cam, **kwargs)
        cam = self.bert.relprop(cam, **kwargs)
        return cam


# this is the actual classifier we will be using
class BertClassifier(nn.Module):
    """Thin wrapper around BertForSequenceClassification"""

    def __init__(
        self,
        bert_dir: str,
        pad_token_id: int,
        cls_token_id: int,
        sep_token_id: int,
        num_labels: int,
        max_length: int = 512,
        use_half_precision=True,
    ):
        super(BertClassifier, self).__init__()
        bert = BertForSequenceClassification.from_pretrained(
            bert_dir, num_labels=num_labels
        )
        if use_half_precision:
            import apex

            bert = bert.half()
        self.bert = bert
        self.pad_token_id = pad_token_id
        self.cls_token_id = cls_token_id
        self.sep_token_id = sep_token_id
        self.max_length = max_length

    def forward(
        self,
        query: List[torch.tensor],
        docids: List[Any],
        document_batch: List[torch.tensor],
    ):
        assert len(query) == len(document_batch)
        print(query)
        # note about device management:
        # since distributed training is enabled, the inputs to this module can be on *any* device (preferably cpu, since we wrap and unwrap the module)
        # we want to keep these params on the input device (assuming CPU) for as long as possible for cheap memory access
        target_device = next(self.parameters()).device
        cls_token = torch.tensor([self.cls_token_id]).to(
            device=document_batch[0].device
        )
        sep_token = torch.tensor([self.sep_token_id]).to(
            device=document_batch[0].device
        )
        input_tensors = []
        position_ids = []
        for q, d in zip(query, document_batch):
            if len(q) + len(d) + 2 > self.max_length:
                d = d[: (self.max_length - len(q) - 2)]
            input_tensors.append(torch.cat([cls_token, q, sep_token, d]))
            position_ids.append(
                torch.tensor(list(range(0, len(q) + 1)) + list(range(0, len(d) + 1)))
            )
        bert_input = PaddedSequence.autopad(
            input_tensors,
            batch_first=True,
            padding_value=self.pad_token_id,
            device=target_device,
        )
        positions = PaddedSequence.autopad(
            position_ids, batch_first=True, padding_value=0, device=target_device
        )
        (classes,) = self.bert(
            bert_input.data,
            attention_mask=bert_input.mask(
                on=0.0, off=float("-inf"), device=target_device
            ),
            position_ids=positions.data,
        )
        assert torch.all(classes == classes)  # for nans

        print(input_tensors[0])
        print(self.relprop()[0])

        return classes

    def relprop(self, cam=None, **kwargs):
        return self.bert.relprop(cam, **kwargs)


if __name__ == "__main__":
    import os

    from transformers import BertTokenizer

    class Config:
        def __init__(
            self,
            hidden_size,
            num_attention_heads,
            attention_probs_dropout_prob,
            num_labels,
            hidden_dropout_prob,
        ):
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.num_labels = num_labels
            self.hidden_dropout_prob = hidden_dropout_prob

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    x = tokenizer.encode_plus(
        "In this movie the acting is great. The movie is perfect! [sep]",
        add_special_tokens=True,
        max_length=512,
        return_token_type_ids=False,
        return_attention_mask=True,
        pad_to_max_length=True,
        return_tensors="pt",
        truncation=True,
    )

    print(x["input_ids"])

    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2
    )
    model_save_file = os.path.join(
        "./BERT_explainability/output_bert/movies/classifier/", "classifier.pt"
    )
    model.load_state_dict(torch.load(model_save_file))

    # x = torch.randint(100, (2, 20))
    # x = torch.tensor([[101, 2054, 2003, 1996, 15792, 1997, 2023, 3319, 1029, 102,
    #                    101, 4079, 102, 101, 6732, 102, 101, 2643, 102, 101,
    #                    2038, 102, 101, 1037, 102, 101, 2933, 102, 101, 2005,
    #                    102, 101, 2032, 102, 101, 1010, 102, 101, 1037, 102,
    #                    101, 3800, 102, 101, 2005, 102, 101, 2010, 102, 101,
    #                    2166, 102, 101, 1010, 102, 101, 1998, 102, 101, 2010,
    #                    102, 101, 4650, 102, 101, 1010, 102, 101, 2002, 102,
    #                    101, 2074, 102, 101, 2515, 102, 101, 1050, 102, 101,
    #                    1005, 102, 101, 1056, 102, 101, 2113, 102, 101, 2054,
    #                    102, 101, 1012, 102]])
    # x.requires_grad_()

    model.eval()

    y = model(x["input_ids"], x["attention_mask"])
    print(y)

    cam, _ = model.relprop()

    # print(cam.shape)

    cam = cam.sum(-1)
    # print(cam)
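A small worked example (not from the repo) of the position_ids layout that BertClassifier.forward builds above: positions restart at the [SEP] that separates query from document.

# query of 3 tokens, document of 4 tokens
q_len, d_len = 3, 4
# input layout:   [CLS] q0 q1 q2 [SEP] d0 d1 d2 d3
# position ids:     0    1  2  3   0    1  2  3  4
positions = list(range(0, q_len + 1)) + list(range(0, d_len + 1))
assert positions == [0, 1, 2, 3, 0, 1, 2, 3, 4]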
Transformer-Explainability/BERT_explainability/modules/BERT/BERT_orig_lrp.py
ADDED
@@ -0,0 +1,748 @@
from __future__ import absolute_import

import math

import torch
import torch.nn.functional as F
from BERT_explainability.modules.layers_lrp import *
from torch import nn
from transformers import BertConfig, BertPreTrainedModel, PreTrainedModel
from transformers.modeling_outputs import (BaseModelOutput,
                                           BaseModelOutputWithPooling)

ACT2FN = {
    "relu": ReLU,
    "tanh": Tanh,
    "gelu": GELU,
}


def get_activation(activation_string):
    if activation_string in ACT2FN:
        return ACT2FN[activation_string]
    else:
        raise KeyError(
            "function {} not found in ACT2FN mapping {}".format(
                activation_string, list(ACT2FN.keys())
            )
        )


def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    all_layer_matrices = [
        all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
        for i in range(len(all_layer_matrices))
    ]
    joint_attention = all_layer_matrices[start_layer]
    for i in range(start_layer + 1, len(all_layer_matrices)):
        joint_attention = all_layer_matrices[i].bmm(joint_attention)
    return joint_attention


class BertEmbeddings(nn.Module):
    """Construct the embeddings from word, position and token_type embeddings."""

    def __init__(self, config):
        super().__init__()
        self.word_embeddings = nn.Embedding(
            config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
        )
        self.position_embeddings = nn.Embedding(
            config.max_position_embeddings, config.hidden_size
        )
        self.token_type_embeddings = nn.Embedding(
            config.type_vocab_size, config.hidden_size
        )

        # self.LayerNorm is not snake-cased to stick with TensorFlow model variable name and be able to load
        # any TensorFlow checkpoint file
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)

        # position_ids (1, len position emb) is contiguous in memory and exported when serialized
        self.register_buffer(
            "position_ids", torch.arange(config.max_position_embeddings).expand((1, -1))
        )

        self.add1 = Add()
        self.add2 = Add()

    def forward(
        self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None
    ):
        if input_ids is not None:
            input_shape = input_ids.size()
        else:
            input_shape = inputs_embeds.size()[:-1]

        seq_length = input_shape[1]

        if position_ids is None:
            position_ids = self.position_ids[:, :seq_length]

        if token_type_ids is None:
            token_type_ids = torch.zeros(
                input_shape, dtype=torch.long, device=self.position_ids.device
            )

        if inputs_embeds is None:
            inputs_embeds = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        # embeddings = inputs_embeds + position_embeddings + token_type_embeddings
        embeddings = self.add1([token_type_embeddings, position_embeddings])
        embeddings = self.add2([embeddings, inputs_embeds])
        embeddings = self.LayerNorm(embeddings)
        embeddings = self.dropout(embeddings)
        return embeddings

    def relprop(self, cam, **kwargs):
        cam = self.dropout.relprop(cam, **kwargs)
        cam = self.LayerNorm.relprop(cam, **kwargs)

        # [inputs_embeds, position_embeddings, token_type_embeddings]
        (cam) = self.add2.relprop(cam, **kwargs)

        return cam


class BertEncoder(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.config = config
        self.layer = nn.ModuleList(
            [BertLayer(config) for _ in range(config.num_hidden_layers)]
        )

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):
        all_hidden_states = () if output_hidden_states else None
        all_attentions = () if output_attentions else None
        for i, layer_module in enumerate(self.layer):
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)

            layer_head_mask = head_mask[i] if head_mask is not None else None

            if getattr(self.config, "gradient_checkpointing", False):

                def create_custom_forward(module):
                    def custom_forward(*inputs):
                        return module(*inputs, output_attentions)

                    return custom_forward

                layer_outputs = torch.utils.checkpoint.checkpoint(
                    create_custom_forward(layer_module),
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                )
            else:
                layer_outputs = layer_module(
                    hidden_states,
                    attention_mask,
                    layer_head_mask,
                    output_attentions,
                )
            hidden_states = layer_outputs[0]
            if output_attentions:
                all_attentions = all_attentions + (layer_outputs[1],)

        if output_hidden_states:
            all_hidden_states = all_hidden_states + (hidden_states,)

        if not return_dict:
            return tuple(
                v
                for v in [hidden_states, all_hidden_states, all_attentions]
                if v is not None
            )
        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=all_hidden_states,
            attentions=all_attentions,
        )

    def relprop(self, cam, **kwargs):
        # assuming output_hidden_states is False
        for layer_module in reversed(self.layer):
            cam = layer_module.relprop(cam, **kwargs)
        return cam


# not adding relprop since this is only pooling at the end of the network, does not impact tokens importance
class BertPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.hidden_size)
        self.activation = Tanh()
        self.pool = IndexSelect()

    def forward(self, hidden_states):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        self._seq_size = hidden_states.shape[1]

        # first_token_tensor = hidden_states[:, 0]
        first_token_tensor = self.pool(
            hidden_states, 1, torch.tensor(0, device=hidden_states.device)
        )
        first_token_tensor = first_token_tensor.squeeze(1)
        pooled_output = self.dense(first_token_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

    def relprop(self, cam, **kwargs):
        cam = self.activation.relprop(cam, **kwargs)
        # print(cam.sum())
        cam = self.dense.relprop(cam, **kwargs)
        # print(cam.sum())
        cam = cam.unsqueeze(1)
        cam = self.pool.relprop(cam, **kwargs)
        # print(cam.sum())

        return cam


class BertAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.self = BertSelfAttention(config)
        self.output = BertSelfOutput(config)
        self.pruned_heads = set()
        self.clone = Clone()

    def prune_heads(self, heads):
        if len(heads) == 0:
            return
        heads, index = find_pruneable_heads_and_indices(
            heads,
            self.self.num_attention_heads,
            self.self.attention_head_size,
            self.pruned_heads,
        )

        # Prune linear layers
        self.self.query = prune_linear_layer(self.self.query, index)
        self.self.key = prune_linear_layer(self.self.key, index)
        self.self.value = prune_linear_layer(self.self.value, index)
        self.output.dense = prune_linear_layer(self.output.dense, index, dim=1)

        # Update hyper params and store pruned heads
        self.self.num_attention_heads = self.self.num_attention_heads - len(heads)
        self.self.all_head_size = (
            self.self.attention_head_size * self.self.num_attention_heads
        )
        self.pruned_heads = self.pruned_heads.union(heads)

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=False,
    ):
        h1, h2 = self.clone(hidden_states, 2)
        self_outputs = self.self(
            h1,
            attention_mask,
            head_mask,
            encoder_hidden_states,
            encoder_attention_mask,
            output_attentions,
        )
        attention_output = self.output(self_outputs[0], h2)
        outputs = (attention_output,) + self_outputs[
            1:
        ]  # add attentions if we output them
        return outputs

    def relprop(self, cam, **kwargs):
        # assuming that we don't ouput the attentions (outputs = (attention_output,)), self_outputs=(context_layer,)
        (cam1, cam2) = self.output.relprop(cam, **kwargs)
        # print(cam1.sum(), cam2.sum(), (cam1 + cam2).sum())
        cam1 = self.self.relprop(cam1, **kwargs)
        # print(cam1.sum(), cam2.sum(), (cam1 + cam2).sum())

        return self.clone.relprop((cam1, cam2), **kwargs)


class BertSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        if config.hidden_size % config.num_attention_heads != 0 and not hasattr(
|
298 |
+
config, "embedding_size"
|
299 |
+
):
|
300 |
+
raise ValueError(
|
301 |
+
"The hidden size (%d) is not a multiple of the number of attention "
|
302 |
+
"heads (%d)" % (config.hidden_size, config.num_attention_heads)
|
303 |
+
)
|
304 |
+
|
305 |
+
self.num_attention_heads = config.num_attention_heads
|
306 |
+
self.attention_head_size = int(config.hidden_size / config.num_attention_heads)
|
307 |
+
self.all_head_size = self.num_attention_heads * self.attention_head_size
|
308 |
+
|
309 |
+
self.query = Linear(config.hidden_size, self.all_head_size)
|
310 |
+
self.key = Linear(config.hidden_size, self.all_head_size)
|
311 |
+
self.value = Linear(config.hidden_size, self.all_head_size)
|
312 |
+
|
313 |
+
self.dropout = Dropout(config.attention_probs_dropout_prob)
|
314 |
+
|
315 |
+
self.matmul1 = MatMul()
|
316 |
+
self.matmul2 = MatMul()
|
317 |
+
self.softmax = Softmax(dim=-1)
|
318 |
+
self.add = Add()
|
319 |
+
self.mul = Mul()
|
320 |
+
self.head_mask = None
|
321 |
+
self.attention_mask = None
|
322 |
+
self.clone = Clone()
|
323 |
+
|
324 |
+
self.attn_cam = None
|
325 |
+
self.attn = None
|
326 |
+
self.attn_gradients = None
|
327 |
+
|
328 |
+
def get_attn(self):
|
329 |
+
return self.attn
|
330 |
+
|
331 |
+
def save_attn(self, attn):
|
332 |
+
self.attn = attn
|
333 |
+
|
334 |
+
def save_attn_cam(self, cam):
|
335 |
+
self.attn_cam = cam
|
336 |
+
|
337 |
+
def get_attn_cam(self):
|
338 |
+
return self.attn_cam
|
339 |
+
|
340 |
+
def save_attn_gradients(self, attn_gradients):
|
341 |
+
self.attn_gradients = attn_gradients
|
342 |
+
|
343 |
+
def get_attn_gradients(self):
|
344 |
+
return self.attn_gradients
|
345 |
+
|
346 |
+
def transpose_for_scores(self, x):
|
347 |
+
new_x_shape = x.size()[:-1] + (
|
348 |
+
self.num_attention_heads,
|
349 |
+
self.attention_head_size,
|
350 |
+
)
|
351 |
+
x = x.view(*new_x_shape)
|
352 |
+
return x.permute(0, 2, 1, 3)
|
353 |
+
|
354 |
+
def transpose_for_scores_relprop(self, x):
|
355 |
+
return x.permute(0, 2, 1, 3).flatten(2)
|
356 |
+
|
357 |
+
def forward(
|
358 |
+
self,
|
359 |
+
hidden_states,
|
360 |
+
attention_mask=None,
|
361 |
+
head_mask=None,
|
362 |
+
encoder_hidden_states=None,
|
363 |
+
encoder_attention_mask=None,
|
364 |
+
output_attentions=False,
|
365 |
+
):
|
366 |
+
self.head_mask = head_mask
|
367 |
+
self.attention_mask = attention_mask
|
368 |
+
|
369 |
+
h1, h2, h3 = self.clone(hidden_states, 3)
|
370 |
+
mixed_query_layer = self.query(h1)
|
371 |
+
|
372 |
+
# If this is instantiated as a cross-attention module, the keys
|
373 |
+
# and values come from an encoder; the attention mask needs to be
|
374 |
+
# such that the encoder's padding tokens are not attended to.
|
375 |
+
if encoder_hidden_states is not None:
|
376 |
+
mixed_key_layer = self.key(encoder_hidden_states)
|
377 |
+
mixed_value_layer = self.value(encoder_hidden_states)
|
378 |
+
attention_mask = encoder_attention_mask
|
379 |
+
else:
|
380 |
+
mixed_key_layer = self.key(h2)
|
381 |
+
mixed_value_layer = self.value(h3)
|
382 |
+
|
383 |
+
query_layer = self.transpose_for_scores(mixed_query_layer)
|
384 |
+
key_layer = self.transpose_for_scores(mixed_key_layer)
|
385 |
+
value_layer = self.transpose_for_scores(mixed_value_layer)
|
386 |
+
|
387 |
+
# Take the dot product between "query" and "key" to get the raw attention scores.
|
388 |
+
attention_scores = self.matmul1([query_layer, key_layer.transpose(-1, -2)])
|
389 |
+
attention_scores = attention_scores / math.sqrt(self.attention_head_size)
|
390 |
+
if attention_mask is not None:
|
391 |
+
# Apply the attention mask is (precomputed for all layers in BertModel forward() function)
|
392 |
+
attention_scores = self.add([attention_scores, attention_mask])
|
393 |
+
|
394 |
+
# Normalize the attention scores to probabilities.
|
395 |
+
attention_probs = self.softmax(attention_scores)
|
396 |
+
|
397 |
+
self.save_attn(attention_probs)
|
398 |
+
attention_probs.register_hook(self.save_attn_gradients)
|
399 |
+
|
400 |
+
# This is actually dropping out entire tokens to attend to, which might
|
401 |
+
# seem a bit unusual, but is taken from the original Transformer paper.
|
402 |
+
attention_probs = self.dropout(attention_probs)
|
403 |
+
|
404 |
+
# Mask heads if we want to
|
405 |
+
if head_mask is not None:
|
406 |
+
attention_probs = attention_probs * head_mask
|
407 |
+
|
408 |
+
context_layer = self.matmul2([attention_probs, value_layer])
|
409 |
+
|
410 |
+
context_layer = context_layer.permute(0, 2, 1, 3).contiguous()
|
411 |
+
new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,)
|
412 |
+
context_layer = context_layer.view(*new_context_layer_shape)
|
413 |
+
|
414 |
+
outputs = (
|
415 |
+
(context_layer, attention_probs) if output_attentions else (context_layer,)
|
416 |
+
)
|
417 |
+
return outputs
|
418 |
+
|
419 |
+
def relprop(self, cam, **kwargs):
|
420 |
+
# Assume output_attentions == False
|
421 |
+
cam = self.transpose_for_scores(cam)
|
422 |
+
|
423 |
+
# [attention_probs, value_layer]
|
424 |
+
(cam1, cam2) = self.matmul2.relprop(cam, **kwargs)
|
425 |
+
cam1 /= 2
|
426 |
+
cam2 /= 2
|
427 |
+
if self.head_mask is not None:
|
428 |
+
# [attention_probs, head_mask]
|
429 |
+
(cam1, _) = self.mul.relprop(cam1, **kwargs)
|
430 |
+
|
431 |
+
self.save_attn_cam(cam1)
|
432 |
+
|
433 |
+
cam1 = self.dropout.relprop(cam1, **kwargs)
|
434 |
+
|
435 |
+
cam1 = self.softmax.relprop(cam1, **kwargs)
|
436 |
+
|
437 |
+
if self.attention_mask is not None:
|
438 |
+
# [attention_scores, attention_mask]
|
439 |
+
(cam1, _) = self.add.relprop(cam1, **kwargs)
|
440 |
+
|
441 |
+
# [query_layer, key_layer.transpose(-1, -2)]
|
442 |
+
(cam1_1, cam1_2) = self.matmul1.relprop(cam1, **kwargs)
|
443 |
+
cam1_1 /= 2
|
444 |
+
cam1_2 /= 2
|
445 |
+
|
446 |
+
# query
|
447 |
+
cam1_1 = self.transpose_for_scores_relprop(cam1_1)
|
448 |
+
cam1_1 = self.query.relprop(cam1_1, **kwargs)
|
449 |
+
|
450 |
+
# key
|
451 |
+
cam1_2 = self.transpose_for_scores_relprop(cam1_2.transpose(-1, -2))
|
452 |
+
cam1_2 = self.key.relprop(cam1_2, **kwargs)
|
453 |
+
|
454 |
+
# value
|
455 |
+
cam2 = self.transpose_for_scores_relprop(cam2)
|
456 |
+
cam2 = self.value.relprop(cam2, **kwargs)
|
457 |
+
|
458 |
+
cam = self.clone.relprop((cam1_1, cam1_2, cam2), **kwargs)
|
459 |
+
|
460 |
+
return cam
|
461 |
+
|
462 |
+
|
463 |
+
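Editor's note: the class above caches both the attention map (save_attn / register_hook for its gradients) and its LRP relevance (save_attn_cam); this is the per-layer signal that the explanation generator later combines. A minimal sketch of that combination for one layer, assuming `layer` is a BertLayer on which a forward pass, a backward pass, and relprop have already been run (illustrative only, not part of the committed diff):

# Illustrative sketch (editor-added), not part of the repository file.
grad = layer.attention.self.get_attn_gradients()   # gradient of the score w.r.t. the attention map
cam = layer.attention.self.get_attn_cam()          # LRP relevance assigned to the attention map
head_relevance = (grad * cam).clamp(min=0).mean(dim=1)  # positive grad*relevance, averaged over heads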
class BertSelfOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.hidden_size)
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.add = Add()

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        add = self.add([hidden_states, input_tensor])
        hidden_states = self.LayerNorm(add)
        return hidden_states

    def relprop(self, cam, **kwargs):
        cam = self.LayerNorm.relprop(cam, **kwargs)
        # [hidden_states, input_tensor]
        (cam1, cam2) = self.add.relprop(cam, **kwargs)
        cam1 = self.dropout.relprop(cam1, **kwargs)
        cam1 = self.dense.relprop(cam1, **kwargs)

        return (cam1, cam2)


class BertIntermediate(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.hidden_size, config.intermediate_size)
        if isinstance(config.hidden_act, str):
            self.intermediate_act_fn = ACT2FN[config.hidden_act]()
        else:
            self.intermediate_act_fn = config.hidden_act

    def forward(self, hidden_states):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.intermediate_act_fn(hidden_states)
        return hidden_states

    def relprop(self, cam, **kwargs):
        cam = self.intermediate_act_fn.relprop(cam, **kwargs)  # FIXME only ReLU
        # print(cam.sum())
        cam = self.dense.relprop(cam, **kwargs)
        # print(cam.sum())
        return cam


class BertOutput(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = Linear(config.intermediate_size, config.hidden_size)
        self.LayerNorm = LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.add = Add()

    def forward(self, hidden_states, input_tensor):
        hidden_states = self.dense(hidden_states)
        hidden_states = self.dropout(hidden_states)
        add = self.add([hidden_states, input_tensor])
        hidden_states = self.LayerNorm(add)
        return hidden_states

    def relprop(self, cam, **kwargs):
        # print("in", cam.sum())
        cam = self.LayerNorm.relprop(cam, **kwargs)
        # print(cam.sum())
        # [hidden_states, input_tensor]
        (cam1, cam2) = self.add.relprop(cam, **kwargs)
        # print("add", cam1.sum(), cam2.sum(), cam1.sum() + cam2.sum())
        cam1 = self.dropout.relprop(cam1, **kwargs)
        # print(cam1.sum())
        cam1 = self.dense.relprop(cam1, **kwargs)
        # print("dense", cam1.sum())

        # print("out", cam1.sum() + cam2.sum(), cam1.sum(), cam2.sum())
        return (cam1, cam2)


class BertLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.attention = BertAttention(config)
        self.intermediate = BertIntermediate(config)
        self.output = BertOutput(config)
        self.clone = Clone()

    def forward(
        self,
        hidden_states,
        attention_mask=None,
        head_mask=None,
        output_attentions=False,
    ):
        self_attention_outputs = self.attention(
            hidden_states,
            attention_mask,
            head_mask,
            output_attentions=output_attentions,
        )
        attention_output = self_attention_outputs[0]
        outputs = self_attention_outputs[
            1:
        ]  # add self attentions if we output attention weights

        ao1, ao2 = self.clone(attention_output, 2)
        intermediate_output = self.intermediate(ao1)
        layer_output = self.output(intermediate_output, ao2)

        outputs = (layer_output,) + outputs
        return outputs

    def relprop(self, cam, **kwargs):
        (cam1, cam2) = self.output.relprop(cam, **kwargs)
        # print("output", cam1.sum(), cam2.sum(), cam1.sum() + cam2.sum())
        cam1 = self.intermediate.relprop(cam1, **kwargs)
        # print("intermediate", cam1.sum())
        cam = self.clone.relprop((cam1, cam2), **kwargs)
        # print("clone", cam.sum())
        cam = self.attention.relprop(cam, **kwargs)
        # print("attention", cam.sum())
        return cam


class BertModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config

        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        self.pooler = BertPooler(config)

        self.init_weights()

    def get_input_embeddings(self):
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        encoder_hidden_states=None,
        encoder_attention_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`):
            Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention
            if the model is configured as a decoder.
        encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`):
            Mask to avoid performing attention on the padding token indices of the encoder input. This mask
            is used in the cross-attention if the model is configured as a decoder.
            Mask values selected in ``[0, 1]``:
            ``1`` for tokens that are NOT MASKED, ``0`` for MASKED tokens.
        """
        output_attentions = (
            output_attentions
            if output_attentions is not None
            else self.config.output_attentions
        )
        output_hidden_states = (
            output_hidden_states
            if output_hidden_states is not None
            else self.config.output_hidden_states
        )
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_shape = input_ids.size()
        elif inputs_embeds is not None:
            input_shape = inputs_embeds.size()[:-1]
        else:
            raise ValueError("You have to specify either input_ids or inputs_embeds")

        device = input_ids.device if input_ids is not None else inputs_embeds.device

        if attention_mask is None:
            attention_mask = torch.ones(input_shape, device=device)
        if token_type_ids is None:
            token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device)

        # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length]
        # ourselves in which case we just need to make it broadcastable to all heads.
        extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(
            attention_mask, input_shape, device
        )

        # If a 2D or 3D attention mask is provided for the cross-attention
        # we need to make broadcastable to [batch_size, num_heads, seq_length, seq_length]
        if self.config.is_decoder and encoder_hidden_states is not None:
            (
                encoder_batch_size,
                encoder_sequence_length,
                _,
            ) = encoder_hidden_states.size()
            encoder_hidden_shape = (encoder_batch_size, encoder_sequence_length)
            if encoder_attention_mask is None:
                encoder_attention_mask = torch.ones(encoder_hidden_shape, device=device)
            encoder_extended_attention_mask = self.invert_attention_mask(
                encoder_attention_mask
            )
        else:
            encoder_extended_attention_mask = None

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # attention_probs has shape bsz x n_heads x N x N
        # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads]
        # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length]
        head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers)

        embedding_output = self.embeddings(
            input_ids=input_ids,
            position_ids=position_ids,
            token_type_ids=token_type_ids,
            inputs_embeds=inputs_embeds,
        )

        encoder_outputs = self.encoder(
            embedding_output,
            attention_mask=extended_attention_mask,
            head_mask=head_mask,
            encoder_hidden_states=encoder_hidden_states,
            encoder_attention_mask=encoder_extended_attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output)

        if not return_dict:
            return (sequence_output, pooled_output) + encoder_outputs[1:]

        return BaseModelOutputWithPooling(
            last_hidden_state=sequence_output,
            pooler_output=pooled_output,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )

    def relprop(self, cam, **kwargs):
        cam = self.pooler.relprop(cam, **kwargs)
        # print("111111111111",cam.sum())
        cam = self.encoder.relprop(cam, **kwargs)
        # print("222222222222222", cam.sum())
        # print("conservation: ", cam.sum())
        return cam


if __name__ == "__main__":

    class Config:
        def __init__(
            self, hidden_size, num_attention_heads, attention_probs_dropout_prob
        ):
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.attention_probs_dropout_prob = attention_probs_dropout_prob

    model = BertSelfAttention(Config(1024, 4, 0.1))
    x = torch.rand(2, 20, 1024)
    x.requires_grad_()

    model.eval()

    y = model.forward(x)

    relprop = model.relprop(torch.rand(2, 20, 1024), (torch.rand(2, 20, 1024),))

    print(relprop[1][0].shape)
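Editor's note: a minimal sketch (editor-added, not part of the committed file) of the relprop call pattern used elsewhere in this repository, where the relevance seed has the shape of the module's output and the LRP rule is selected via the `alpha` keyword; it assumes the same toy `Config` as the `__main__` block above and an eval-mode forward pass before relprop:

# Illustrative sketch (editor-added); assumes the Config class defined above.
attn = BertSelfAttention(Config(1024, 4, 0.1))
attn.eval()
x = torch.rand(2, 20, 1024, requires_grad=True)
(context,) = attn(x)                                  # forward pass caches inputs for relprop
cam = attn.relprop(torch.rand_like(context), alpha=1) # alpha=1 is what the explanation generator uses
print(cam.shape)                                      # expected: torch.Size([2, 20, 1024])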
Transformer-Explainability/BERT_explainability/modules/BERT/BertForSequenceClassification.py
ADDED
@@ -0,0 +1,241 @@
from typing import Any, List

import torch
import torch.nn as nn
from BERT_explainability.modules.BERT.BERT import BertModel
from BERT_explainability.modules.layers_ours import *
from BERT_rationale_benchmark.models.model_utils import PaddedSequence
from torch.nn import CrossEntropyLoss, MSELoss
from transformers import BertPreTrainedModel
from transformers.utils import logging


class BertForSequenceClassification(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = Dropout(config.hidden_dropout_prob)
        self.classifier = Linear(config.hidden_size, config.num_labels)

        self.init_weights()

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        r"""
        labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`):
            Labels for computing the sequence classification/regression loss.
            Indices should be in :obj:`[0, ..., config.num_labels - 1]`.
            If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        return_dict = (
            return_dict if return_dict is not None else self.config.use_return_dict
        )

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]

        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            if self.num_labels == 1:
                # We are doing regression
                loss_fct = MSELoss()
                loss = loss_fct(logits.view(-1), labels.view(-1))
            else:
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

    def relprop(self, cam=None, **kwargs):
        cam = self.classifier.relprop(cam, **kwargs)
        cam = self.dropout.relprop(cam, **kwargs)
        cam = self.bert.relprop(cam, **kwargs)
        # print("conservation: ", cam.sum())
        return cam


# this is the actual classifier we will be using
class BertClassifier(nn.Module):
    """Thin wrapper around BertForSequenceClassification"""

    def __init__(
        self,
        bert_dir: str,
        pad_token_id: int,
        cls_token_id: int,
        sep_token_id: int,
        num_labels: int,
        max_length: int = 512,
        use_half_precision=True,
    ):
        super(BertClassifier, self).__init__()
        bert = BertForSequenceClassification.from_pretrained(
            bert_dir, num_labels=num_labels
        )
        if use_half_precision:
            import apex

            bert = bert.half()
        self.bert = bert
        self.pad_token_id = pad_token_id
        self.cls_token_id = cls_token_id
        self.sep_token_id = sep_token_id
        self.max_length = max_length

    def forward(
        self,
        query: List[torch.tensor],
        docids: List[Any],
        document_batch: List[torch.tensor],
    ):
        assert len(query) == len(document_batch)
        print(query)
        # note about device management:
        # since distributed training is enabled, the inputs to this module can be on *any* device (preferably cpu, since we wrap and unwrap the module)
        # we want to keep these params on the input device (assuming CPU) for as long as possible for cheap memory access
        target_device = next(self.parameters()).device
        cls_token = torch.tensor([self.cls_token_id]).to(
            device=document_batch[0].device
        )
        sep_token = torch.tensor([self.sep_token_id]).to(
            device=document_batch[0].device
        )
        input_tensors = []
        position_ids = []
        for q, d in zip(query, document_batch):
            if len(q) + len(d) + 2 > self.max_length:
                d = d[: (self.max_length - len(q) - 2)]
            input_tensors.append(torch.cat([cls_token, q, sep_token, d]))
            position_ids.append(
                torch.tensor(list(range(0, len(q) + 1)) + list(range(0, len(d) + 1)))
            )
        bert_input = PaddedSequence.autopad(
            input_tensors,
            batch_first=True,
            padding_value=self.pad_token_id,
            device=target_device,
        )
        positions = PaddedSequence.autopad(
            position_ids, batch_first=True, padding_value=0, device=target_device
        )
        (classes,) = self.bert(
            bert_input.data,
            attention_mask=bert_input.mask(
                on=0.0, off=float("-inf"), device=target_device
            ),
            position_ids=positions.data,
        )
        assert torch.all(classes == classes)  # for nans

        print(input_tensors[0])
        print(self.relprop()[0])

        return classes

    def relprop(self, cam=None, **kwargs):
        return self.bert.relprop(cam, **kwargs)


if __name__ == "__main__":
    import os

    from transformers import BertTokenizer

    class Config:
        def __init__(
            self,
            hidden_size,
            num_attention_heads,
            attention_probs_dropout_prob,
            num_labels,
            hidden_dropout_prob,
        ):
            self.hidden_size = hidden_size
            self.num_attention_heads = num_attention_heads
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.num_labels = num_labels
            self.hidden_dropout_prob = hidden_dropout_prob

    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    x = tokenizer.encode_plus(
        "In this movie the acting is great. The movie is perfect! [sep]",
        add_special_tokens=True,
        max_length=512,
        return_token_type_ids=False,
        return_attention_mask=True,
        pad_to_max_length=True,
        return_tensors="pt",
        truncation=True,
    )

    print(x["input_ids"])

    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased", num_labels=2
    )
    model_save_file = os.path.join(
        "./BERT_explainability/output_bert/movies/classifier/", "classifier.pt"
    )
    model.load_state_dict(torch.load(model_save_file))

    # x = torch.randint(100, (2, 20))
    # x = torch.tensor([[101, 2054, 2003, 1996, 15792, 1997, 2023, 3319, 1029, 102,
    #                    101, 4079, 102, 101, 6732, 102, 101, 2643, 102, 101,
    #                    2038, 102, 101, 1037, 102, 101, 2933, 102, 101, 2005,
    #                    102, 101, 2032, 102, 101, 1010, 102, 101, 1037, 102,
    #                    101, 3800, 102, 101, 2005, 102, 101, 2010, 102, 101,
    #                    2166, 102, 101, 1010, 102, 101, 1998, 102, 101, 2010,
    #                    102, 101, 4650, 102, 101, 1010, 102, 101, 2002, 102,
    #                    101, 2074, 102, 101, 2515, 102, 101, 1050, 102, 101,
    #                    1005, 102, 101, 1056, 102, 101, 2113, 102, 101, 2054,
    #                    102, 101, 1012, 102]])
    # x.requires_grad_()

    model.eval()

    y = model(x["input_ids"], x["attention_mask"])
    print(y)

    cam, _ = model.relprop()

    # print(cam.shape)

    cam = cam.sum(-1)
    # print(cam)
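Editor's note: the relprop chain above expects a relevance seed shaped like the logits. A minimal sketch (editor-added, not part of the committed file) of seeding it with a one-hot on the predicted class, as the explanation generator in the next file does; it assumes the `model` and tokenized `x` from the `__main__` block above and plain `bert-base-uncased` weights:

# Illustrative sketch (editor-added).
logits = model(x["input_ids"], x["attention_mask"])[0]
pred = logits.argmax(dim=-1).item()
seed = torch.zeros_like(logits)
seed[0, pred] = 1.0                                  # one-hot relevance on the chosen class
token_relevance = model.relprop(seed, alpha=1).sum(dim=-1)  # one relevance score per input token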
Transformer-Explainability/BERT_explainability/modules/BERT/ExplanationGenerator.py
ADDED
@@ -0,0 +1,165 @@
import argparse
import glob

import numpy as np
import torch


# compute rollout between attention layers
def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration- code adapted from https://github.com/samiraabnar/attention_flow
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    matrices_aug = [
        all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
        for i in range(len(all_layer_matrices))
    ]
    joint_attention = matrices_aug[start_layer]
    for i in range(start_layer + 1, len(matrices_aug)):
        joint_attention = matrices_aug[i].bmm(joint_attention)
    return joint_attention


class Generator:
    def __init__(self, model):
        self.model = model
        self.model.eval()

    def forward(self, input_ids, attention_mask):
        return self.model(input_ids, attention_mask)

    def generate_LRP(self, input_ids, attention_mask, index=None, start_layer=11):
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        kwargs = {"alpha": 1}

        if index == None:
            index = np.argmax(output.cpu().data.numpy(), axis=-1)

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, index] = 1
        one_hot_vector = one_hot
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        self.model.relprop(torch.tensor(one_hot_vector).to(input_ids.device), **kwargs)

        cams = []
        blocks = self.model.bert.encoder.layer
        for blk in blocks:
            grad = blk.attention.self.get_attn_gradients()
            cam = blk.attention.self.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
            cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cams.append(cam.unsqueeze(0))
        rollout = compute_rollout_attention(cams, start_layer=start_layer)
        rollout[:, 0, 0] = rollout[:, 0].min()
        return rollout[:, 0]

    def generate_LRP_last_layer(self, input_ids, attention_mask, index=None):
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        kwargs = {"alpha": 1}
        if index == None:
            index = np.argmax(output.cpu().data.numpy(), axis=-1)

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, index] = 1
        one_hot_vector = one_hot
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        self.model.relprop(torch.tensor(one_hot_vector).to(input_ids.device), **kwargs)

        cam = self.model.bert.encoder.layer[-1].attention.self.get_attn_cam()[0]
        cam = cam.clamp(min=0).mean(dim=0).unsqueeze(0)
        cam[:, 0, 0] = 0
        return cam[:, 0]

    def generate_full_lrp(self, input_ids, attention_mask, index=None):
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        kwargs = {"alpha": 1}

        if index == None:
            index = np.argmax(output.cpu().data.numpy(), axis=-1)

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, index] = 1
        one_hot_vector = one_hot
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        cam = self.model.relprop(
            torch.tensor(one_hot_vector).to(input_ids.device), **kwargs
        )
        cam = cam.sum(dim=2)
        cam[:, 0] = 0
        return cam

    def generate_attn_last_layer(self, input_ids, attention_mask, index=None):
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        cam = self.model.bert.encoder.layer[-1].attention.self.get_attn()[0]
        cam = cam.mean(dim=0).unsqueeze(0)
        cam[:, 0, 0] = 0
        return cam[:, 0]

    def generate_rollout(self, input_ids, attention_mask, start_layer=0, index=None):
        self.model.zero_grad()
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        blocks = self.model.bert.encoder.layer
        all_layer_attentions = []
        for blk in blocks:
            attn_heads = blk.attention.self.get_attn()
            avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
            all_layer_attentions.append(avg_heads)
        rollout = compute_rollout_attention(
            all_layer_attentions, start_layer=start_layer
        )
        rollout[:, 0, 0] = 0
        return rollout[:, 0]

    def generate_attn_gradcam(self, input_ids, attention_mask, index=None):
        output = self.model(input_ids=input_ids, attention_mask=attention_mask)[0]
        kwargs = {"alpha": 1}

        if index == None:
            index = np.argmax(output.cpu().data.numpy(), axis=-1)

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, index] = 1
        one_hot_vector = one_hot
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        self.model.relprop(torch.tensor(one_hot_vector).to(input_ids.device), **kwargs)

        cam = self.model.bert.encoder.layer[-1].attention.self.get_attn()
        grad = self.model.bert.encoder.layer[-1].attention.self.get_attn_gradients()

        cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
        grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
        grad = grad.mean(dim=[1, 2], keepdim=True)
        cam = (cam * grad).mean(0).clamp(min=0).unsqueeze(0)
        cam = (cam - cam.min()) / (cam.max() - cam.min())
        cam[:, 0, 0] = 0
        return cam[:, 0]
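Editor's note: a minimal end-to-end sketch (editor-added, not part of the committed file) of how this generator is typically driven; the checkpoint name is only an example of a sequence-classification checkpoint, and a CUDA device is assumed because generate_LRP calls .cuda() internally:

# Illustrative sketch (editor-added).
from transformers import BertTokenizer
from BERT_explainability.modules.BERT.BertForSequenceClassification import BertForSequenceClassification
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator

model = BertForSequenceClassification.from_pretrained("textattack/bert-base-uncased-SST-2").cuda().eval()
tokenizer = BertTokenizer.from_pretrained("textattack/bert-base-uncased-SST-2")
generator = Generator(model)

encoding = tokenizer("This movie was great.", return_tensors="pt")
input_ids = encoding["input_ids"].cuda()
attention_mask = encoding["attention_mask"].cuda()

scores = generator.generate_LRP(input_ids, attention_mask, start_layer=0)[0]
scores = (scores - scores.min()) / (scores.max() - scores.min())  # normalize for display
tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
print(list(zip(tokens, scores.tolist())))                          # per-token relevance scores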
Transformer-Explainability/BERT_explainability/modules/__init__.py
ADDED
File without changes
Transformer-Explainability/BERT_explainability/modules/layers_lrp.py
ADDED
@@ -0,0 +1,352 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = [
    "forward_hook",
    "Clone",
    "Add",
    "Cat",
    "ReLU",
    "GELU",
    "Dropout",
    "BatchNorm2d",
    "Linear",
    "MaxPool2d",
    "AdaptiveAvgPool2d",
    "AvgPool2d",
    "Conv2d",
    "Sequential",
    "safe_divide",
    "einsum",
    "Softmax",
    "IndexSelect",
    "LayerNorm",
    "AddEye",
    "Tanh",
    "MatMul",
    "Mul",
]


def safe_divide(a, b):
    den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
    den = den + den.eq(0).type(den.type()) * 1e-9
    return a / den * b.ne(0).type(b.type())


def forward_hook(self, input, output):
    if type(input[0]) in (list, tuple):
        self.X = []
        for i in input[0]:
            x = i.detach()
            x.requires_grad = True
            self.X.append(x)
    else:
        self.X = input[0].detach()
        self.X.requires_grad = True

    self.Y = output


def backward_hook(self, grad_input, grad_output):
    self.grad_input = grad_input
    self.grad_output = grad_output


class RelProp(nn.Module):
    def __init__(self):
        super(RelProp, self).__init__()
        # if not self.training:
        self.register_forward_hook(forward_hook)

    def gradprop(self, Z, X, S):
        C = torch.autograd.grad(Z, X, S, retain_graph=True)
        return C

    def relprop(self, R, alpha):
        return R


class RelPropSimple(RelProp):
    def relprop(self, R, alpha):
        Z = self.forward(self.X)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        if torch.is_tensor(self.X) == False:
            outputs = []
            outputs.append(self.X[0] * C[0])
            outputs.append(self.X[1] * C[1])
        else:
            outputs = self.X * (C[0])
        return outputs


class AddEye(RelPropSimple):
    # input of shape B, C, seq_len, seq_len
    def forward(self, input):
        return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)


class ReLU(nn.ReLU, RelProp):
    pass


class Tanh(nn.Tanh, RelProp):
    pass


class GELU(nn.GELU, RelProp):
    pass


class Softmax(nn.Softmax, RelProp):
    pass


class LayerNorm(nn.LayerNorm, RelProp):
    pass


class Dropout(nn.Dropout, RelProp):
    pass


class MaxPool2d(nn.MaxPool2d, RelPropSimple):
    pass


class LayerNorm(nn.LayerNorm, RelProp):
    pass


class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
    pass


class MatMul(RelPropSimple):
    def forward(self, inputs):
        return torch.matmul(*inputs)


class Mul(RelPropSimple):
    def forward(self, inputs):
        return torch.mul(*inputs)


class AvgPool2d(nn.AvgPool2d, RelPropSimple):
    pass


class Add(RelPropSimple):
    def forward(self, inputs):
        return torch.add(*inputs)


class einsum(RelPropSimple):
    def __init__(self, equation):
        super().__init__()
        self.equation = equation

    def forward(self, *operands):
        return torch.einsum(self.equation, *operands)


class IndexSelect(RelProp):
    def forward(self, inputs, dim, indices):
        self.__setattr__("dim", dim)
        self.__setattr__("indices", indices)

        return torch.index_select(inputs, dim, indices)

    def relprop(self, R, alpha):
        Z = self.forward(self.X, self.dim, self.indices)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        if torch.is_tensor(self.X) == False:
            outputs = []
            outputs.append(self.X[0] * C[0])
            outputs.append(self.X[1] * C[1])
        else:
            outputs = self.X * (C[0])
        return outputs


class Clone(RelProp):
    def forward(self, input, num):
        self.__setattr__("num", num)
        outputs = []
        for _ in range(num):
            outputs.append(input)

        return outputs

    def relprop(self, R, alpha):
        Z = []
        for _ in range(self.num):
            Z.append(self.X)
        S = [safe_divide(r, z) for r, z in zip(R, Z)]
        C = self.gradprop(Z, self.X, S)[0]

        R = self.X * C

        return R


class Cat(RelProp):
    def forward(self, inputs, dim):
        self.__setattr__("dim", dim)
        return torch.cat(inputs, dim)

    def relprop(self, R, alpha):
        Z = self.forward(self.X, self.dim)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        outputs = []
        for x, c in zip(self.X, C):
            outputs.append(x * c)

        return outputs


class Sequential(nn.Sequential):
    def relprop(self, R, alpha):
        for m in reversed(self._modules.values()):
            R = m.relprop(R, alpha)
        return R


class BatchNorm2d(nn.BatchNorm2d, RelProp):
    def relprop(self, R, alpha):
        X = self.X
        beta = 1 - alpha
        weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
            (
                self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2)
                + self.eps
            ).pow(0.5)
        )
        Z = X * weight + 1e-9
        S = R / Z
        Ca = S * weight
        R = self.X * (Ca)
        return R


class Linear(nn.Linear, RelProp):
    def relprop(self, R, alpha):
        beta = alpha - 1
        pw = torch.clamp(self.weight, min=0)
        nw = torch.clamp(self.weight, max=0)
        px = torch.clamp(self.X, min=0)
        nx = torch.clamp(self.X, max=0)

        def f(w1, w2, x1, x2):
            Z1 = F.linear(x1, w1)
            Z2 = F.linear(x2, w2)
            S1 = safe_divide(R, Z1)
            S2 = safe_divide(R, Z2)
            C1 = x1 * torch.autograd.grad(Z1, x1, S1)[0]
            C2 = x2 * torch.autograd.grad(Z2, x2, S2)[0]

            return C1 + C2

        activator_relevances = f(pw, nw, px, nx)
        inhibitor_relevances = f(nw, pw, px, nx)

        R = alpha * activator_relevances - beta * inhibitor_relevances

        return R


class Conv2d(nn.Conv2d, RelProp):
    def gradprop2(self, DY, weight):
        Z = self.forward(self.X)

        output_padding = self.X.size()[2] - (
            (Z.size()[2] - 1) * self.stride[0]
            - 2 * self.padding[0]
            + self.kernel_size[0]
        )

        return F.conv_transpose2d(
            DY,
            weight,
            stride=self.stride,
            padding=self.padding,
            output_padding=output_padding,
        )

    def relprop(self, R, alpha):
        if self.X.shape[1] == 3:
            pw = torch.clamp(self.weight, min=0)
            nw = torch.clamp(self.weight, max=0)
            X = self.X
            L = (
                self.X * 0
                + torch.min(
                    torch.min(
                        torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True
                    )[0],
                    dim=3,
                    keepdim=True,
                )[0]
            )
            H = (
                self.X * 0
                + torch.max(
                    torch.max(
                        torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True
                    )[0],
                    dim=3,
                    keepdim=True,
                )[0]
            )
            Za = (
                torch.conv2d(
                    X, self.weight, bias=None, stride=self.stride, padding=self.padding
                )
                - torch.conv2d(
                    L, pw, bias=None, stride=self.stride, padding=self.padding
                )
                - torch.conv2d(
                    H, nw, bias=None, stride=self.stride, padding=self.padding
                )
                + 1e-9
            )

            S = R / Za
            C = (
                X * self.gradprop2(S, self.weight)
                - L * self.gradprop2(S, pw)
                - H * self.gradprop2(S, nw)
            )
            R = C
        else:
            beta = alpha - 1
            pw = torch.clamp(self.weight, min=0)
            nw = torch.clamp(self.weight, max=0)
            px = torch.clamp(self.X, min=0)
            nx = torch.clamp(self.X, max=0)

            def f(w1, w2, x1, x2):
                Z1 = F.conv2d(
                    x1, w1, bias=None, stride=self.stride, padding=self.padding
                )
                Z2 = F.conv2d(
                    x2, w2, bias=None, stride=self.stride, padding=self.padding
                )
                S1 = safe_divide(R, Z1)
                S2 = safe_divide(R, Z2)
                C1 = x1 * self.gradprop(Z1, x1, S1)[0]
                C2 = x2 * self.gradprop(Z2, x2, S2)[0]
                return C1 + C2

            activator_relevances = f(pw, nw, px, nx)
            inhibitor_relevances = f(nw, pw, px, nx)

            R = alpha * activator_relevances - beta * inhibitor_relevances
        return R
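Editor's note: a small numeric sketch (editor-added, not part of the committed file) of how RelPropSimple redistributes incoming relevance through Add in proportion to each input's contribution, using only the layers defined above:

# Illustrative sketch (editor-added).
import torch
add = Add()
x1 = torch.tensor([[1.0, 3.0]])
x2 = torch.tensor([[1.0, 1.0]])
out = add([x1, x2])                  # forward hook caches detached, grad-enabled copies of the inputs
R = torch.tensor([[1.0, 1.0]])       # relevance arriving from the layer above
R1, R2 = add.relprop(R, alpha=1)
print(R1, R2, R1 + R2)               # each unit's relevance splits as x_i / (x1 + x2); the sum equals R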
Transformer-Explainability/BERT_explainability/modules/layers_ours.py
ADDED
@@ -0,0 +1,373 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = [
    "forward_hook",
    "Clone",
    "Add",
    "Cat",
    "ReLU",
    "GELU",
    "Dropout",
    "BatchNorm2d",
    "Linear",
    "MaxPool2d",
    "AdaptiveAvgPool2d",
    "AvgPool2d",
    "Conv2d",
    "Sequential",
    "safe_divide",
    "einsum",
    "Softmax",
    "IndexSelect",
    "LayerNorm",
    "AddEye",
    "Tanh",
    "MatMul",
    "Mul",
]


def safe_divide(a, b):
    den = b.clamp(min=1e-9) + b.clamp(max=1e-9)
    den = den + den.eq(0).type(den.type()) * 1e-9
    return a / den * b.ne(0).type(b.type())


def forward_hook(self, input, output):
    if type(input[0]) in (list, tuple):
        self.X = []
        for i in input[0]:
            x = i.detach()
            x.requires_grad = True
            self.X.append(x)
    else:
        self.X = input[0].detach()
        self.X.requires_grad = True

    self.Y = output


def backward_hook(self, grad_input, grad_output):
    self.grad_input = grad_input
    self.grad_output = grad_output


class RelProp(nn.Module):
    def __init__(self):
        super(RelProp, self).__init__()
        # if not self.training:
        self.register_forward_hook(forward_hook)

    def gradprop(self, Z, X, S):
        C = torch.autograd.grad(Z, X, S, retain_graph=True)
        return C

    def relprop(self, R, alpha):
        return R


class RelPropSimple(RelProp):
    def relprop(self, R, alpha):
        Z = self.forward(self.X)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        if torch.is_tensor(self.X) == False:
            outputs = []
            outputs.append(self.X[0] * C[0])
            outputs.append(self.X[1] * C[1])
        else:
            outputs = self.X * (C[0])
        return outputs


class AddEye(RelPropSimple):
    # input of shape B, C, seq_len, seq_len
    def forward(self, input):
        return input + torch.eye(input.shape[2]).expand_as(input).to(input.device)


class ReLU(nn.ReLU, RelProp):
    pass


class GELU(nn.GELU, RelProp):
    pass


class Softmax(nn.Softmax, RelProp):
    pass


class Mul(RelPropSimple):
    def forward(self, inputs):
        return torch.mul(*inputs)


class Tanh(nn.Tanh, RelProp):
    pass


class LayerNorm(nn.LayerNorm, RelProp):
    pass


class Dropout(nn.Dropout, RelProp):
    pass


class MatMul(RelPropSimple):
    def forward(self, inputs):
        return torch.matmul(*inputs)


class MaxPool2d(nn.MaxPool2d, RelPropSimple):
    pass


class LayerNorm(nn.LayerNorm, RelProp):
    pass


class AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d, RelPropSimple):
    pass


class AvgPool2d(nn.AvgPool2d, RelPropSimple):
    pass


class Add(RelPropSimple):
    def forward(self, inputs):
        return torch.add(*inputs)

    def relprop(self, R, alpha):
        Z = self.forward(self.X)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        a = self.X[0] * C[0]
        b = self.X[1] * C[1]

        a_sum = a.sum()
        b_sum = b.sum()

        a_fact = safe_divide(a_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()
        b_fact = safe_divide(b_sum.abs(), a_sum.abs() + b_sum.abs()) * R.sum()

        a = a * safe_divide(a_fact, a.sum())
        b = b * safe_divide(b_fact, b.sum())

        outputs = [a, b]

        return outputs


class einsum(RelPropSimple):
    def __init__(self, equation):
        super().__init__()
        self.equation = equation

    def forward(self, *operands):
        return torch.einsum(self.equation, *operands)


class IndexSelect(RelProp):
    def forward(self, inputs, dim, indices):
        self.__setattr__("dim", dim)
        self.__setattr__("indices", indices)

        return torch.index_select(inputs, dim, indices)

    def relprop(self, R, alpha):
        Z = self.forward(self.X, self.dim, self.indices)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        if torch.is_tensor(self.X) == False:
            outputs = []
            outputs.append(self.X[0] * C[0])
            outputs.append(self.X[1] * C[1])
        else:
            outputs = self.X * (C[0])
        return outputs


class Clone(RelProp):
    def forward(self, input, num):
        self.__setattr__("num", num)
        outputs = []
        for _ in range(num):
            outputs.append(input)

        return outputs

    def relprop(self, R, alpha):
        Z = []
        for _ in range(self.num):
            Z.append(self.X)
        S = [safe_divide(r, z) for r, z in zip(R, Z)]
        C = self.gradprop(Z, self.X, S)[0]

        R = self.X * C

        return R


class Cat(RelProp):
    def forward(self, inputs, dim):
        self.__setattr__("dim", dim)
        return torch.cat(inputs, dim)

    def relprop(self, R, alpha):
        Z = self.forward(self.X, self.dim)
        S = safe_divide(R, Z)
        C = self.gradprop(Z, self.X, S)

        outputs = []
        for x, c in zip(self.X, C):
            outputs.append(x * c)

        return outputs


class Sequential(nn.Sequential):
    def relprop(self, R, alpha):
        for m in reversed(self._modules.values()):
            R = m.relprop(R, alpha)
        return R


class BatchNorm2d(nn.BatchNorm2d, RelProp):
    def relprop(self, R, alpha):
        X = self.X
        beta = 1 - alpha
        weight = self.weight.unsqueeze(0).unsqueeze(2).unsqueeze(3) / (
            (
                self.running_var.unsqueeze(0).unsqueeze(2).unsqueeze(3).pow(2)
                + self.eps
            ).pow(0.5)
        )
        Z = X * weight + 1e-9
        S = R / Z
        Ca = S * weight
        R = self.X * (Ca)
        return R


class Linear(nn.Linear, RelProp):
    def relprop(self, R, alpha):
        beta = alpha - 1
        pw = torch.clamp(self.weight, min=0)
        nw = torch.clamp(self.weight, max=0)
        px = torch.clamp(self.X, min=0)
        nx = torch.clamp(self.X, max=0)

        def f(w1, w2, x1, x2):
            Z1 = F.linear(x1, w1)
            Z2 = F.linear(x2, w2)
            S1 = safe_divide(R, Z1 + Z2)
            S2 = safe_divide(R, Z1 + Z2)
            C1 = x1 * self.gradprop(Z1, x1, S1)[0]
            C2 = x2 * self.gradprop(Z2, x2, S2)[0]

            return C1 + C2

        activator_relevances = f(pw, nw, px, nx)
        inhibitor_relevances = f(nw, pw, px, nx)

        R = alpha * activator_relevances - beta * inhibitor_relevances

        return R


class Conv2d(nn.Conv2d, RelProp):
    def gradprop2(self, DY, weight):
        Z = self.forward(self.X)

        output_padding = self.X.size()[2] - (
            (Z.size()[2] - 1) * self.stride[0]
            - 2 * self.padding[0]
            + self.kernel_size[0]
        )

        return F.conv_transpose2d(
            DY,
            weight,
            stride=self.stride,
            padding=self.padding,
            output_padding=output_padding,
        )

    def relprop(self, R, alpha):
        if self.X.shape[1] == 3:
            pw = torch.clamp(self.weight, min=0)
            nw = torch.clamp(self.weight, max=0)
            X = self.X
            L = (
                self.X * 0
                + torch.min(
                    torch.min(
|
313 |
+
torch.min(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True
|
314 |
+
)[0],
|
315 |
+
dim=3,
|
316 |
+
keepdim=True,
|
317 |
+
)[0]
|
318 |
+
)
|
319 |
+
H = (
|
320 |
+
self.X * 0
|
321 |
+
+ torch.max(
|
322 |
+
torch.max(
|
323 |
+
torch.max(self.X, dim=1, keepdim=True)[0], dim=2, keepdim=True
|
324 |
+
)[0],
|
325 |
+
dim=3,
|
326 |
+
keepdim=True,
|
327 |
+
)[0]
|
328 |
+
)
|
329 |
+
Za = (
|
330 |
+
torch.conv2d(
|
331 |
+
X, self.weight, bias=None, stride=self.stride, padding=self.padding
|
332 |
+
)
|
333 |
+
- torch.conv2d(
|
334 |
+
L, pw, bias=None, stride=self.stride, padding=self.padding
|
335 |
+
)
|
336 |
+
- torch.conv2d(
|
337 |
+
H, nw, bias=None, stride=self.stride, padding=self.padding
|
338 |
+
)
|
339 |
+
+ 1e-9
|
340 |
+
)
|
341 |
+
|
342 |
+
S = R / Za
|
343 |
+
C = (
|
344 |
+
X * self.gradprop2(S, self.weight)
|
345 |
+
- L * self.gradprop2(S, pw)
|
346 |
+
- H * self.gradprop2(S, nw)
|
347 |
+
)
|
348 |
+
R = C
|
349 |
+
else:
|
350 |
+
beta = alpha - 1
|
351 |
+
pw = torch.clamp(self.weight, min=0)
|
352 |
+
nw = torch.clamp(self.weight, max=0)
|
353 |
+
px = torch.clamp(self.X, min=0)
|
354 |
+
nx = torch.clamp(self.X, max=0)
|
355 |
+
|
356 |
+
def f(w1, w2, x1, x2):
|
357 |
+
Z1 = F.conv2d(
|
358 |
+
x1, w1, bias=None, stride=self.stride, padding=self.padding
|
359 |
+
)
|
360 |
+
Z2 = F.conv2d(
|
361 |
+
x2, w2, bias=None, stride=self.stride, padding=self.padding
|
362 |
+
)
|
363 |
+
S1 = safe_divide(R, Z1)
|
364 |
+
S2 = safe_divide(R, Z2)
|
365 |
+
C1 = x1 * self.gradprop(Z1, x1, S1)[0]
|
366 |
+
C2 = x2 * self.gradprop(Z2, x2, S2)[0]
|
367 |
+
return C1 + C2
|
368 |
+
|
369 |
+
activator_relevances = f(pw, nw, px, nx)
|
370 |
+
inhibitor_relevances = f(nw, pw, px, nx)
|
371 |
+
|
372 |
+
R = alpha * activator_relevances - beta * inhibitor_relevances
|
373 |
+
return R
|
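The file above wraps standard torch.nn modules so that each one caches its forward input and exposes a relprop(R, alpha) method, with Sequential.relprop walking the modules in reverse. The following is a minimal usage sketch, not part of the committed files; it assumes the forward hook defined earlier in layers_ours.py stores each module's input as self.X, and it uses alpha=1 (so beta = alpha - 1 = 0 in the Linear rule above).

# Hypothetical illustration only -- not part of the added files.
import torch
from BERT_explainability.modules.layers_ours import Linear, ReLU, Sequential

model = Sequential(Linear(4, 8), ReLU(), Linear(8, 2))
x = torch.randn(1, 4, requires_grad=True)
logits = model(x)                    # forward pass; the hook caches each layer's input

R = torch.zeros_like(logits)
R[0, logits.argmax()] = 1.0          # seed relevance at the predicted class
R_input = model.relprop(R, alpha=1)  # propagate relevance back through the reversed layers
print(R_input.shape)                 # per-feature relevance for the input, here (1, 4)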
Transformer-Explainability/BERT_params/boolq.json
ADDED
@@ -0,0 +1,26 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.05
  },
  "evidence_identifier": {
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "False", "True" ],
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "everything"
  }
}
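boolq.json, like the other BERT_params files that follow, is a plain JSON dictionary of training hyperparameters, grouped per model component. A small sketch of reading it (illustration only, not part of the committed files; the relative path assumes the repository root as the working directory):

# Hypothetical illustration only -- not part of the added files.
import json

with open("Transformer-Explainability/BERT_params/boolq.json") as f:
    params = json.load(f)

print(params["evidence_classifier"]["classes"])  # ['False', 'True']
print(params["evidence_identifier"]["lr"])       # 0.001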
Transformer-Explainability/BERT_params/boolq_baas.json
ADDED
@@ -0,0 +1,26 @@
{
  "start_server": 0,
  "bert_dir": "model_components/uncased_L-12_H-768_A-12/",
  "max_length": 512,
  "pooling_strategy": "CLS_TOKEN",
  "evidence_identifier": {
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "False", "True" ],
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/boolq_bert.json
ADDED
@@ -0,0 +1,32 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 10,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 50,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "random",
    "sampling_ratio": 1,
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [
      "False",
      "True"
    ],
    "batch_size": 10,
    "warmup_steps": 50,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/boolq_soft.json
ADDED
@@ -0,0 +1,21 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.2
  },
  "classifier": {
    "classes": [ "False", "True" ],
    "has_query": 1,
    "hidden_size": 32,
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 16,
    "epochs": 50,
    "attention_epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "dropout": 0.2,
    "k_fraction": 0.07,
    "threshold": 0.1
  }
}
Transformer-Explainability/BERT_params/cose_bert.json
ADDED
@@ -0,0 +1,30 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 0,
  "use_evidence_token_identifier": 1,
  "evidence_token_identifier": {
    "batch_size": 32,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 10,
    "lr": 1e-05,
    "max_grad_norm": 0.5,
    "sampling_method": "everything",
    "use_half_precision": 0,
    "cose_data_hack": 1
  },
  "evidence_classifier": {
    "classes": [ "false", "true" ],
    "batch_size": 32,
    "warmup_steps": 10,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 0.5,
    "sampling_method": "everything",
    "use_half_precision": 0,
    "cose_data_hack": 1
  }
}
Transformer-Explainability/BERT_params/cose_multiclass.json
ADDED
@@ -0,0 +1,35 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 32,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 50,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "random",
    "sampling_ratio": 1,
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [
      "A",
      "B",
      "C",
      "D",
      "E"
    ],
    "batch_size": 10,
    "warmup_steps": 50,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/esnli_bert.json
ADDED
@@ -0,0 +1,28 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 0,
  "use_evidence_token_identifier": 1,
  "evidence_token_identifier": {
    "batch_size": 32,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [ "contradiction", "neutral", "entailment" ],
    "batch_size": 32,
    "warmup_steps": 10,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/evidence_inference.json
ADDED
@@ -0,0 +1,26 @@
{
  "embeddings": {
    "embedding_file": "model_components/PubMed-w2v.bin",
    "dropout": 0.05
  },
  "evidence_identifier": {
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "significantly decreased", "no significant difference", "significantly increased" ],
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/evidence_inference_bert.json
ADDED
@@ -0,0 +1,33 @@
{
  "max_length": 512,
  "bert_vocab": "allenai/scibert_scivocab_uncased",
  "bert_dir": "allenai/scibert_scivocab_uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 10,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "random",
    "use_half_precision": 0,
    "sampling_ratio": 1
  },
  "evidence_classifier": {
    "classes": [
      "significantly decreased",
      "no significant difference",
      "significantly increased"
    ],
    "batch_size": 10,
    "warmup_steps": 10,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/evidence_inference_soft.json
ADDED
@@ -0,0 +1,22 @@
{
  "embeddings": {
    "embedding_file": "model_components/PubMed-w2v.bin",
    "dropout": 0.2
  },
  "classifier": {
    "classes": [ "significantly decreased", "no significant difference", "significantly increased" ],
    "use_token_selection": 1,
    "has_query": 1,
    "hidden_size": 32,
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 16,
    "epochs": 50,
    "attention_epochs": 0,
    "patience": 10,
    "lr": 1e-3,
    "dropout": 0.2,
    "k_fraction": 0.013,
    "threshold": 0.1
  }
}
Transformer-Explainability/BERT_params/fever.json
ADDED
@@ -0,0 +1,26 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.05
  },
  "evidence_identifier": {
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "SUPPORTS", "REFUTES" ],
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-5,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/fever_baas.json
ADDED
@@ -0,0 +1,25 @@
{
  "start_server": 0,
  "bert_dir": "model_components/uncased_L-12_H-768_A-12/",
  "max_length": 512,
  "pooling_strategy": "CLS_TOKEN",
  "evidence_identifier": {
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "SUPPORTS", "REFUTES" ],
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/fever_bert.json
ADDED
@@ -0,0 +1,32 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 16,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 10,
    "lr": 1e-05,
    "max_grad_norm": 1.0,
    "sampling_method": "random",
    "sampling_ratio": 1.0,
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [
      "SUPPORTS",
      "REFUTES"
    ],
    "batch_size": 10,
    "warmup_steps": 10,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1.0,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/fever_soft.json
ADDED
@@ -0,0 +1,21 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.2
  },
  "classifier": {
    "classes": [ "SUPPORTS", "REFUTES" ],
    "has_query": 1,
    "hidden_size": 32,
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 128,
    "epochs": 50,
    "attention_epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "dropout": 0.2,
    "k_fraction": 0.07,
    "threshold": 0.1
  }
}
Transformer-Explainability/BERT_params/movies.json
ADDED
@@ -0,0 +1,26 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.05
  },
  "evidence_identifier": {
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-4,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "NEG", "POS" ],
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/movies_baas.json
ADDED
@@ -0,0 +1,26 @@
{
  "start_server": 0,
  "bert_dir": "model_components/uncased_L-12_H-768_A-12/",
  "max_length": 512,
  "pooling_strategy": "CLS_TOKEN",
  "evidence_identifier": {
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "NEG", "POS" ],
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/movies_bert.json
ADDED
@@ -0,0 +1,32 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 16,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 50,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "random",
    "sampling_ratio": 1,
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [
      "NEG",
      "POS"
    ],
    "batch_size": 10,
    "warmup_steps": 50,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/movies_soft.json
ADDED
@@ -0,0 +1,21 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.2
  },
  "classifier": {
    "classes": [ "NEG", "POS" ],
    "has_query": 0,
    "hidden_size": 32,
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 16,
    "epochs": 50,
    "attention_epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "dropout": 0.2,
    "k_fraction": 0.07,
    "threshold": 0.1
  }
}
Transformer-Explainability/BERT_params/multirc.json
ADDED
@@ -0,0 +1,26 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.05
  },
  "evidence_identifier": {
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "False", "True" ],
    "mlp_size": 128,
    "dropout": 0.05,
    "batch_size": 768,
    "epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/multirc_baas.json
ADDED
@@ -0,0 +1,26 @@
{
  "start_server": 0,
  "bert_dir": "model_components/uncased_L-12_H-768_A-12/",
  "max_length": 512,
  "pooling_strategy": "CLS_TOKEN",
  "evidence_identifier": {
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "random",
    "sampling_ratio": 1.0
  },
  "evidence_classifier": {
    "classes": [ "False", "True" ],
    "batch_size": 64,
    "epochs": 3,
    "patience": 10,
    "lr": 1e-3,
    "max_grad_norm": 1.0,
    "sampling_method": "everything"
  }
}
Transformer-Explainability/BERT_params/multirc_bert.json
ADDED
@@ -0,0 +1,32 @@
{
  "max_length": 512,
  "bert_vocab": "bert-base-uncased",
  "bert_dir": "bert-base-uncased",
  "use_evidence_sentence_identifier": 1,
  "use_evidence_token_identifier": 0,
  "evidence_identifier": {
    "batch_size": 32,
    "epochs": 10,
    "patience": 10,
    "warmup_steps": 50,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "random",
    "sampling_ratio": 1,
    "use_half_precision": 0
  },
  "evidence_classifier": {
    "classes": [
      "False",
      "True"
    ],
    "batch_size": 32,
    "warmup_steps": 50,
    "epochs": 10,
    "patience": 10,
    "lr": 1e-05,
    "max_grad_norm": 1,
    "sampling_method": "everything",
    "use_half_precision": 0
  }
}
Transformer-Explainability/BERT_params/multirc_soft.json
ADDED
@@ -0,0 +1,21 @@
{
  "embeddings": {
    "embedding_file": "model_components/glove.6B.200d.txt",
    "dropout": 0.2
  },
  "classifier": {
    "classes": [ "False", "True" ],
    "has_query": 1,
    "hidden_size": 32,
    "mlp_size": 128,
    "dropout": 0.2,
    "batch_size": 16,
    "epochs": 50,
    "attention_epochs": 50,
    "patience": 10,
    "lr": 1e-3,
    "dropout": 0.2,
    "k_fraction": 0.07,
    "threshold": 0.1
  }
}
Transformer-Explainability/BERT_rationale_benchmark/__init__.py
ADDED
File without changes
Transformer-Explainability/BERT_rationale_benchmark/metrics.py
ADDED
@@ -0,0 +1,1007 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import pprint
|
6 |
+
from collections import Counter, defaultdict, namedtuple
|
7 |
+
from dataclasses import dataclass
|
8 |
+
from itertools import chain
|
9 |
+
from typing import Any, Callable, Dict, List, Set, Tuple
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
import torch
|
13 |
+
from BERT_rationale_benchmark.utils import (Annotation, Evidence,
|
14 |
+
annotations_from_jsonl,
|
15 |
+
load_documents,
|
16 |
+
load_flattened_documents,
|
17 |
+
load_jsonl)
|
18 |
+
from scipy.stats import entropy
|
19 |
+
from sklearn.metrics import (accuracy_score, auc, average_precision_score,
|
20 |
+
classification_report, precision_recall_curve,
|
21 |
+
roc_auc_score)
|
22 |
+
|
23 |
+
logging.basicConfig(
|
24 |
+
level=logging.DEBUG, format="%(relativeCreated)6d %(threadName)s %(message)s"
|
25 |
+
)
|
26 |
+
|
27 |
+
|
28 |
+
# start_token is inclusive, end_token is exclusive
|
29 |
+
@dataclass(eq=True, frozen=True)
|
30 |
+
class Rationale:
|
31 |
+
ann_id: str
|
32 |
+
docid: str
|
33 |
+
start_token: int
|
34 |
+
end_token: int
|
35 |
+
|
36 |
+
def to_token_level(self) -> List["Rationale"]:
|
37 |
+
ret = []
|
38 |
+
for t in range(self.start_token, self.end_token):
|
39 |
+
ret.append(Rationale(self.ann_id, self.docid, t, t + 1))
|
40 |
+
return ret
|
41 |
+
|
42 |
+
@classmethod
|
43 |
+
def from_annotation(cls, ann: Annotation) -> List["Rationale"]:
|
44 |
+
ret = []
|
45 |
+
for ev_group in ann.evidences:
|
46 |
+
for ev in ev_group:
|
47 |
+
ret.append(
|
48 |
+
Rationale(ann.annotation_id, ev.docid, ev.start_token, ev.end_token)
|
49 |
+
)
|
50 |
+
return ret
|
51 |
+
|
52 |
+
@classmethod
|
53 |
+
def from_instance(cls, inst: dict) -> List["Rationale"]:
|
54 |
+
ret = []
|
55 |
+
for rat in inst["rationales"]:
|
56 |
+
for pred in rat.get("hard_rationale_predictions", []):
|
57 |
+
ret.append(
|
58 |
+
Rationale(
|
59 |
+
inst["annotation_id"],
|
60 |
+
rat["docid"],
|
61 |
+
pred["start_token"],
|
62 |
+
pred["end_token"],
|
63 |
+
)
|
64 |
+
)
|
65 |
+
return ret
|
66 |
+
|
67 |
+
|
68 |
+
@dataclass(eq=True, frozen=True)
|
69 |
+
class PositionScoredDocument:
|
70 |
+
ann_id: str
|
71 |
+
docid: str
|
72 |
+
scores: Tuple[float]
|
73 |
+
truths: Tuple[bool]
|
74 |
+
|
75 |
+
@classmethod
|
76 |
+
def from_results(
|
77 |
+
cls,
|
78 |
+
instances: List[dict],
|
79 |
+
annotations: List[Annotation],
|
80 |
+
docs: Dict[str, List[Any]],
|
81 |
+
use_tokens: bool = True,
|
82 |
+
) -> List["PositionScoredDocument"]:
|
83 |
+
"""Creates a paired list of annotation ids/docids/predictions/truth values"""
|
84 |
+
key_to_annotation = dict()
|
85 |
+
for ann in annotations:
|
86 |
+
for ev in chain.from_iterable(ann.evidences):
|
87 |
+
key = (ann.annotation_id, ev.docid)
|
88 |
+
if key not in key_to_annotation:
|
89 |
+
key_to_annotation[key] = [False for _ in docs[ev.docid]]
|
90 |
+
if use_tokens:
|
91 |
+
start, end = ev.start_token, ev.end_token
|
92 |
+
else:
|
93 |
+
start, end = ev.start_sentence, ev.end_sentence
|
94 |
+
for t in range(start, end):
|
95 |
+
key_to_annotation[key][t] = True
|
96 |
+
ret = []
|
97 |
+
if use_tokens:
|
98 |
+
field = "soft_rationale_predictions"
|
99 |
+
else:
|
100 |
+
field = "soft_sentence_predictions"
|
101 |
+
for inst in instances:
|
102 |
+
for rat in inst["rationales"]:
|
103 |
+
docid = rat["docid"]
|
104 |
+
scores = rat[field]
|
105 |
+
key = (inst["annotation_id"], docid)
|
106 |
+
assert len(scores) == len(docs[docid])
|
107 |
+
if key in key_to_annotation:
|
108 |
+
assert len(scores) == len(key_to_annotation[key])
|
109 |
+
else:
|
110 |
+
# In case model makes a prediction on docuemnt(s) for which ground truth evidence is not present
|
111 |
+
key_to_annotation[key] = [False for _ in docs[docid]]
|
112 |
+
ret.append(
|
113 |
+
PositionScoredDocument(
|
114 |
+
inst["annotation_id"],
|
115 |
+
docid,
|
116 |
+
tuple(scores),
|
117 |
+
tuple(key_to_annotation[key]),
|
118 |
+
)
|
119 |
+
)
|
120 |
+
return ret
|
121 |
+
|
122 |
+
|
123 |
+
def _f1(_p, _r):
|
124 |
+
if _p == 0 or _r == 0:
|
125 |
+
return 0
|
126 |
+
return 2 * _p * _r / (_p + _r)
|
127 |
+
|
128 |
+
|
129 |
+
def _keyed_rationale_from_list(
|
130 |
+
rats: List[Rationale],
|
131 |
+
) -> Dict[Tuple[str, str], Rationale]:
|
132 |
+
ret = defaultdict(set)
|
133 |
+
for r in rats:
|
134 |
+
ret[(r.ann_id, r.docid)].add(r)
|
135 |
+
return ret
|
136 |
+
|
137 |
+
|
138 |
+
def partial_match_score(
|
139 |
+
truth: List[Rationale], pred: List[Rationale], thresholds: List[float]
|
140 |
+
) -> List[Dict[str, Any]]:
|
141 |
+
"""Computes a partial match F1
|
142 |
+
|
143 |
+
Computes an instance-level (annotation) micro- and macro-averaged F1 score.
|
144 |
+
True Positives are computed by using intersection-over-union and
|
145 |
+
thresholding the resulting intersection-over-union fraction.
|
146 |
+
|
147 |
+
Micro-average results are computed by ignoring instance level distinctions
|
148 |
+
in the TP calculation (and recall, and precision, and finally the F1 of
|
149 |
+
those numbers). Macro-average results are computed first by measuring
|
150 |
+
instance (annotation + document) precisions and recalls, averaging those,
|
151 |
+
and finally computing an F1 of the resulting average.
|
152 |
+
"""
|
153 |
+
|
154 |
+
ann_to_rat = _keyed_rationale_from_list(truth)
|
155 |
+
pred_to_rat = _keyed_rationale_from_list(pred)
|
156 |
+
|
157 |
+
num_classifications = {k: len(v) for k, v in pred_to_rat.items()}
|
158 |
+
num_truth = {k: len(v) for k, v in ann_to_rat.items()}
|
159 |
+
ious = defaultdict(dict)
|
160 |
+
for k in set(ann_to_rat.keys()) | set(pred_to_rat.keys()):
|
161 |
+
for p in pred_to_rat.get(k, []):
|
162 |
+
best_iou = 0.0
|
163 |
+
for t in ann_to_rat.get(k, []):
|
164 |
+
num = len(
|
165 |
+
set(range(p.start_token, p.end_token))
|
166 |
+
& set(range(t.start_token, t.end_token))
|
167 |
+
)
|
168 |
+
denom = len(
|
169 |
+
set(range(p.start_token, p.end_token))
|
170 |
+
| set(range(t.start_token, t.end_token))
|
171 |
+
)
|
172 |
+
iou = 0 if denom == 0 else num / denom
|
173 |
+
if iou > best_iou:
|
174 |
+
best_iou = iou
|
175 |
+
ious[k][p] = best_iou
|
176 |
+
scores = []
|
177 |
+
for threshold in thresholds:
|
178 |
+
threshold_tps = dict()
|
179 |
+
for k, vs in ious.items():
|
180 |
+
threshold_tps[k] = sum(int(x >= threshold) for x in vs.values())
|
181 |
+
micro_r = (
|
182 |
+
sum(threshold_tps.values()) / sum(num_truth.values())
|
183 |
+
if sum(num_truth.values()) > 0
|
184 |
+
else 0
|
185 |
+
)
|
186 |
+
micro_p = (
|
187 |
+
sum(threshold_tps.values()) / sum(num_classifications.values())
|
188 |
+
if sum(num_classifications.values()) > 0
|
189 |
+
else 0
|
190 |
+
)
|
191 |
+
micro_f1 = _f1(micro_r, micro_p)
|
192 |
+
macro_rs = list(
|
193 |
+
threshold_tps.get(k, 0.0) / n if n > 0 else 0 for k, n in num_truth.items()
|
194 |
+
)
|
195 |
+
macro_ps = list(
|
196 |
+
threshold_tps.get(k, 0.0) / n if n > 0 else 0
|
197 |
+
for k, n in num_classifications.items()
|
198 |
+
)
|
199 |
+
macro_r = sum(macro_rs) / len(macro_rs) if len(macro_rs) > 0 else 0
|
200 |
+
macro_p = sum(macro_ps) / len(macro_ps) if len(macro_ps) > 0 else 0
|
201 |
+
macro_f1 = _f1(macro_r, macro_p)
|
202 |
+
scores.append(
|
203 |
+
{
|
204 |
+
"threshold": threshold,
|
205 |
+
"micro": {"p": micro_p, "r": micro_r, "f1": micro_f1},
|
206 |
+
"macro": {"p": macro_p, "r": macro_r, "f1": macro_f1},
|
207 |
+
}
|
208 |
+
)
|
209 |
+
return scores
|
210 |
+
|
211 |
+
|
212 |
+
def score_hard_rationale_predictions(
|
213 |
+
truth: List[Rationale], pred: List[Rationale]
|
214 |
+
) -> Dict[str, Dict[str, float]]:
|
215 |
+
"""Computes instance (annotation)-level micro/macro averaged F1s"""
|
216 |
+
scores = dict()
|
217 |
+
truth = set(truth)
|
218 |
+
pred = set(pred)
|
219 |
+
micro_prec = len(truth & pred) / len(pred)
|
220 |
+
micro_rec = len(truth & pred) / len(truth)
|
221 |
+
micro_f1 = _f1(micro_prec, micro_rec)
|
222 |
+
scores["instance_micro"] = {
|
223 |
+
"p": micro_prec,
|
224 |
+
"r": micro_rec,
|
225 |
+
"f1": micro_f1,
|
226 |
+
}
|
227 |
+
|
228 |
+
ann_to_rat = _keyed_rationale_from_list(truth)
|
229 |
+
pred_to_rat = _keyed_rationale_from_list(pred)
|
230 |
+
instances_to_scores = dict()
|
231 |
+
for k in set(ann_to_rat.keys()) | (pred_to_rat.keys()):
|
232 |
+
if len(pred_to_rat.get(k, set())) > 0:
|
233 |
+
instance_prec = len(
|
234 |
+
ann_to_rat.get(k, set()) & pred_to_rat.get(k, set())
|
235 |
+
) / len(pred_to_rat[k])
|
236 |
+
else:
|
237 |
+
instance_prec = 0
|
238 |
+
if len(ann_to_rat.get(k, set())) > 0:
|
239 |
+
instance_rec = len(
|
240 |
+
ann_to_rat.get(k, set()) & pred_to_rat.get(k, set())
|
241 |
+
) / len(ann_to_rat[k])
|
242 |
+
else:
|
243 |
+
instance_rec = 0
|
244 |
+
instance_f1 = _f1(instance_prec, instance_rec)
|
245 |
+
instances_to_scores[k] = {
|
246 |
+
"p": instance_prec,
|
247 |
+
"r": instance_rec,
|
248 |
+
"f1": instance_f1,
|
249 |
+
}
|
250 |
+
# these are calculated as sklearn would
|
251 |
+
macro_prec = sum(instance["p"] for instance in instances_to_scores.values()) / len(
|
252 |
+
instances_to_scores
|
253 |
+
)
|
254 |
+
macro_rec = sum(instance["r"] for instance in instances_to_scores.values()) / len(
|
255 |
+
instances_to_scores
|
256 |
+
)
|
257 |
+
macro_f1 = sum(instance["f1"] for instance in instances_to_scores.values()) / len(
|
258 |
+
instances_to_scores
|
259 |
+
)
|
260 |
+
|
261 |
+
f1_scores = [instance["f1"] for instance in instances_to_scores.values()]
|
262 |
+
print(macro_f1, np.argsort(f1_scores)[::-1])
|
263 |
+
|
264 |
+
scores["instance_macro"] = {
|
265 |
+
"p": macro_prec,
|
266 |
+
"r": macro_rec,
|
267 |
+
"f1": macro_f1,
|
268 |
+
}
|
269 |
+
return scores
|
270 |
+
|
271 |
+
|
272 |
+
def _auprc(truth: Dict[Any, List[bool]], preds: Dict[Any, List[float]]) -> float:
|
273 |
+
if len(preds) == 0:
|
274 |
+
return 0.0
|
275 |
+
assert len(truth.keys() and preds.keys()) == len(truth.keys())
|
276 |
+
aucs = []
|
277 |
+
for k, true in truth.items():
|
278 |
+
pred = preds[k]
|
279 |
+
true = [int(t) for t in true]
|
280 |
+
precision, recall, _ = precision_recall_curve(true, pred)
|
281 |
+
aucs.append(auc(recall, precision))
|
282 |
+
return np.average(aucs)
|
283 |
+
|
284 |
+
|
285 |
+
def _score_aggregator(
|
286 |
+
truth: Dict[Any, List[bool]],
|
287 |
+
preds: Dict[Any, List[float]],
|
288 |
+
score_function: Callable[[List[float], List[float]], float],
|
289 |
+
discard_single_class_answers: bool,
|
290 |
+
) -> float:
|
291 |
+
if len(preds) == 0:
|
292 |
+
return 0.0
|
293 |
+
assert len(truth.keys() and preds.keys()) == len(truth.keys())
|
294 |
+
scores = []
|
295 |
+
for k, true in truth.items():
|
296 |
+
pred = preds[k]
|
297 |
+
if (all(true) or all(not x for x in true)) and discard_single_class_answers:
|
298 |
+
continue
|
299 |
+
true = [int(t) for t in true]
|
300 |
+
scores.append(score_function(true, pred))
|
301 |
+
return np.average(scores)
|
302 |
+
|
303 |
+
|
304 |
+
def score_soft_tokens(paired_scores: List[PositionScoredDocument]) -> Dict[str, float]:
|
305 |
+
truth = {(ps.ann_id, ps.docid): ps.truths for ps in paired_scores}
|
306 |
+
pred = {(ps.ann_id, ps.docid): ps.scores for ps in paired_scores}
|
307 |
+
auprc_score = _auprc(truth, pred)
|
308 |
+
ap = _score_aggregator(truth, pred, average_precision_score, True)
|
309 |
+
roc_auc = _score_aggregator(truth, pred, roc_auc_score, True)
|
310 |
+
|
311 |
+
return {
|
312 |
+
"auprc": auprc_score,
|
313 |
+
"average_precision": ap,
|
314 |
+
"roc_auc_score": roc_auc,
|
315 |
+
}
|
316 |
+
|
317 |
+
|
318 |
+
def _instances_aopc(
|
319 |
+
instances: List[dict], thresholds: List[float], key: str
|
320 |
+
) -> Tuple[float, List[float]]:
|
321 |
+
dataset_scores = []
|
322 |
+
for inst in instances:
|
323 |
+
kls = inst["classification"]
|
324 |
+
beta_0 = inst["classification_scores"][kls]
|
325 |
+
instance_scores = []
|
326 |
+
for score in filter(
|
327 |
+
lambda x: x["threshold"] in thresholds,
|
328 |
+
sorted(inst["thresholded_scores"], key=lambda x: x["threshold"]),
|
329 |
+
):
|
330 |
+
beta_k = score[key][kls]
|
331 |
+
delta = beta_0 - beta_k
|
332 |
+
instance_scores.append(delta)
|
333 |
+
assert len(instance_scores) == len(thresholds)
|
334 |
+
dataset_scores.append(instance_scores)
|
335 |
+
dataset_scores = np.array(dataset_scores)
|
336 |
+
# a careful reading of Samek, et al. "Evaluating the Visualization of What a Deep Neural Network Has Learned"
|
337 |
+
# and some algebra will show the reader that we can average in any of several ways and get the same result:
|
338 |
+
# over a flattened array, within an instance and then between instances, or over instances (by position) an
|
339 |
+
# then across them.
|
340 |
+
final_score = np.average(dataset_scores)
|
341 |
+
position_scores = np.average(dataset_scores, axis=0).tolist()
|
342 |
+
|
343 |
+
return final_score, position_scores
|
344 |
+
|
345 |
+
|
346 |
+
def compute_aopc_scores(instances: List[dict], aopc_thresholds: List[float]):
|
347 |
+
if aopc_thresholds is None:
|
348 |
+
aopc_thresholds = sorted(
|
349 |
+
set(
|
350 |
+
chain.from_iterable(
|
351 |
+
[x["threshold"] for x in y["thresholded_scores"]] for y in instances
|
352 |
+
)
|
353 |
+
)
|
354 |
+
)
|
355 |
+
aopc_comprehensiveness_score, aopc_comprehensiveness_points = _instances_aopc(
|
356 |
+
instances, aopc_thresholds, "comprehensiveness_classification_scores"
|
357 |
+
)
|
358 |
+
aopc_sufficiency_score, aopc_sufficiency_points = _instances_aopc(
|
359 |
+
instances, aopc_thresholds, "sufficiency_classification_scores"
|
360 |
+
)
|
361 |
+
return (
|
362 |
+
aopc_thresholds,
|
363 |
+
aopc_comprehensiveness_score,
|
364 |
+
aopc_comprehensiveness_points,
|
365 |
+
aopc_sufficiency_score,
|
366 |
+
aopc_sufficiency_points,
|
367 |
+
)
|
368 |
+
|
369 |
+
|
370 |
+
def score_classifications(
|
371 |
+
instances: List[dict],
|
372 |
+
annotations: List[Annotation],
|
373 |
+
docs: Dict[str, List[str]],
|
374 |
+
aopc_thresholds: List[float],
|
375 |
+
) -> Dict[str, float]:
|
376 |
+
def compute_kl(cls_scores_, faith_scores_):
|
377 |
+
keys = list(cls_scores_.keys())
|
378 |
+
cls_scores_ = [cls_scores_[k] for k in keys]
|
379 |
+
faith_scores_ = [faith_scores_[k] for k in keys]
|
380 |
+
return entropy(faith_scores_, cls_scores_)
|
381 |
+
|
382 |
+
labels = list(set(x.classification for x in annotations))
|
383 |
+
label_to_int = {l: i for i, l in enumerate(labels)}
|
384 |
+
key_to_instances = {inst["annotation_id"]: inst for inst in instances}
|
385 |
+
truth = []
|
386 |
+
predicted = []
|
387 |
+
for ann in annotations:
|
388 |
+
truth.append(label_to_int[ann.classification])
|
389 |
+
inst = key_to_instances[ann.annotation_id]
|
390 |
+
predicted.append(label_to_int[inst["classification"]])
|
391 |
+
classification_scores = classification_report(
|
392 |
+
truth, predicted, output_dict=True, target_names=labels, digits=3
|
393 |
+
)
|
394 |
+
accuracy = accuracy_score(truth, predicted)
|
395 |
+
if "comprehensiveness_classification_scores" in instances[0]:
|
396 |
+
comprehensiveness_scores = [
|
397 |
+
x["classification_scores"][x["classification"]]
|
398 |
+
- x["comprehensiveness_classification_scores"][x["classification"]]
|
399 |
+
for x in instances
|
400 |
+
]
|
401 |
+
comprehensiveness_score = np.average(comprehensiveness_scores)
|
402 |
+
else:
|
403 |
+
comprehensiveness_score = None
|
404 |
+
comprehensiveness_scores = None
|
405 |
+
|
406 |
+
if "sufficiency_classification_scores" in instances[0]:
|
407 |
+
sufficiency_scores = [
|
408 |
+
x["classification_scores"][x["classification"]]
|
409 |
+
- x["sufficiency_classification_scores"][x["classification"]]
|
410 |
+
for x in instances
|
411 |
+
]
|
412 |
+
sufficiency_score = np.average(sufficiency_scores)
|
413 |
+
else:
|
414 |
+
sufficiency_score = None
|
415 |
+
sufficiency_scores = None
|
416 |
+
|
417 |
+
if "comprehensiveness_classification_scores" in instances[0]:
|
418 |
+
comprehensiveness_entropies = [
|
419 |
+
entropy(list(x["classification_scores"].values()))
|
420 |
+
- entropy(list(x["comprehensiveness_classification_scores"].values()))
|
421 |
+
for x in instances
|
422 |
+
]
|
423 |
+
comprehensiveness_entropy = np.average(comprehensiveness_entropies)
|
424 |
+
comprehensiveness_kl = np.average(
|
425 |
+
list(
|
426 |
+
compute_kl(
|
427 |
+
x["classification_scores"],
|
428 |
+
x["comprehensiveness_classification_scores"],
|
429 |
+
)
|
430 |
+
for x in instances
|
431 |
+
)
|
432 |
+
)
|
433 |
+
else:
|
434 |
+
comprehensiveness_entropies = None
|
435 |
+
comprehensiveness_kl = None
|
436 |
+
comprehensiveness_entropy = None
|
437 |
+
|
438 |
+
if "sufficiency_classification_scores" in instances[0]:
|
439 |
+
sufficiency_entropies = [
|
440 |
+
entropy(list(x["classification_scores"].values()))
|
441 |
+
- entropy(list(x["sufficiency_classification_scores"].values()))
|
442 |
+
for x in instances
|
443 |
+
]
|
444 |
+
sufficiency_entropy = np.average(sufficiency_entropies)
|
445 |
+
sufficiency_kl = np.average(
|
446 |
+
list(
|
447 |
+
compute_kl(
|
448 |
+
x["classification_scores"], x["sufficiency_classification_scores"]
|
449 |
+
)
|
450 |
+
for x in instances
|
451 |
+
)
|
452 |
+
)
|
453 |
+
else:
|
454 |
+
sufficiency_entropies = None
|
455 |
+
sufficiency_kl = None
|
456 |
+
sufficiency_entropy = None
|
457 |
+
|
458 |
+
if "thresholded_scores" in instances[0]:
|
459 |
+
(
|
460 |
+
aopc_thresholds,
|
461 |
+
aopc_comprehensiveness_score,
|
462 |
+
aopc_comprehensiveness_points,
|
463 |
+
aopc_sufficiency_score,
|
464 |
+
aopc_sufficiency_points,
|
465 |
+
) = compute_aopc_scores(instances, aopc_thresholds)
|
466 |
+
else:
|
467 |
+
(
|
468 |
+
aopc_thresholds,
|
469 |
+
aopc_comprehensiveness_score,
|
470 |
+
aopc_comprehensiveness_points,
|
471 |
+
aopc_sufficiency_score,
|
472 |
+
aopc_sufficiency_points,
|
473 |
+
) = (None, None, None, None, None)
|
474 |
+
if "tokens_to_flip" in instances[0]:
|
475 |
+
token_percentages = []
|
476 |
+
for ann in annotations:
|
477 |
+
# in practice, this is of size 1 for everything except e-snli
|
478 |
+
docids = set(ev.docid for ev in chain.from_iterable(ann.evidences))
|
479 |
+
inst = key_to_instances[ann.annotation_id]
|
480 |
+
tokens = inst["tokens_to_flip"]
|
481 |
+
doc_lengths = sum(len(docs[d]) for d in docids)
|
482 |
+
token_percentages.append(tokens / doc_lengths)
|
483 |
+
token_percentages = np.average(token_percentages)
|
484 |
+
else:
|
485 |
+
token_percentages = None
|
486 |
+
|
487 |
+
return {
|
488 |
+
"accuracy": accuracy,
|
489 |
+
"prf": classification_scores,
|
490 |
+
"comprehensiveness": comprehensiveness_score,
|
491 |
+
"sufficiency": sufficiency_score,
|
492 |
+
"comprehensiveness_entropy": comprehensiveness_entropy,
|
493 |
+
"comprehensiveness_kl": comprehensiveness_kl,
|
494 |
+
"sufficiency_entropy": sufficiency_entropy,
|
495 |
+
"sufficiency_kl": sufficiency_kl,
|
496 |
+
"aopc_thresholds": aopc_thresholds,
|
497 |
+
"comprehensiveness_aopc": aopc_comprehensiveness_score,
|
498 |
+
"comprehensiveness_aopc_points": aopc_comprehensiveness_points,
|
499 |
+
"sufficiency_aopc": aopc_sufficiency_score,
|
500 |
+
"sufficiency_aopc_points": aopc_sufficiency_points,
|
501 |
+
}
|
502 |
+
|
503 |
+
|
504 |
+
def verify_instance(instance: dict, docs: Dict[str, list], thresholds: Set[float]):
|
505 |
+
error = False
|
506 |
+
docids = []
|
507 |
+
# verify the internal structure of these instances is correct:
|
508 |
+
# * hard predictions are present
|
509 |
+
# * start and end tokens are valid
|
510 |
+
# * soft rationale predictions, if present, must have the same document length
|
511 |
+
|
512 |
+
for rat in instance["rationales"]:
|
513 |
+
docid = rat["docid"]
|
514 |
+
if docid not in docid:
|
515 |
+
error = True
|
516 |
+
logging.info(
|
517 |
+
f'Error! For instance annotation={instance["annotation_id"]}, docid={docid} could not be found as a preprocessed document! Gave up on additional processing.'
|
518 |
+
)
|
519 |
+
continue
|
520 |
+
doc_length = len(docs[docid])
|
521 |
+
for h1 in rat.get("hard_rationale_predictions", []):
|
522 |
+
# verify that each token is valid
|
523 |
+
# verify that no annotations overlap
|
524 |
+
for h2 in rat.get("hard_rationale_predictions", []):
|
525 |
+
if h1 == h2:
|
526 |
+
continue
|
527 |
+
if (
|
528 |
+
len(
|
529 |
+
set(range(h1["start_token"], h1["end_token"]))
|
530 |
+
& set(range(h2["start_token"], h2["end_token"]))
|
531 |
+
)
|
532 |
+
> 0
|
533 |
+
):
|
534 |
+
logging.info(
|
535 |
+
f'Error! For instance annotation={instance["annotation_id"]}, docid={docid} {h1} and {h2} overlap!'
|
536 |
+
)
|
537 |
+
error = True
|
538 |
+
if h1["start_token"] > doc_length:
|
539 |
+
logging.info(
|
540 |
+
f'Error! For instance annotation={instance["annotation_id"]}, docid={docid} received an impossible tokenspan: {h1} for a document of length {doc_length}'
|
541 |
+
)
|
542 |
+
error = True
|
543 |
+
if h1["end_token"] > doc_length:
|
544 |
+
logging.info(
|
545 |
+
f'Error! For instance annotation={instance["annotation_id"]}, docid={docid} received an impossible tokenspan: {h1} for a document of length {doc_length}'
|
546 |
+
)
|
547 |
+
error = True
|
548 |
+
# length check for soft rationale
|
549 |
+
# note that either flattened_documents or sentence-broken documents must be passed in depending on result
|
550 |
+
soft_rationale_predictions = rat.get("soft_rationale_predictions", [])
|
551 |
+
if (
|
552 |
+
len(soft_rationale_predictions) > 0
|
553 |
+
and len(soft_rationale_predictions) != doc_length
|
554 |
+
):
|
555 |
+
logging.info(
|
556 |
+
f'Error! For instance annotation={instance["annotation_id"]}, docid={docid} expected classifications for {doc_length} tokens but have them for {len(soft_rationale_predictions)} tokens instead!'
|
557 |
+
)
|
558 |
+
error = True
|
559 |
+
|
560 |
+
# count that one appears per-document
|
561 |
+
docids = Counter(docids)
|
562 |
+
for docid, count in docids.items():
|
563 |
+
if count > 1:
|
564 |
+
error = True
|
565 |
+
logging.info(
|
566 |
+
'Error! For instance annotation={instance["annotation_id"]}, docid={docid} appear {count} times, may only appear once!'
|
567 |
+
)
|
568 |
+
|
569 |
+
classification = instance.get("classification", "")
|
570 |
+
if not isinstance(classification, str):
|
571 |
+
logging.info(
|
572 |
+
f'Error! For instance annotation={instance["annotation_id"]}, classification field {classification} is not a string!'
|
573 |
+
)
|
574 |
+
error = True
|
575 |
+
classification_scores = instance.get("classification_scores", dict())
|
576 |
+
if not isinstance(classification_scores, dict):
|
577 |
+
logging.info(
|
578 |
+
f'Error! For instance annotation={instance["annotation_id"]}, classification_scores field {classification_scores} is not a dict!'
|
579 |
+
)
|
580 |
+
error = True
|
581 |
+
comprehensiveness_classification_scores = instance.get(
|
582 |
+
"comprehensiveness_classification_scores", dict()
|
583 |
+
)
|
584 |
+
if not isinstance(comprehensiveness_classification_scores, dict):
|
585 |
+
logging.info(
|
586 |
+
f'Error! For instance annotation={instance["annotation_id"]}, comprehensiveness_classification_scores field {comprehensiveness_classification_scores} is not a dict!'
|
587 |
+
)
|
588 |
+
error = True
|
589 |
+
sufficiency_classification_scores = instance.get(
|
590 |
+
"sufficiency_classification_scores", dict()
|
591 |
+
)
|
592 |
+
if not isinstance(sufficiency_classification_scores, dict):
|
593 |
+
logging.info(
|
594 |
+
f'Error! For instance annotation={instance["annotation_id"]}, sufficiency_classification_scores field {sufficiency_classification_scores} is not a dict!'
|
595 |
+
)
|
596 |
+
error = True
|
597 |
+
if ("classification" in instance) != ("classification_scores" in instance):
|
598 |
+
logging.info(
|
599 |
+
f'Error! For instance annotation={instance["annotation_id"]}, when providing a classification, you must also provide classification scores!'
|
600 |
+
)
|
601 |
+
error = True
|
602 |
+
if ("comprehensiveness_classification_scores" in instance) and not (
|
603 |
+
"classification" in instance
|
604 |
+
):
|
605 |
+
logging.info(
|
606 |
+
f'Error! For instance annotation={instance["annotation_id"]}, when providing a classification, you must also provide a comprehensiveness_classification_score'
|
607 |
+
)
|
608 |
+
error = True
|
609 |
+
if ("sufficiency_classification_scores" in instance) and not (
|
610 |
+
"classification_scores" in instance
|
611 |
+
):
|
612 |
+
logging.info(
|
613 |
+
f'Error! For instance annotation={instance["annotation_id"]}, when providing a sufficiency_classification_score, you must also provide a classification score!'
|
614 |
+
)
|
615 |
+
error = True
|
616 |
+
if "thresholded_scores" in instance:
|
617 |
+
instance_thresholds = set(
|
618 |
+
x["threshold"] for x in instance["thresholded_scores"]
|
619 |
+
)
|
620 |
+
if instance_thresholds != thresholds:
|
621 |
+
error = True
|
622 |
+
logging.info(
|
623 |
+
'Error: {instance["thresholded_scores"]} has thresholds that differ from previous thresholds: {thresholds}'
|
624 |
+
)
|
625 |
+
if (
|
626 |
+
"comprehensiveness_classification_scores" not in instance
|
627 |
+
or "sufficiency_classification_scores" not in instance
|
628 |
+
or "classification" not in instance
|
629 |
+
or "classification_scores" not in instance
|
630 |
+
):
|
631 |
+
error = True
|
632 |
+
logging.info(
|
633 |
+
"Error: {instance} must have comprehensiveness_classification_scores, sufficiency_classification_scores, classification, and classification_scores defined when including thresholded scores"
|
634 |
+
)
|
635 |
+
if not all(
|
636 |
+
"sufficiency_classification_scores" in x
|
637 |
+
for x in instance["thresholded_scores"]
|
638 |
+
):
|
639 |
+
error = True
|
640 |
+
logging.info(
|
641 |
+
"Error: {instance} must have sufficiency_classification_scores for every threshold"
|
642 |
+
)
|
643 |
+
if not all(
|
644 |
+
"comprehensiveness_classification_scores" in x
|
645 |
+
for x in instance["thresholded_scores"]
|
646 |
+
):
|
647 |
+
error = True
|
648 |
+
logging.info(
|
649 |
+
"Error: {instance} must have comprehensiveness_classification_scores for every threshold"
|
650 |
+
)
|
651 |
+
return error
|
652 |
+
|
653 |
+
|
654 |
+
def verify_instances(instances: List[dict], docs: Dict[str, list]):
|
655 |
+
annotation_ids = list(x["annotation_id"] for x in instances)
|
656 |
+
key_counter = Counter(annotation_ids)
|
657 |
+
multi_occurrence_annotation_ids = list(
|
658 |
+
filter(lambda kv: kv[1] > 1, key_counter.items())
|
659 |
+
)
|
660 |
+
error = False
|
661 |
+
if len(multi_occurrence_annotation_ids) > 0:
|
662 |
+
error = True
|
663 |
+
logging.info(
|
664 |
+
f"Error in instances: {len(multi_occurrence_annotation_ids)} appear multiple times in the annotations file: {multi_occurrence_annotation_ids}"
|
665 |
+
)
|
666 |
+
failed_validation = set()
|
667 |
+
instances_with_classification = list()
|
668 |
+
instances_with_soft_rationale_predictions = list()
|
669 |
+
instances_with_soft_sentence_predictions = list()
|
670 |
+
instances_with_comprehensiveness_classifications = list()
|
671 |
+
instances_with_sufficiency_classifications = list()
|
672 |
+
instances_with_thresholded_scores = list()
|
673 |
+
if "thresholded_scores" in instances[0]:
|
674 |
+
thresholds = set(x["threshold"] for x in instances[0]["thresholded_scores"])
|
675 |
+
else:
|
676 |
+
thresholds = None
|
677 |
+
for instance in instances:
|
678 |
+
instance_error = verify_instance(instance, docs, thresholds)
|
679 |
+
if instance_error:
|
680 |
+
error = True
|
681 |
+
failed_validation.add(instance["annotation_id"])
|
682 |
+
if instance.get("classification", None) != None:
|
683 |
+
instances_with_classification.append(instance)
|
684 |
+
if instance.get("comprehensiveness_classification_scores", None) != None:
|
685 |
+
instances_with_comprehensiveness_classifications.append(instance)
|
686 |
+
if instance.get("sufficiency_classification_scores", None) != None:
|
687 |
+
instances_with_sufficiency_classifications.append(instance)
|
688 |
+
has_soft_rationales = []
|
689 |
+
has_soft_sentences = []
|
690 |
+
for rat in instance["rationales"]:
|
691 |
+
if rat.get("soft_rationale_predictions", None) != None:
|
692 |
+
has_soft_rationales.append(rat)
|
693 |
+
if rat.get("soft_sentence_predictions", None) != None:
|
694 |
+
has_soft_sentences.append(rat)
|
695 |
+
if len(has_soft_rationales) > 0:
|
696 |
+
instances_with_soft_rationale_predictions.append(instance)
|
697 |
+
if len(has_soft_rationales) != len(instance["rationales"]):
|
698 |
+
error = True
|
699 |
+
logging.info(
|
700 |
+
f'Error: instance {instance["annotation"]} has soft rationales for some but not all reported documents!'
|
701 |
+
)
|
702 |
+
if len(has_soft_sentences) > 0:
|
703 |
+
instances_with_soft_sentence_predictions.append(instance)
|
704 |
+
if len(has_soft_sentences) != len(instance["rationales"]):
|
705 |
+
error = True
|
706 |
+
logging.info(
|
707 |
+
f'Error: instance {instance["annotation"]} has soft sentences for some but not all reported documents!'
|
708 |
+
)
|
709 |
+
if "thresholded_scores" in instance:
|
710 |
+
instances_with_thresholded_scores.append(instance)
|
711 |
+
logging.info(
|
712 |
+
f"Error in instances: {len(failed_validation)} instances fail validation: {failed_validation}"
|
713 |
+
)
|
714 |
+
if len(instances_with_classification) != 0 and len(
|
715 |
+
instances_with_classification
|
716 |
+
) != len(instances):
|
717 |
+
logging.info(
|
718 |
+
f"Either all {len(instances)} must have a classification or none may, instead {len(instances_with_classification)} do!"
|
719 |
+
)
|
720 |
+
error = True
|
721 |
+
if len(instances_with_soft_sentence_predictions) != 0 and len(
|
722 |
+
instances_with_soft_sentence_predictions
|
723 |
+
) != len(instances):
|
724 |
+
logging.info(
|
725 |
+
f"Either all {len(instances)} must have a sentence prediction or none may, instead {len(instances_with_soft_sentence_predictions)} do!"
|
726 |
+
)
|
727 |
+
error = True
|
728 |
+
if len(instances_with_soft_rationale_predictions) != 0 and len(
|
729 |
+
instances_with_soft_rationale_predictions
|
730 |
+
) != len(instances):
|
731 |
+
logging.info(
|
732 |
+
f"Either all {len(instances)} must have a soft rationale prediction or none may, instead {len(instances_with_soft_rationale_predictions)} do!"
|
733 |
+
)
|
734 |
+
error = True
|
735 |
+
if len(instances_with_comprehensiveness_classifications) != 0 and len(
|
736 |
+
instances_with_comprehensiveness_classifications
|
737 |
+
) != len(instances):
|
738 |
+
error = True
|
739 |
+
logging.info(
|
740 |
+
f"Either all {len(instances)} must have a comprehensiveness classification or none may, instead {len(instances_with_comprehensiveness_classifications)} do!"
|
741 |
+
)
|
742 |
+
if len(instances_with_sufficiency_classifications) != 0 and len(
|
743 |
+
instances_with_sufficiency_classifications
|
744 |
+
) != len(instances):
|
745 |
+
error = True
|
746 |
+
logging.info(
|
747 |
+
f"Either all {len(instances)} must have a sufficiency classification or none may, instead {len(instances_with_sufficiency_classifications)} do!"
|
748 |
+
)
|
749 |
+
if len(instances_with_thresholded_scores) != 0 and len(
|
750 |
+
instances_with_thresholded_scores
|
751 |
+
) != len(instances):
|
752 |
+
error = True
|
753 |
+
logging.info(
|
754 |
+
f"Either all {len(instances)} must have thresholded scores or none may, instead {len(instances_with_thresholded_scores)} do!"
|
755 |
+
)
|
756 |
+
if error:
|
757 |
+
raise ValueError(
|
758 |
+
"Some instances are invalid, please fix your formatting and try again"
|
759 |
+
)
|
760 |
+
|
761 |
+
|
762 |
+
def _has_hard_predictions(results: List[dict]) -> bool:
|
763 |
+
# assumes that we have run "verification" over the inputs
|
764 |
+
return (
|
765 |
+
"rationales" in results[0]
|
766 |
+
and len(results[0]["rationales"]) > 0
|
767 |
+
and "hard_rationale_predictions" in results[0]["rationales"][0]
|
768 |
+
and results[0]["rationales"][0]["hard_rationale_predictions"] is not None
|
769 |
+
and len(results[0]["rationales"][0]["hard_rationale_predictions"]) > 0
|
770 |
+
)
|
771 |
+
|
772 |
+
|
773 |
+
def _has_soft_predictions(results: List[dict]) -> bool:
|
774 |
+
# assumes that we have run "verification" over the inputs
|
775 |
+
return (
|
776 |
+
"rationales" in results[0]
|
777 |
+
and len(results[0]["rationales"]) > 0
|
778 |
+
and "soft_rationale_predictions" in results[0]["rationales"][0]
|
779 |
+
and results[0]["rationales"][0]["soft_rationale_predictions"] is not None
|
780 |
+
)
|
781 |
+
|
782 |
+
|
783 |
+
def _has_soft_sentence_predictions(results: List[dict]) -> bool:
|
784 |
+
# assumes that we have run "verification" over the inputs
|
785 |
+
return (
|
786 |
+
"rationales" in results[0]
|
787 |
+
and len(results[0]["rationales"]) > 0
|
788 |
+
and "soft_sentence_predictions" in results[0]["rationales"][0]
|
789 |
+
and results[0]["rationales"][0]["soft_sentence_predictions"] is not None
|
790 |
+
)
|
791 |
+
|
792 |
+
|
793 |
+
def _has_classifications(results: List[dict]) -> bool:
|
794 |
+
# assumes that we have run "verification" over the inputs
|
795 |
+
return "classification" in results[0] and results[0]["classification"] is not None
|
796 |
+
|
797 |
+
|
798 |
+
def main():
|
799 |
+
parser = argparse.ArgumentParser(
|
800 |
+
description="""Computes rationale and final class classification scores""",
|
801 |
+
formatter_class=argparse.RawTextHelpFormatter,
|
802 |
+
)
|
803 |
+
parser.add_argument(
|
804 |
+
"--data_dir",
|
805 |
+
dest="data_dir",
|
806 |
+
required=True,
|
807 |
+
help="Which directory contains a {train,val,test}.jsonl file?",
|
808 |
+
)
|
809 |
+
parser.add_argument(
|
810 |
+
"--split",
|
811 |
+
dest="split",
|
812 |
+
required=True,
|
813 |
+
help="Which of {train,val,test} are we scoring on?",
|
814 |
+
)
|
815 |
+
parser.add_argument(
|
816 |
+
"--strict",
|
817 |
+
dest="strict",
|
818 |
+
required=False,
|
819 |
+
action="store_true",
|
820 |
+
default=False,
|
821 |
+
help="Do we perform strict scoring?",
|
822 |
+
)
|
823 |
+
parser.add_argument(
|
824 |
+
"--results",
|
825 |
+
dest="results",
|
826 |
+
required=True,
|
827 |
+
help="""Results File
|
828 |
+
Contents are expected to be jsonl of:
|
829 |
+
{
|
830 |
+
"annotation_id": str, required
|
831 |
+
# these classifications *must not* overlap
|
832 |
+
"rationales": List[
|
833 |
+
{
|
834 |
+
"docid": str, required
|
835 |
+
"hard_rationale_predictions": List[{
|
836 |
+
"start_token": int, inclusive, required
|
837 |
+
"end_token": int, exclusive, required
|
838 |
+
}], optional,
|
839 |
+
# token level classifications, a value must be provided per-token
|
840 |
+
# in an ideal world, these correspond to the hard-decoding above.
|
841 |
+
"soft_rationale_predictions": List[float], optional.
|
842 |
+
# sentence level classifications, a value must be provided for every
|
843 |
+
# sentence in each document, or not at all
|
844 |
+
"soft_sentence_predictions": List[float], optional.
|
845 |
+
}
|
846 |
+
],
|
847 |
+
# the classification the model made for the overall classification task
|
848 |
+
"classification": str, optional
|
849 |
+
# A probability distribution output by the model. We require this to be normalized.
|
850 |
+
"classification_scores": Dict[str, float], optional
|
851 |
+
# The next two fields are measures for how faithful your model is (the
|
852 |
+
# rationales it predicts are in some sense causal of the prediction), and
|
853 |
+
# how sufficient they are. We approximate a measure for comprehensiveness by
|
854 |
+
# asking that you remove the top k%% of tokens from your documents,
|
855 |
+
# running your models again, and reporting the score distribution in the
|
856 |
+
# "comprehensiveness_classification_scores" field.
|
857 |
+
# We approximate a measure of sufficiency by asking exactly the converse
|
858 |
+
# - that you provide model distributions on the removed k%% tokens.
|
859 |
+
# 'k' is determined by human rationales, and is documented in our paper.
|
860 |
+
# You should determine which of these tokens to remove based on some kind
|
861 |
+
# of information about your model: gradient based, attention based, other
|
862 |
+
# interpretability measures, etc.
|
863 |
+
# scores per class having removed k%% of the data, where k is determined by human comprehensive rationales
|
864 |
+
"comprehensiveness_classification_scores": Dict[str, float], optional
|
865 |
+
# scores per class having access to only k%% of the data, where k is determined by human comprehensive rationales
|
866 |
+
"sufficiency_classification_scores": Dict[str, float], optional
|
867 |
+
# the number of tokens required to flip the prediction - see "Is Attention Interpretable" by Serrano and Smith.
|
868 |
+
"tokens_to_flip": int, optional
|
869 |
+
"thresholded_scores": List[{
|
870 |
+
"threshold": float, required,
|
871 |
+
"comprehensiveness_classification_scores": like "classification_scores"
|
872 |
+
"sufficiency_classification_scores": like "classification_scores"
|
873 |
+
}], optional. if present, then "classification" and "classification_scores" must be present
|
874 |
+
}
|
875 |
+
When providing one of the optional fields, it must be provided for *every* instance.
|
876 |
+
The classification, classification_score, and comprehensiveness_classification_scores
|
877 |
+
must together be present for every instance or absent for every instance.
|
878 |
+
""",
|
879 |
+
)
|
880 |
+
parser.add_argument(
|
881 |
+
"--iou_thresholds",
|
882 |
+
dest="iou_thresholds",
|
883 |
+
required=False,
|
884 |
+
nargs="+",
|
885 |
+
type=float,
|
886 |
+
default=[0.5],
|
887 |
+
help="""Thresholds for IOU scoring.
|
888 |
+
|
889 |
+
These are used for "soft" or partial match scoring of rationale spans.
|
890 |
+
A span is considered a match if the size of the intersection of the prediction
|
891 |
+
and the annotation, divided by the union of the two spans, is larger than
|
892 |
+
the IOU threshold. This score can be computed for arbitrary thresholds.
|
893 |
+
""",
|
894 |
+
)
|
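To make the IOU rule described in the help text above concrete, here is a tiny worked example with invented spans (token positions are 0-based, end exclusive); it is only an illustration, not code from the repository:

# Hypothetical predicted rationale span [2, 7) compared against annotated span [4, 9).
pred = set(range(2, 7))   # predicted token positions {2, 3, 4, 5, 6}
gold = set(range(4, 9))   # annotated token positions {4, 5, 6, 7, 8}

iou = len(pred & gold) / len(pred | gold)   # 3 / 7 ≈ 0.43
print(iou > 0.5)   # False: below the default threshold of 0.5, so not a partial match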
895 |
+
parser.add_argument(
|
896 |
+
"--score_file",
|
897 |
+
dest="score_file",
|
898 |
+
required=False,
|
899 |
+
default=None,
|
900 |
+
help="Where to write results?",
|
901 |
+
)
|
902 |
+
parser.add_argument(
|
903 |
+
"--aopc_thresholds",
|
904 |
+
nargs="+",
|
905 |
+
required=False,
|
906 |
+
type=float,
|
907 |
+
default=[0.01, 0.05, 0.1, 0.2, 0.5],
|
908 |
+
help="Thresholds for AOPC Thresholds",
|
909 |
+
)
|
910 |
+
args = parser.parse_args()
|
911 |
+
results = load_jsonl(args.results)
|
912 |
+
docids = set(
|
913 |
+
chain.from_iterable(
|
914 |
+
[rat["docid"] for rat in res["rationales"]] for res in results
|
915 |
+
)
|
916 |
+
)
|
917 |
+
docs = load_flattened_documents(args.data_dir, docids)
|
918 |
+
verify_instances(results, docs)
|
919 |
+
# load truth
|
920 |
+
annotations = annotations_from_jsonl(
|
921 |
+
os.path.join(args.data_dir, args.split + ".jsonl")
|
922 |
+
)
|
923 |
+
docids |= set(
|
924 |
+
chain.from_iterable(
|
925 |
+
(ev.docid for ev in chain.from_iterable(ann.evidences))
|
926 |
+
for ann in annotations
|
927 |
+
)
|
928 |
+
)
|
929 |
+
|
930 |
+
has_final_predictions = _has_classifications(results)
|
931 |
+
scores = dict()
|
932 |
+
if args.strict:
|
933 |
+
if not args.iou_thresholds:
|
934 |
+
raise ValueError(
|
935 |
+
"--iou_thresholds must be provided when running strict scoring"
|
936 |
+
)
|
937 |
+
if not has_final_predictions:
|
938 |
+
raise ValueError(
|
939 |
+
"We must have a 'classification', 'classification_score', and 'comprehensiveness_classification_score' field in order to perform scoring!"
|
940 |
+
)
|
941 |
+
# TODO think about offering a sentence level version of these scores.
|
942 |
+
if _has_hard_predictions(results):
|
943 |
+
truth = list(
|
944 |
+
chain.from_iterable(Rationale.from_annotation(ann) for ann in annotations)
|
945 |
+
)
|
946 |
+
pred = list(
|
947 |
+
chain.from_iterable(Rationale.from_instance(inst) for inst in results)
|
948 |
+
)
|
949 |
+
if args.iou_thresholds is not None:
|
950 |
+
iou_scores = partial_match_score(truth, pred, args.iou_thresholds)
|
951 |
+
scores["iou_scores"] = iou_scores
|
952 |
+
# NER style scoring
|
953 |
+
rationale_level_prf = score_hard_rationale_predictions(truth, pred)
|
954 |
+
scores["rationale_prf"] = rationale_level_prf
|
955 |
+
token_level_truth = list(
|
956 |
+
chain.from_iterable(rat.to_token_level() for rat in truth)
|
957 |
+
)
|
958 |
+
token_level_pred = list(
|
959 |
+
chain.from_iterable(rat.to_token_level() for rat in pred)
|
960 |
+
)
|
961 |
+
token_level_prf = score_hard_rationale_predictions(
|
962 |
+
token_level_truth, token_level_pred
|
963 |
+
)
|
964 |
+
scores["token_prf"] = token_level_prf
|
965 |
+
else:
|
966 |
+
logging.info("No hard predictions detected, skipping rationale scoring")
|
967 |
+
|
968 |
+
if _has_soft_predictions(results):
|
969 |
+
flattened_documents = load_flattened_documents(args.data_dir, docids)
|
970 |
+
paired_scoring = PositionScoredDocument.from_results(
|
971 |
+
results, annotations, flattened_documents, use_tokens=True
|
972 |
+
)
|
973 |
+
token_scores = score_soft_tokens(paired_scoring)
|
974 |
+
scores["token_soft_metrics"] = token_scores
|
975 |
+
else:
|
976 |
+
logging.info("No soft predictions detected, skipping rationale scoring")
|
977 |
+
|
978 |
+
if _has_soft_sentence_predictions(results):
|
979 |
+
documents = load_documents(args.data_dir, docids)
|
980 |
+
paired_scoring = PositionScoredDocument.from_results(
|
981 |
+
results, annotations, documents, use_tokens=False
|
982 |
+
)
|
983 |
+
sentence_scores = score_soft_tokens(paired_scoring)
|
984 |
+
scores["sentence_soft_metrics"] = sentence_scores
|
985 |
+
else:
|
986 |
+
logging.info(
|
987 |
+
"No sentence level predictions detected, skipping sentence-level diagnostic"
|
988 |
+
)
|
989 |
+
|
990 |
+
if has_final_predictions:
|
991 |
+
flattened_documents = load_flattened_documents(args.data_dir, docids)
|
992 |
+
class_results = score_classifications(
|
993 |
+
results, annotations, flattened_documents, args.aopc_thresholds
|
994 |
+
)
|
995 |
+
scores["classification_scores"] = class_results
|
996 |
+
else:
|
997 |
+
logging.info("No classification scores detected, skipping classification")
|
998 |
+
|
999 |
+
pprint.pprint(scores)
|
1000 |
+
|
1001 |
+
if args.score_file:
|
1002 |
+
with open(args.score_file, "w") as of:
|
1003 |
+
json.dump(scores, of, indent=4, sort_keys=True)
|
1004 |
+
|
1005 |
+
|
1006 |
+
if __name__ == "__main__":
|
1007 |
+
main()
|
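As a reading aid for the --results format documented in the help text above, the sketch below builds one minimal line of such a jsonl file; the annotation_id, docid, span, labels, and scores are all hypothetical, not taken from any dataset:

import json

# One hypothetical instance of the results jsonl consumed by this script.
# "annotation_id" and the per-rationale "docid" are required; the classification
# fields are optional, but when provided they must be provided for every instance.
example_instance = {
    "annotation_id": "example_annotation_0",
    "rationales": [
        {
            "docid": "example_doc_0",
            "hard_rationale_predictions": [{"start_token": 3, "end_token": 7}],
        }
    ],
    "classification": "POS",
    "classification_scores": {"POS": 0.9, "NEG": 0.1},
}
print(json.dumps(example_instance))  # one such line per instance in the file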
Transformer-Explainability/BERT_rationale_benchmark/models/model_utils.py
ADDED
@@ -0,0 +1,186 @@
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Dict, List, Set
|
3 |
+
|
4 |
+
import numpy as np
|
5 |
+
import torch
|
6 |
+
from gensim.models import KeyedVectors
|
7 |
+
from torch import nn
|
8 |
+
from torch.nn.utils.rnn import (PackedSequence, pack_padded_sequence,
|
9 |
+
pad_packed_sequence, pad_sequence)
|
10 |
+
|
11 |
+
|
12 |
+
@dataclass(eq=True, frozen=True)
|
13 |
+
class PaddedSequence:
|
14 |
+
"""A utility class for padding variable length sequences mean for RNN input
|
15 |
+
This class is in the style of PackedSequence from the PyTorch RNN Utils,
|
16 |
+
but is somewhat more manual in approach. It provides the ability to generate masks
|
17 |
+
for outputs of the same input dimensions.
|
18 |
+
The constructor should never be called directly and should only be called via
|
19 |
+
the autopad classmethod.
|
20 |
+
|
21 |
+
We'd love to delete this, but we pad_sequence, pack_padded_sequence, and
|
22 |
+
pad_packed_sequence all require shuffling around tuples of information, and some
|
23 |
+
convenience methods using these are nice to have.
|
24 |
+
"""
|
25 |
+
|
26 |
+
data: torch.Tensor
|
27 |
+
batch_sizes: torch.Tensor
|
28 |
+
batch_first: bool = False
|
29 |
+
|
30 |
+
@classmethod
|
31 |
+
def autopad(
|
32 |
+
cls, data, batch_first: bool = False, padding_value=0, device=None
|
33 |
+
) -> "PaddedSequence":
|
34 |
+
# handle tensors of size 0 (single item)
|
35 |
+
data_ = []
|
36 |
+
for d in data:
|
37 |
+
if len(d.size()) == 0:
|
38 |
+
d = d.unsqueeze(0)
|
39 |
+
data_.append(d)
|
40 |
+
padded = pad_sequence(
|
41 |
+
data_, batch_first=batch_first, padding_value=padding_value
|
42 |
+
)
|
43 |
+
if batch_first:
|
44 |
+
batch_lengths = torch.LongTensor([len(x) for x in data_])
|
45 |
+
if any([x == 0 for x in batch_lengths]):
|
46 |
+
raise ValueError(
|
47 |
+
"Found a 0 length batch element, this can't possibly be right: {}".format(
|
48 |
+
batch_lengths
|
49 |
+
)
|
50 |
+
)
|
51 |
+
else:
|
52 |
+
# TODO actually test this codepath
|
53 |
+
batch_lengths = torch.LongTensor([len(x) for x in data])
|
54 |
+
return PaddedSequence(padded, batch_lengths, batch_first).to(device=device)
|
55 |
+
|
56 |
+
def pack_other(self, data: torch.Tensor):
|
57 |
+
return pack_padded_sequence(
|
58 |
+
data, self.batch_sizes, batch_first=self.batch_first, enforce_sorted=False
|
59 |
+
)
|
60 |
+
|
61 |
+
@classmethod
|
62 |
+
def from_packed_sequence(
|
63 |
+
cls, ps: PackedSequence, batch_first: bool, padding_value=0
|
64 |
+
) -> "PaddedSequence":
|
65 |
+
padded, batch_sizes = pad_packed_sequence(ps, batch_first, padding_value)
|
66 |
+
return PaddedSequence(padded, batch_sizes, batch_first)
|
67 |
+
|
68 |
+
def cuda(self) -> "PaddedSequence":
|
69 |
+
return PaddedSequence(
|
70 |
+
self.data.cuda(), self.batch_sizes.cuda(), batch_first=self.batch_first
|
71 |
+
)
|
72 |
+
|
73 |
+
def to(
|
74 |
+
self, dtype=None, device=None, copy=False, non_blocking=False
|
75 |
+
) -> "PaddedSequence":
|
76 |
+
# TODO make to() support all of the torch.Tensor to() variants
|
77 |
+
return PaddedSequence(
|
78 |
+
self.data.to(
|
79 |
+
dtype=dtype, device=device, copy=copy, non_blocking=non_blocking
|
80 |
+
),
|
81 |
+
self.batch_sizes.to(device=device, copy=copy, non_blocking=non_blocking),
|
82 |
+
batch_first=self.batch_first,
|
83 |
+
)
|
84 |
+
|
85 |
+
def mask(
|
86 |
+
self, on=int(0), off=int(0), device="cpu", size=None, dtype=None
|
87 |
+
) -> torch.Tensor:
|
88 |
+
if size is None:
|
89 |
+
size = self.data.size()
|
90 |
+
out_tensor = torch.zeros(*size, dtype=dtype)
|
91 |
+
# TODO this can be done more efficiently
|
92 |
+
out_tensor.fill_(off)
|
93 |
+
# note to self: these are probably less efficient than explicitly populating the off values instead of the on values.
|
94 |
+
if self.batch_first:
|
95 |
+
for i, bl in enumerate(self.batch_sizes):
|
96 |
+
out_tensor[i, :bl] = on
|
97 |
+
else:
|
98 |
+
for i, bl in enumerate(self.batch_sizes):
|
99 |
+
out_tensor[:bl, i] = on
|
100 |
+
return out_tensor.to(device)
|
101 |
+
|
102 |
+
def unpad(self, other: torch.Tensor) -> List[torch.Tensor]:
|
103 |
+
out = []
|
104 |
+
for o, bl in zip(other, self.batch_sizes):
|
105 |
+
out.append(o[:bl])
|
106 |
+
return out
|
107 |
+
|
108 |
+
def flip(self) -> "PaddedSequence":
|
109 |
+
return PaddedSequence(
|
110 |
+
self.data.transpose(0, 1), self.batch_sizes, batch_first=not self.batch_first
|
111 |
+
)
|
112 |
+
|
113 |
+
|
114 |
+
def extract_embeddings(
|
115 |
+
vocab: Set[str], embedding_file: str, unk_token: str = "UNK", pad_token: str = "PAD"
|
116 |
+
) -> (nn.Embedding, Dict[str, int], List[str]):
|
117 |
+
vocab = vocab | set([unk_token, pad_token])
|
118 |
+
if embedding_file.endswith(".bin"):
|
119 |
+
WVs = KeyedVectors.load_word2vec_format(embedding_file, binary=True)
|
120 |
+
|
121 |
+
word_to_vector = dict()
|
122 |
+
WV_matrix = np.matrix([WVs[v] for v in WVs.vocab.keys()])
|
123 |
+
|
124 |
+
if unk_token not in WVs:
|
125 |
+
mean_vector = np.mean(WV_matrix, axis=0)
|
126 |
+
word_to_vector[unk_token] = mean_vector
|
127 |
+
if pad_token not in WVs:
|
128 |
+
word_to_vector[pad_token] = np.zeros(WVs.vector_size)
|
129 |
+
|
130 |
+
for v in vocab:
|
131 |
+
if v in WVs:
|
132 |
+
word_to_vector[v] = WVs[v]
|
133 |
+
|
134 |
+
interner = dict()
|
135 |
+
deinterner = list()
|
136 |
+
vectors = []
|
137 |
+
count = 0
|
138 |
+
for word in [pad_token, unk_token] + sorted(
|
139 |
+
list(word_to_vector.keys() - {unk_token, pad_token})
|
140 |
+
):
|
141 |
+
vector = word_to_vector[word]
|
142 |
+
vectors.append(np.array(vector))
|
143 |
+
interner[word] = count
|
144 |
+
deinterner.append(word)
|
145 |
+
count += 1
|
146 |
+
vectors = torch.FloatTensor(np.array(vectors))
|
147 |
+
embedding = nn.Embedding.from_pretrained(
|
148 |
+
vectors, padding_idx=interner[pad_token]
|
149 |
+
)
|
150 |
+
embedding.weight.requires_grad = False
|
151 |
+
return embedding, interner, deinterner
|
152 |
+
elif embedding_file.endswith(".txt"):
|
153 |
+
word_to_vector = dict()
|
154 |
+
vector = []
|
155 |
+
with open(embedding_file, "r") as inf:
|
156 |
+
for line in inf:
|
157 |
+
contents = line.strip().split()
|
158 |
+
word = contents[0]
|
159 |
+
vector = torch.tensor([float(v) for v in contents[1:]]).unsqueeze(0)
|
160 |
+
word_to_vector[word] = vector
|
161 |
+
embed_size = vector.size()
|
162 |
+
if unk_token not in word_to_vector:
|
163 |
+
mean_vector = torch.cat(list(word_to_vector.values()), dim=0).mean(dim=0)
|
164 |
+
word_to_vector[unk_token] = mean_vector.unsqueeze(0)
|
165 |
+
if pad_token not in word_to_vector:
|
166 |
+
word_to_vector[pad_token] = torch.zeros(embed_size)
|
167 |
+
interner = dict()
|
168 |
+
deinterner = list()
|
169 |
+
vectors = []
|
170 |
+
count = 0
|
171 |
+
for word in [pad_token, unk_token] + sorted(
|
172 |
+
list(word_to_vector.keys() - {unk_token, pad_token})
|
173 |
+
):
|
174 |
+
vector = word_to_vector[word]
|
175 |
+
vectors.append(vector)
|
176 |
+
interner[word] = count
|
177 |
+
deinterner.append(word)
|
178 |
+
count += 1
|
179 |
+
vectors = torch.cat(vectors, dim=0)
|
180 |
+
embedding = nn.Embedding.from_pretrained(
|
181 |
+
vectors, padding_idx=interner[pad_token]
|
182 |
+
)
|
183 |
+
embedding.weight.requires_grad = False
|
184 |
+
return embedding, interner, deinterner
|
185 |
+
else:
|
186 |
+
raise ValueError("Unable to open embeddings file {}".format(embedding_file))
|
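A minimal usage sketch of the PaddedSequence helper added above, with made-up inputs; it assumes only that torch is installed and that the class is importable from the path shown in this diff:

import torch
from BERT_rationale_benchmark.models.model_utils import PaddedSequence

# Three variable-length sequences of token ids.
seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5]), torch.tensor([6])]

padded = PaddedSequence.autopad(seqs, batch_first=True, padding_value=0, device="cpu")
print(padded.data)          # a 3 x 3 tensor; shorter rows are padded with 0
print(padded.batch_sizes)   # tensor([3, 2, 1]): the original lengths
print(padded.mask(on=1, off=0, dtype=torch.long))  # 1 over real tokens, 0 over padding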
Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/__init__.py
ADDED
File without changes
|
Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/bert_pipeline.py
ADDED
@@ -0,0 +1,852 @@
1 |
+
# TODO consider if this can be collapsed back down into the pipeline_train.py
|
2 |
+
import argparse
|
3 |
+
import json
|
4 |
+
import logging
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
from collections import OrderedDict
|
8 |
+
from itertools import chain
|
9 |
+
from typing import List, Tuple
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
import torch
|
13 |
+
import torch.nn as nn
|
14 |
+
from BERT_explainability.modules.BERT.BERT_cls_lrp import \
|
15 |
+
BertForSequenceClassification as BertForClsOrigLrp
|
16 |
+
from BERT_explainability.modules.BERT.BertForSequenceClassification import \
|
17 |
+
BertForSequenceClassification as BertForSequenceClassificationTest
|
18 |
+
from BERT_explainability.modules.BERT.ExplanationGenerator import Generator
|
19 |
+
from BERT_rationale_benchmark.utils import (Annotation, Evidence,
|
20 |
+
load_datasets, load_documents,
|
21 |
+
write_jsonl)
|
22 |
+
from sklearn.metrics import accuracy_score
|
23 |
+
from transformers import BertForSequenceClassification, BertTokenizer
|
24 |
+
|
25 |
+
logging.basicConfig(
|
26 |
+
level=logging.DEBUG, format="%(relativeCreated)6d %(threadName)s %(message)s"
|
27 |
+
)
|
28 |
+
logger = logging.getLogger(__name__)
|
29 |
+
# let's make this more or less deterministic (not resistant to restarts)
|
30 |
+
random.seed(12345)
|
31 |
+
np.random.seed(67890)
|
32 |
+
torch.manual_seed(10111213)
|
33 |
+
torch.backends.cudnn.deterministic = True
|
34 |
+
torch.backends.cudnn.benchmark = False
|
35 |
+
|
36 |
+
|
37 |
+
import numpy as np
|
38 |
+
|
39 |
+
latex_special_token = ["!@#$%^&*()"]
|
40 |
+
|
41 |
+
|
42 |
+
def generate(text_list, attention_list, latex_file, color="red", rescale_value=False):
|
43 |
+
attention_list = attention_list[: len(text_list)]
|
44 |
+
if attention_list.max() == attention_list.min():
|
45 |
+
attention_list = torch.zeros_like(attention_list)
|
46 |
+
else:
|
47 |
+
attention_list = (
|
48 |
+
100
|
49 |
+
* (attention_list - attention_list.min())
|
50 |
+
/ (attention_list.max() - attention_list.min())
|
51 |
+
)
|
52 |
+
attention_list[attention_list < 1] = 0
|
53 |
+
attention_list = attention_list.tolist()
|
54 |
+
text_list = [text_list[i].replace("$", "") for i in range(len(text_list))]
|
55 |
+
if rescale_value:
|
56 |
+
attention_list = rescale(attention_list)
|
57 |
+
word_num = len(text_list)
|
58 |
+
text_list = clean_word(text_list)
|
59 |
+
with open(latex_file, "w") as f:
|
60 |
+
f.write(
|
61 |
+
r"""\documentclass[varwidth=150mm]{standalone}
|
62 |
+
\special{papersize=210mm,297mm}
|
63 |
+
\usepackage{color}
|
64 |
+
\usepackage{tcolorbox}
|
65 |
+
\usepackage{CJK}
|
66 |
+
\usepackage{adjustbox}
|
67 |
+
\tcbset{width=0.9\textwidth,boxrule=0pt,colback=red,arc=0pt,auto outer arc,left=0pt,right=0pt,boxsep=5pt}
|
68 |
+
\begin{document}
|
69 |
+
\begin{CJK*}{UTF8}{gbsn}"""
|
70 |
+
+ "\n"
|
71 |
+
)
|
72 |
+
string = (
|
73 |
+
r"""{\setlength{\fboxsep}{0pt}\colorbox{white!0}{\parbox{0.9\textwidth}{"""
|
74 |
+
+ "\n"
|
75 |
+
)
|
76 |
+
for idx in range(word_num):
|
77 |
+
# string += "\\colorbox{%s!%s}{"%(color, attention_list[idx])+"\\strut " + text_list[idx]+"} "
|
78 |
+
# print(text_list[idx])
|
79 |
+
if "\#\#" in text_list[idx]:
|
80 |
+
token = text_list[idx].replace("\#\#", "")
|
81 |
+
string += (
|
82 |
+
"\\colorbox{%s!%s}{" % (color, attention_list[idx])
|
83 |
+
+ "\\strut "
|
84 |
+
+ token
|
85 |
+
+ "}"
|
86 |
+
)
|
87 |
+
else:
|
88 |
+
string += (
|
89 |
+
" "
|
90 |
+
+ "\\colorbox{%s!%s}{" % (color, attention_list[idx])
|
91 |
+
+ "\\strut "
|
92 |
+
+ text_list[idx]
|
93 |
+
+ "}"
|
94 |
+
)
|
95 |
+
string += "\n}}}"
|
96 |
+
f.write(string + "\n")
|
97 |
+
f.write(
|
98 |
+
r"""\end{CJK*}
|
99 |
+
\end{document}"""
|
100 |
+
)
|
101 |
+
|
102 |
+
|
103 |
+
def clean_word(word_list):
|
104 |
+
new_word_list = []
|
105 |
+
for word in word_list:
|
106 |
+
for latex_sensitive in ["\\", "%", "&", "^", "#", "_", "{", "}"]:
|
107 |
+
if latex_sensitive in word:
|
108 |
+
word = word.replace(latex_sensitive, "\\" + latex_sensitive)
|
109 |
+
new_word_list.append(word)
|
110 |
+
return new_word_list
|
111 |
+
|
112 |
+
|
113 |
+
def scores_per_word_from_scores_per_token(input, tokenizer, input_ids, scores_per_id):
|
114 |
+
words = tokenizer.convert_ids_to_tokens(input_ids)
|
115 |
+
words = [word.replace("##", "") for word in words]
|
116 |
+
score_per_char = []
|
117 |
+
|
118 |
+
# TODO: DELETE
|
119 |
+
input_ids_chars = []
|
120 |
+
for word in words:
|
121 |
+
if word in ["[CLS]", "[SEP]", "[UNK]", "[PAD]"]:
|
122 |
+
continue
|
123 |
+
input_ids_chars += list(word)
|
124 |
+
# TODO: DELETE
|
125 |
+
|
126 |
+
for i in range(len(scores_per_id)):
|
127 |
+
if words[i] in ["[CLS]", "[SEP]", "[UNK]", "[PAD]"]:
|
128 |
+
continue
|
129 |
+
score_per_char += [scores_per_id[i]] * len(words[i])
|
130 |
+
|
131 |
+
score_per_word = []
|
132 |
+
start_idx = 0
|
133 |
+
end_idx = 0
|
134 |
+
# TODO: DELETE
|
135 |
+
words_from_chars = []
|
136 |
+
for inp in input:
|
137 |
+
if start_idx >= len(score_per_char):
|
138 |
+
break
|
139 |
+
end_idx = end_idx + len(inp)
|
140 |
+
score_per_word.append(np.max(score_per_char[start_idx:end_idx]))
|
141 |
+
|
142 |
+
# TODO: DELETE
|
143 |
+
words_from_chars.append("".join(input_ids_chars[start_idx:end_idx]))
|
144 |
+
|
145 |
+
start_idx = end_idx
|
146 |
+
|
147 |
+
if words_from_chars[:-1] != input[: len(words_from_chars) - 1]:
|
148 |
+
print(words_from_chars)
|
149 |
+
print(input[: len(words_from_chars)])
|
150 |
+
print(words)
|
151 |
+
print(tokenizer.convert_ids_to_tokens(input_ids))
|
152 |
+
assert False
|
153 |
+
|
154 |
+
return torch.tensor(score_per_word)
|
155 |
+
|
156 |
+
|
157 |
+
def get_input_words(input, tokenizer, input_ids):
|
158 |
+
words = tokenizer.convert_ids_to_tokens(input_ids)
|
159 |
+
words = [word.replace("##", "") for word in words]
|
160 |
+
|
161 |
+
input_ids_chars = []
|
162 |
+
for word in words:
|
163 |
+
if word in ["[CLS]", "[SEP]", "[UNK]", "[PAD]"]:
|
164 |
+
continue
|
165 |
+
input_ids_chars += list(word)
|
166 |
+
|
167 |
+
start_idx = 0
|
168 |
+
end_idx = 0
|
169 |
+
words_from_chars = []
|
170 |
+
for inp in input:
|
171 |
+
if start_idx >= len(input_ids_chars):
|
172 |
+
break
|
173 |
+
end_idx = end_idx + len(inp)
|
174 |
+
words_from_chars.append("".join(input_ids_chars[start_idx:end_idx]))
|
175 |
+
start_idx = end_idx
|
176 |
+
|
177 |
+
if words_from_chars[:-1] != input[: len(words_from_chars) - 1]:
|
178 |
+
print(words_from_chars)
|
179 |
+
print(input[: len(words_from_chars)])
|
180 |
+
print(words)
|
181 |
+
print(tokenizer.convert_ids_to_tokens(input_ids))
|
182 |
+
assert False
|
183 |
+
return words_from_chars
|
184 |
+
|
185 |
+
|
186 |
+
def bert_tokenize_doc(
|
187 |
+
doc: List[List[str]], tokenizer, special_token_map
|
188 |
+
) -> Tuple[List[List[str]], List[List[Tuple[int, int]]]]:
|
189 |
+
"""Tokenizes a document and returns [start, end) spans to map the wordpieces back to their source words"""
|
190 |
+
sents = []
|
191 |
+
sent_token_spans = []
|
192 |
+
for sent in doc:
|
193 |
+
tokens = []
|
194 |
+
spans = []
|
195 |
+
start = 0
|
196 |
+
for w in sent:
|
197 |
+
if w in special_token_map:
|
198 |
+
tokens.append(w)
|
199 |
+
else:
|
200 |
+
tokens.extend(tokenizer.tokenize(w))
|
201 |
+
end = len(tokens)
|
202 |
+
spans.append((start, end))
|
203 |
+
start = end
|
204 |
+
sents.append(tokens)
|
205 |
+
sent_token_spans.append(spans)
|
206 |
+
return sents, sent_token_spans
|
207 |
+
|
208 |
+
|
209 |
+
def initialize_models(params: dict, batch_first: bool, use_half_precision=False):
|
210 |
+
assert batch_first
|
211 |
+
max_length = params["max_length"]
|
212 |
+
tokenizer = BertTokenizer.from_pretrained(params["bert_vocab"])
|
213 |
+
pad_token_id = tokenizer.pad_token_id
|
214 |
+
cls_token_id = tokenizer.cls_token_id
|
215 |
+
sep_token_id = tokenizer.sep_token_id
|
216 |
+
bert_dir = params["bert_dir"]
|
217 |
+
evidence_classes = dict(
|
218 |
+
(y, x) for (x, y) in enumerate(params["evidence_classifier"]["classes"])
|
219 |
+
)
|
220 |
+
evidence_classifier = BertForSequenceClassification.from_pretrained(
|
221 |
+
bert_dir, num_labels=len(evidence_classes)
|
222 |
+
)
|
223 |
+
word_interner = tokenizer.vocab
|
224 |
+
de_interner = tokenizer.ids_to_tokens
|
225 |
+
return evidence_classifier, word_interner, de_interner, evidence_classes, tokenizer
|
226 |
+
|
227 |
+
|
228 |
+
BATCH_FIRST = True
|
229 |
+
|
230 |
+
|
231 |
+
def extract_docid_from_dataset_element(element):
|
232 |
+
return next(iter(element.evidences))[0].docid
|
233 |
+
|
234 |
+
|
235 |
+
def extract_evidence_from_dataset_element(element):
|
236 |
+
return next(iter(element.evidences))
|
237 |
+
|
238 |
+
|
239 |
+
def main():
|
240 |
+
parser = argparse.ArgumentParser(
|
241 |
+
description="""Trains a pipeline model.
|
242 |
+
|
243 |
+
Step 1 is evidence identification, that is identify if a given sentence is evidence or not
|
244 |
+
Step 2 is evidence classification, that is given an evidence sentence, classify the final outcome for the final task
|
245 |
+
(e.g. sentiment or significance).
|
246 |
+
|
247 |
+
These models should be separated into two separate steps, but at the moment:
|
248 |
+
* prep data (load, intern documents, load json)
|
249 |
+
* convert data for evidence identification - in the case of training data we take all the positives and sample some
|
250 |
+
negatives
|
251 |
+
* side note: this sampling is *somewhat* configurable and is done on a per-batch/epoch basis in order to gain a
|
252 |
+
broader sampling of negative values.
|
253 |
+
* train evidence identification
|
254 |
+
* convert data for evidence classification - take all rationales + decisions and use this as input
|
255 |
+
* train evidence classification
|
256 |
+
* decode first the evidence, then run classification for each split
|
257 |
+
|
258 |
+
""",
|
259 |
+
formatter_class=argparse.RawTextHelpFormatter,
|
260 |
+
)
|
261 |
+
parser.add_argument(
|
262 |
+
"--data_dir",
|
263 |
+
dest="data_dir",
|
264 |
+
required=True,
|
265 |
+
help="Which directory contains a {train,val,test}.jsonl file?",
|
266 |
+
)
|
267 |
+
parser.add_argument(
|
268 |
+
"--output_dir",
|
269 |
+
dest="output_dir",
|
270 |
+
required=True,
|
271 |
+
help="Where shall we write intermediate models + final data to?",
|
272 |
+
)
|
273 |
+
parser.add_argument(
|
274 |
+
"--model_params",
|
275 |
+
dest="model_params",
|
276 |
+
required=True,
|
277 |
+
help="JSoN file for loading arbitrary model parameters (e.g. optimizers, pre-saved files, etc.",
|
278 |
+
)
|
279 |
+
args = parser.parse_args()
|
280 |
+
assert BATCH_FIRST
|
281 |
+
os.makedirs(args.output_dir, exist_ok=True)
|
282 |
+
|
283 |
+
with open(args.model_params, "r") as fp:
|
284 |
+
logger.info(f"Loading model parameters from {args.model_params}")
|
285 |
+
model_params = json.load(fp)
|
286 |
+
logger.info(f"Params: {json.dumps(model_params, indent=2, sort_keys=True)}")
|
287 |
+
train, val, test = load_datasets(args.data_dir)
|
288 |
+
docids = set(
|
289 |
+
e.docid
|
290 |
+
for e in chain.from_iterable(
|
291 |
+
chain.from_iterable(map(lambda ann: ann.evidences, chain(train, val, test)))
|
292 |
+
)
|
293 |
+
)
|
294 |
+
documents = load_documents(args.data_dir, docids)
|
295 |
+
logger.info(f"Loaded {len(documents)} documents")
|
296 |
+
(
|
297 |
+
evidence_classifier,
|
298 |
+
word_interner,
|
299 |
+
de_interner,
|
300 |
+
evidence_classes,
|
301 |
+
tokenizer,
|
302 |
+
) = initialize_models(model_params, batch_first=BATCH_FIRST)
|
303 |
+
logger.info(f"We have {len(word_interner)} wordpieces")
|
304 |
+
cache = os.path.join(args.output_dir, "preprocessed.pkl")
|
305 |
+
if os.path.exists(cache):
|
306 |
+
logger.info(f"Loading interned documents from {cache}")
|
307 |
+
(interned_documents) = torch.load(cache)
|
308 |
+
else:
|
309 |
+
logger.info(f"Interning documents")
|
310 |
+
interned_documents = {}
|
311 |
+
for d, doc in documents.items():
|
312 |
+
encoding = tokenizer.encode_plus(
|
313 |
+
doc,
|
314 |
+
add_special_tokens=True,
|
315 |
+
max_length=model_params["max_length"],
|
316 |
+
return_token_type_ids=False,
|
317 |
+
pad_to_max_length=False,
|
318 |
+
return_attention_mask=True,
|
319 |
+
return_tensors="pt",
|
320 |
+
truncation=True,
|
321 |
+
)
|
322 |
+
interned_documents[d] = encoding
|
323 |
+
torch.save((interned_documents), cache)
|
324 |
+
|
325 |
+
evidence_classifier = evidence_classifier.cuda()
|
326 |
+
optimizer = None
|
327 |
+
scheduler = None
|
328 |
+
|
329 |
+
save_dir = args.output_dir
|
330 |
+
|
331 |
+
logging.info(f"Beginning training classifier")
|
332 |
+
evidence_classifier_output_dir = os.path.join(save_dir, "classifier")
|
333 |
+
os.makedirs(save_dir, exist_ok=True)
|
334 |
+
os.makedirs(evidence_classifier_output_dir, exist_ok=True)
|
335 |
+
model_save_file = os.path.join(evidence_classifier_output_dir, "classifier.pt")
|
336 |
+
epoch_save_file = os.path.join(
|
337 |
+
evidence_classifier_output_dir, "classifier_epoch_data.pt"
|
338 |
+
)
|
339 |
+
|
340 |
+
device = next(evidence_classifier.parameters()).device
|
341 |
+
if optimizer is None:
|
342 |
+
optimizer = torch.optim.Adam(
|
343 |
+
evidence_classifier.parameters(),
|
344 |
+
lr=model_params["evidence_classifier"]["lr"],
|
345 |
+
)
|
346 |
+
criterion = nn.CrossEntropyLoss(reduction="none")
|
347 |
+
batch_size = model_params["evidence_classifier"]["batch_size"]
|
348 |
+
epochs = model_params["evidence_classifier"]["epochs"]
|
349 |
+
patience = model_params["evidence_classifier"]["patience"]
|
350 |
+
max_grad_norm = model_params["evidence_classifier"].get("max_grad_norm", None)
|
351 |
+
|
352 |
+
class_labels = [k for k, v in sorted(evidence_classes.items())]
|
353 |
+
|
354 |
+
results = {
|
355 |
+
"train_loss": [],
|
356 |
+
"train_f1": [],
|
357 |
+
"train_acc": [],
|
358 |
+
"val_loss": [],
|
359 |
+
"val_f1": [],
|
360 |
+
"val_acc": [],
|
361 |
+
}
|
362 |
+
best_epoch = -1
|
363 |
+
best_val_acc = 0
|
364 |
+
best_val_loss = float("inf")
|
365 |
+
best_model_state_dict = None
|
366 |
+
start_epoch = 0
|
367 |
+
epoch_data = {}
|
368 |
+
if os.path.exists(epoch_save_file):
|
369 |
+
logging.info(f"Restoring model from {model_save_file}")
|
370 |
+
evidence_classifier.load_state_dict(torch.load(model_save_file))
|
371 |
+
epoch_data = torch.load(epoch_save_file)
|
372 |
+
start_epoch = epoch_data["epoch"] + 1
|
373 |
+
# handle finishing because patience was exceeded or we didn't get the best final epoch
|
374 |
+
if bool(epoch_data.get("done", 0)):
|
375 |
+
start_epoch = epochs
|
376 |
+
results = epoch_data["results"]
|
377 |
+
best_epoch = start_epoch
|
378 |
+
best_model_state_dict = OrderedDict(
|
379 |
+
{k: v.cpu() for k, v in evidence_classifier.state_dict().items()}
|
380 |
+
)
|
381 |
+
logging.info(f"Restoring training from epoch {start_epoch}")
|
382 |
+
logging.info(
|
383 |
+
f"Training evidence classifier from epoch {start_epoch} until epoch {epochs}"
|
384 |
+
)
|
385 |
+
optimizer.zero_grad()
|
386 |
+
for epoch in range(start_epoch, epochs):
|
387 |
+
epoch_train_data = random.sample(train, k=len(train))
|
388 |
+
epoch_train_loss = 0
|
389 |
+
epoch_training_acc = 0
|
390 |
+
evidence_classifier.train()
|
391 |
+
logging.info(
|
392 |
+
f"Training with {len(epoch_train_data) // batch_size} batches with {len(epoch_train_data)} examples"
|
393 |
+
)
|
394 |
+
for batch_start in range(0, len(epoch_train_data), batch_size):
|
395 |
+
batch_elements = epoch_train_data[
|
396 |
+
batch_start : min(batch_start + batch_size, len(epoch_train_data))
|
397 |
+
]
|
398 |
+
targets = [evidence_classes[s.classification] for s in batch_elements]
|
399 |
+
targets = torch.tensor(targets, dtype=torch.long, device=device)
|
400 |
+
samples_encoding = [
|
401 |
+
interned_documents[extract_docid_from_dataset_element(s)]
|
402 |
+
for s in batch_elements
|
403 |
+
]
|
404 |
+
input_ids = (
|
405 |
+
torch.stack(
|
406 |
+
[
|
407 |
+
samples_encoding[i]["input_ids"]
|
408 |
+
for i in range(len(samples_encoding))
|
409 |
+
]
|
410 |
+
)
|
411 |
+
.squeeze(1)
|
412 |
+
.to(device)
|
413 |
+
)
|
414 |
+
attention_masks = (
|
415 |
+
torch.stack(
|
416 |
+
[
|
417 |
+
samples_encoding[i]["attention_mask"]
|
418 |
+
for i in range(len(samples_encoding))
|
419 |
+
]
|
420 |
+
)
|
421 |
+
.squeeze(1)
|
422 |
+
.to(device)
|
423 |
+
)
|
424 |
+
preds = evidence_classifier(
|
425 |
+
input_ids=input_ids, attention_mask=attention_masks
|
426 |
+
)[0]
|
427 |
+
epoch_training_acc += accuracy_score(
|
428 |
+
preds.argmax(dim=1).cpu(), targets.cpu(), normalize=False
|
429 |
+
)
|
430 |
+
loss = criterion(preds, targets.to(device=preds.device)).sum()
|
431 |
+
epoch_train_loss += loss.item()
|
432 |
+
loss.backward()
|
433 |
+
assert loss == loss # for nans
|
434 |
+
if max_grad_norm:
|
435 |
+
torch.nn.utils.clip_grad_norm_(
|
436 |
+
evidence_classifier.parameters(), max_grad_norm
|
437 |
+
)
|
438 |
+
optimizer.step()
|
439 |
+
if scheduler:
|
440 |
+
scheduler.step()
|
441 |
+
optimizer.zero_grad()
|
442 |
+
epoch_train_loss /= len(epoch_train_data)
|
443 |
+
epoch_training_acc /= len(epoch_train_data)
|
444 |
+
assert epoch_train_loss == epoch_train_loss # for nans
|
445 |
+
results["train_loss"].append(epoch_train_loss)
|
446 |
+
logging.info(f"Epoch {epoch} training loss {epoch_train_loss}")
|
447 |
+
logging.info(f"Epoch {epoch} training accuracy {epoch_training_acc}")
|
448 |
+
|
449 |
+
with torch.no_grad():
|
450 |
+
epoch_val_loss = 0
|
451 |
+
epoch_val_acc = 0
|
452 |
+
epoch_val_data = random.sample(val, k=len(val))
|
453 |
+
evidence_classifier.eval()
|
454 |
+
val_batch_size = 32
|
455 |
+
logging.info(
|
456 |
+
f"Validating with {len(epoch_val_data) // val_batch_size} batches with {len(epoch_val_data)} examples"
|
457 |
+
)
|
458 |
+
for batch_start in range(0, len(epoch_val_data), val_batch_size):
|
459 |
+
batch_elements = epoch_val_data[
|
460 |
+
batch_start : min(batch_start + val_batch_size, len(epoch_val_data))
|
461 |
+
]
|
462 |
+
targets = [evidence_classes[s.classification] for s in batch_elements]
|
463 |
+
targets = torch.tensor(targets, dtype=torch.long, device=device)
|
464 |
+
samples_encoding = [
|
465 |
+
interned_documents[extract_docid_from_dataset_element(s)]
|
466 |
+
for s in batch_elements
|
467 |
+
]
|
468 |
+
input_ids = (
|
469 |
+
torch.stack(
|
470 |
+
[
|
471 |
+
samples_encoding[i]["input_ids"]
|
472 |
+
for i in range(len(samples_encoding))
|
473 |
+
]
|
474 |
+
)
|
475 |
+
.squeeze(1)
|
476 |
+
.to(device)
|
477 |
+
)
|
478 |
+
attention_masks = (
|
479 |
+
torch.stack(
|
480 |
+
[
|
481 |
+
samples_encoding[i]["attention_mask"]
|
482 |
+
for i in range(len(samples_encoding))
|
483 |
+
]
|
484 |
+
)
|
485 |
+
.squeeze(1)
|
486 |
+
.to(device)
|
487 |
+
)
|
488 |
+
preds = evidence_classifier(
|
489 |
+
input_ids=input_ids, attention_mask=attention_masks
|
490 |
+
)[0]
|
491 |
+
epoch_val_acc += accuracy_score(
|
492 |
+
preds.argmax(dim=1).cpu(), targets.cpu(), normalize=False
|
493 |
+
)
|
494 |
+
loss = criterion(preds, targets.to(device=preds.device)).sum()
|
495 |
+
epoch_val_loss += loss.item()
|
496 |
+
|
497 |
+
epoch_val_loss /= len(val)
|
498 |
+
epoch_val_acc /= len(val)
|
499 |
+
results["val_acc"].append(epoch_val_acc)
|
500 |
+
results["val_loss"] = epoch_val_loss
|
501 |
+
|
502 |
+
logging.info(f"Epoch {epoch} val loss {epoch_val_loss}")
|
503 |
+
logging.info(f"Epoch {epoch} val acc {epoch_val_acc}")
|
504 |
+
|
505 |
+
if epoch_val_acc > best_val_acc or (
|
506 |
+
epoch_val_acc == best_val_acc and epoch_val_loss < best_val_loss
|
507 |
+
):
|
508 |
+
best_model_state_dict = OrderedDict(
|
509 |
+
{k: v.cpu() for k, v in evidence_classifier.state_dict().items()}
|
510 |
+
)
|
511 |
+
best_epoch = epoch
|
512 |
+
best_val_acc = epoch_val_acc
|
513 |
+
best_val_loss = epoch_val_loss
|
514 |
+
epoch_data = {
|
515 |
+
"epoch": epoch,
|
516 |
+
"results": results,
|
517 |
+
"best_val_acc": best_val_acc,
|
518 |
+
"done": 0,
|
519 |
+
}
|
520 |
+
torch.save(evidence_classifier.state_dict(), model_save_file)
|
521 |
+
torch.save(epoch_data, epoch_save_file)
|
522 |
+
logging.debug(
|
523 |
+
f"Epoch {epoch} new best model with val accuracy {epoch_val_acc}"
|
524 |
+
)
|
525 |
+
if epoch - best_epoch > patience:
|
526 |
+
logging.info(f"Exiting after epoch {epoch} due to no improvement")
|
527 |
+
epoch_data["done"] = 1
|
528 |
+
torch.save(epoch_data, epoch_save_file)
|
529 |
+
break
|
530 |
+
|
531 |
+
epoch_data["done"] = 1
|
532 |
+
epoch_data["results"] = results
|
533 |
+
torch.save(epoch_data, epoch_save_file)
|
534 |
+
evidence_classifier.load_state_dict(best_model_state_dict)
|
535 |
+
evidence_classifier = evidence_classifier.to(device=device)
|
536 |
+
evidence_classifier.eval()
|
537 |
+
|
538 |
+
# test
|
539 |
+
|
540 |
+
test_classifier = BertForSequenceClassificationTest.from_pretrained(
|
541 |
+
model_params["bert_dir"], num_labels=len(evidence_classes)
|
542 |
+
).to(device)
|
543 |
+
orig_lrp_classifier = BertForClsOrigLrp.from_pretrained(
|
544 |
+
model_params["bert_dir"], num_labels=len(evidence_classes)
|
545 |
+
).to(device)
|
546 |
+
if os.path.exists(epoch_save_file):
|
547 |
+
logging.info(f"Restoring model from {model_save_file}")
|
548 |
+
test_classifier.load_state_dict(torch.load(model_save_file))
|
549 |
+
orig_lrp_classifier.load_state_dict(torch.load(model_save_file))
|
550 |
+
test_classifier.eval()
|
551 |
+
orig_lrp_classifier.eval()
|
552 |
+
test_batch_size = 1
|
553 |
+
logging.info(
|
554 |
+
f"Testing with {len(test) // test_batch_size} batches with {len(test)} examples"
|
555 |
+
)
|
556 |
+
|
557 |
+
# explainability
|
558 |
+
explanations = Generator(test_classifier)
|
559 |
+
explanations_orig_lrp = Generator(orig_lrp_classifier)
|
560 |
+
method = "transformer_attribution"
|
561 |
+
method_folder = {
|
562 |
+
"transformer_attribution": "ours",
|
563 |
+
"partial_lrp": "partial_lrp",
|
564 |
+
"last_attn": "last_attn",
|
565 |
+
"attn_gradcam": "attn_gradcam",
|
566 |
+
"lrp": "lrp",
|
567 |
+
"rollout": "rollout",
|
568 |
+
"ground_truth": "ground_truth",
|
569 |
+
"generate_all": "generate_all",
|
570 |
+
}
|
571 |
+
method_expl = {
|
572 |
+
"transformer_attribution": explanations.generate_LRP,
|
573 |
+
"partial_lrp": explanations_orig_lrp.generate_LRP_last_layer,
|
574 |
+
"last_attn": explanations_orig_lrp.generate_attn_last_layer,
|
575 |
+
"attn_gradcam": explanations_orig_lrp.generate_attn_gradcam,
|
576 |
+
"lrp": explanations_orig_lrp.generate_full_lrp,
|
577 |
+
"rollout": explanations_orig_lrp.generate_rollout,
|
578 |
+
}
|
579 |
+
|
580 |
+
os.makedirs(os.path.join(args.output_dir, method_folder[method]), exist_ok=True)
|
581 |
+
|
582 |
+
result_files = []
|
583 |
+
for i in range(5, 85, 5):
|
584 |
+
result_files.append(
|
585 |
+
open(
|
586 |
+
os.path.join(
|
587 |
+
args.output_dir, "{0}/identifier_results_{1}.json"
|
588 |
+
).format(method_folder[method], i),
|
589 |
+
"w",
|
590 |
+
)
|
591 |
+
)
|
592 |
+
|
593 |
+
j = 0
|
594 |
+
for batch_start in range(0, len(test), test_batch_size):
|
595 |
+
batch_elements = test[
|
596 |
+
batch_start : min(batch_start + test_batch_size, len(test))
|
597 |
+
]
|
598 |
+
targets = [evidence_classes[s.classification] for s in batch_elements]
|
599 |
+
targets = torch.tensor(targets, dtype=torch.long, device=device)
|
600 |
+
samples_encoding = [
|
601 |
+
interned_documents[extract_docid_from_dataset_element(s)]
|
602 |
+
for s in batch_elements
|
603 |
+
]
|
604 |
+
input_ids = (
|
605 |
+
torch.stack(
|
606 |
+
[
|
607 |
+
samples_encoding[i]["input_ids"]
|
608 |
+
for i in range(len(samples_encoding))
|
609 |
+
]
|
610 |
+
)
|
611 |
+
.squeeze(1)
|
612 |
+
.to(device)
|
613 |
+
)
|
614 |
+
attention_masks = (
|
615 |
+
torch.stack(
|
616 |
+
[
|
617 |
+
samples_encoding[i]["attention_mask"]
|
618 |
+
for i in range(len(samples_encoding))
|
619 |
+
]
|
620 |
+
)
|
621 |
+
.squeeze(1)
|
622 |
+
.to(device)
|
623 |
+
)
|
624 |
+
preds = test_classifier(
|
625 |
+
input_ids=input_ids, attention_mask=attention_masks
|
626 |
+
)[0]
|
627 |
+
|
628 |
+
for s in batch_elements:
|
629 |
+
doc_name = extract_docid_from_dataset_element(s)
|
630 |
+
inp = documents[doc_name].split()
|
631 |
+
classification = "neg" if targets.item() == 0 else "pos"
|
632 |
+
is_classification_correct = 1 if preds.argmax(dim=1) == targets else 0
|
633 |
+
if method == "generate_all":
|
634 |
+
file_name = "{0}_{1}_{2}.tex".format(
|
635 |
+
j, classification, is_classification_correct
|
636 |
+
)
|
637 |
+
GT_global = os.path.join(
|
638 |
+
args.output_dir, "{0}/visual_results_{1}.pdf"
|
639 |
+
).format(method_folder["ground_truth"], j)
|
640 |
+
GT_ours = os.path.join(
|
641 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
642 |
+
).format(
|
643 |
+
method_folder["transformer_attribution"],
|
644 |
+
j,
|
645 |
+
classification,
|
646 |
+
is_classification_correct,
|
647 |
+
)
|
648 |
+
CF_ours = os.path.join(args.output_dir, "{0}/{1}_CF.pdf").format(
|
649 |
+
method_folder["transformer_attribution"], j
|
650 |
+
)
|
651 |
+
GT_partial = os.path.join(
|
652 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
653 |
+
).format(
|
654 |
+
method_folder["partial_lrp"],
|
655 |
+
j,
|
656 |
+
classification,
|
657 |
+
is_classification_correct,
|
658 |
+
)
|
659 |
+
CF_partial = os.path.join(args.output_dir, "{0}/{1}_CF.pdf").format(
|
660 |
+
method_folder["partial_lrp"], j
|
661 |
+
)
|
662 |
+
GT_gradcam = os.path.join(
|
663 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
664 |
+
).format(
|
665 |
+
method_folder["attn_gradcam"],
|
666 |
+
j,
|
667 |
+
classification,
|
668 |
+
is_classification_correct,
|
669 |
+
)
|
670 |
+
CF_gradcam = os.path.join(args.output_dir, "{0}/{1}_CF.pdf").format(
|
671 |
+
method_folder["attn_gradcam"], j
|
672 |
+
)
|
673 |
+
GT_lrp = os.path.join(
|
674 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
675 |
+
).format(
|
676 |
+
method_folder["lrp"],
|
677 |
+
j,
|
678 |
+
classification,
|
679 |
+
is_classification_correct,
|
680 |
+
)
|
681 |
+
CF_lrp = os.path.join(args.output_dir, "{0}/{1}_CF.pdf").format(
|
682 |
+
method_folder["lrp"], j
|
683 |
+
)
|
684 |
+
GT_lastattn = os.path.join(
|
685 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
686 |
+
).format(
|
687 |
+
method_folder["last_attn"],
|
688 |
+
j,
|
689 |
+
classification,
|
690 |
+
is_classification_correct,
|
691 |
+
)
|
692 |
+
GT_rollout = os.path.join(
|
693 |
+
args.output_dir, "{0}/{1}_GT_{2}_{3}.pdf"
|
694 |
+
).format(
|
695 |
+
method_folder["rollout"],
|
696 |
+
j,
|
697 |
+
classification,
|
698 |
+
is_classification_correct,
|
699 |
+
)
|
700 |
+
with open(file_name, "w") as f:
|
701 |
+
f.write(
|
702 |
+
r"""\documentclass[varwidth]{standalone}
|
703 |
+
\usepackage{color}
|
704 |
+
\usepackage{tcolorbox}
|
705 |
+
\usepackage{CJK}
|
706 |
+
\tcbset{width=0.9\textwidth,boxrule=0pt,colback=red,arc=0pt,auto outer arc,left=0pt,right=0pt,boxsep=5pt}
|
707 |
+
\begin{document}
|
708 |
+
\begin{CJK*}{UTF8}{gbsn}
|
709 |
+
{\setlength{\fboxsep}{0pt}\colorbox{white!0}{\parbox{0.9\textwidth}{
|
710 |
+
\setlength{\tabcolsep}{2pt} % Default value: 6pt
|
711 |
+
\begin{tabular}{ccc}
|
712 |
+
\includegraphics[width=0.32\linewidth]{"""
|
713 |
+
+ GT_global
|
714 |
+
+ """}&
|
715 |
+
\includegraphics[width=0.32\linewidth]{"""
|
716 |
+
+ GT_ours
|
717 |
+
+ """}&
|
718 |
+
\includegraphics[width=0.32\linewidth]{"""
|
719 |
+
+ CF_ours
|
720 |
+
+ """}\\\\
|
721 |
+
(a) & (b) & (c)\\\\
|
722 |
+
\includegraphics[width=0.32\linewidth]{"""
|
723 |
+
+ GT_partial
|
724 |
+
+ """}&
|
725 |
+
\includegraphics[width=0.32\linewidth]{"""
|
726 |
+
+ CF_partial
|
727 |
+
+ """}&
|
728 |
+
\includegraphics[width=0.32\linewidth]{"""
|
729 |
+
+ GT_gradcam
|
730 |
+
+ """}\\\\
|
731 |
+
(d) & (e) & (f)\\\\
|
732 |
+
\includegraphics[width=0.32\linewidth]{"""
|
733 |
+
+ CF_gradcam
|
734 |
+
+ """}&
|
735 |
+
\includegraphics[width=0.32\linewidth]{"""
|
736 |
+
+ GT_lrp
|
737 |
+
+ """}&
|
738 |
+
\includegraphics[width=0.32\linewidth]{"""
|
739 |
+
+ CF_lrp
|
740 |
+
+ """}\\\\
|
741 |
+
(g) & (h) & (i)\\\\
|
742 |
+
\includegraphics[width=0.32\linewidth]{"""
|
743 |
+
+ GT_lastattn
|
744 |
+
+ """}&
|
745 |
+
\includegraphics[width=0.32\linewidth]{"""
|
746 |
+
+ GT_rollout
|
747 |
+
+ """}&\\\\
|
748 |
+
(j) & (k)&\\\\
|
749 |
+
\end{tabular}
|
750 |
+
}}}
|
751 |
+
\end{CJK*}
|
752 |
+
\end{document}
|
753 |
+
)"""
|
754 |
+
)
|
755 |
+
j += 1
|
756 |
+
break
|
757 |
+
|
758 |
+
if method == "ground_truth":
|
759 |
+
inp_cropped = get_input_words(inp, tokenizer, input_ids[0])
|
760 |
+
cam = torch.zeros(len(inp_cropped))
|
761 |
+
for evidence in extract_evidence_from_dataset_element(s):
|
762 |
+
start_idx = evidence.start_token
|
763 |
+
if start_idx >= len(cam):
|
764 |
+
break
|
765 |
+
end_idx = evidence.end_token
|
766 |
+
cam[start_idx:end_idx] = 1
|
767 |
+
generate(
|
768 |
+
inp_cropped,
|
769 |
+
cam,
|
770 |
+
(
|
771 |
+
os.path.join(
|
772 |
+
args.output_dir, "{0}/visual_results_{1}.tex"
|
773 |
+
).format(method_folder[method], j)
|
774 |
+
),
|
775 |
+
color="green",
|
776 |
+
)
|
777 |
+
j = j + 1
|
778 |
+
break
|
779 |
+
text = tokenizer.convert_ids_to_tokens(input_ids[0])
|
780 |
+
classification = "neg" if targets.item() == 0 else "pos"
|
781 |
+
is_classification_correct = 1 if preds.argmax(dim=1) == targets else 0
|
782 |
+
target_idx = targets.item()
|
783 |
+
cam_target = method_expl[method](
|
784 |
+
input_ids=input_ids,
|
785 |
+
attention_mask=attention_masks,
|
786 |
+
index=target_idx,
|
787 |
+
)[0]
|
788 |
+
cam_target = cam_target.clamp(min=0)
|
789 |
+
generate(
|
790 |
+
text,
|
791 |
+
cam_target,
|
792 |
+
(
|
793 |
+
os.path.join(args.output_dir, "{0}/{1}_GT_{2}_{3}.tex").format(
|
794 |
+
method_folder[method],
|
795 |
+
j,
|
796 |
+
classification,
|
797 |
+
is_classification_correct,
|
798 |
+
)
|
799 |
+
),
|
800 |
+
)
|
801 |
+
if method in [
|
802 |
+
"transformer_attribution",
|
803 |
+
"partial_lrp",
|
804 |
+
"attn_gradcam",
|
805 |
+
"lrp",
|
806 |
+
]:
|
807 |
+
cam_false_class = method_expl[method](
|
808 |
+
input_ids=input_ids,
|
809 |
+
attention_mask=attention_masks,
|
810 |
+
index=1 - target_idx,
|
811 |
+
)[0]
|
812 |
+
cam_false_class = cam_false_class.clamp(min=0)
|
813 |
+
generate(
|
814 |
+
text,
|
815 |
+
cam_false_class,
|
816 |
+
(
|
817 |
+
os.path.join(args.output_dir, "{0}/{1}_CF.tex").format(
|
818 |
+
method_folder[method], j
|
819 |
+
)
|
820 |
+
),
|
821 |
+
)
|
822 |
+
cam = cam_target
|
823 |
+
cam = scores_per_word_from_scores_per_token(
|
824 |
+
inp, tokenizer, input_ids[0], cam
|
825 |
+
)
|
826 |
+
j = j + 1
|
827 |
+
doc_name = extract_docid_from_dataset_element(s)
|
828 |
+
hard_rationales = []
|
829 |
+
for res, i in enumerate(range(5, 85, 5)):
|
830 |
+
print("calculating top ", i)
|
831 |
+
_, indices = cam.topk(k=i)
|
832 |
+
for index in indices.tolist():
|
833 |
+
hard_rationales.append(
|
834 |
+
{"start_token": index, "end_token": index + 1}
|
835 |
+
)
|
836 |
+
result_dict = {
|
837 |
+
"annotation_id": doc_name,
|
838 |
+
"rationales": [
|
839 |
+
{
|
840 |
+
"docid": doc_name,
|
841 |
+
"hard_rationale_predictions": hard_rationales,
|
842 |
+
}
|
843 |
+
],
|
844 |
+
}
|
845 |
+
result_files[res].write(json.dumps(result_dict) + "\n")
|
846 |
+
|
847 |
+
for i in range(len(result_files)):
|
848 |
+
result_files[i].close()
|
849 |
+
|
850 |
+
|
851 |
+
if __name__ == "__main__":
|
852 |
+
main()
|
Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/pipeline_train.py
ADDED
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
import os
|
5 |
+
import random
|
6 |
+
from itertools import chain
|
7 |
+
from typing import Set
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
import torch
|
11 |
+
from rationale_benchmark.models.mlp import (AttentiveClassifier,
|
12 |
+
BahadanauAttention, RNNEncoder,
|
13 |
+
WordEmbedder)
|
14 |
+
from rationale_benchmark.models.model_utils import extract_embeddings
|
15 |
+
from rationale_benchmark.models.pipeline.evidence_classifier import \
|
16 |
+
train_evidence_classifier
|
17 |
+
from rationale_benchmark.models.pipeline.evidence_identifier import \
|
18 |
+
train_evidence_identifier
|
19 |
+
from rationale_benchmark.models.pipeline.pipeline_utils import decode
|
20 |
+
from rationale_benchmark.utils import (intern_annotations, intern_documents,
|
21 |
+
load_datasets, load_documents,
|
22 |
+
write_jsonl)
|
23 |
+
|
24 |
+
logging.basicConfig(
|
25 |
+
level=logging.DEBUG, format="%(relativeCreated)6d %(threadName)s %(message)s"
|
26 |
+
)
|
27 |
+
# let's make this more or less deterministic (not resistant to restarts)
|
28 |
+
random.seed(12345)
|
29 |
+
np.random.seed(67890)
|
30 |
+
torch.manual_seed(10111213)
|
31 |
+
torch.backends.cudnn.deterministic = True
|
32 |
+
torch.backends.cudnn.benchmark = False
|
33 |
+
|
34 |
+
|
35 |
+
def initialize_models(
|
36 |
+
params: dict, vocab: Set[str], batch_first: bool, unk_token="UNK"
|
37 |
+
):
|
38 |
+
# TODO this is obviously asking for some sort of dependency injection. implement if it saves me time.
|
39 |
+
if "embedding_file" in params["embeddings"]:
|
40 |
+
embeddings, word_interner, de_interner = extract_embeddings(
|
41 |
+
vocab, params["embeddings"]["embedding_file"], unk_token=unk_token
|
42 |
+
)
|
43 |
+
if torch.cuda.is_available():
|
44 |
+
embeddings = embeddings.cuda()
|
45 |
+
else:
|
46 |
+
raise ValueError("No 'embedding_file' found in params!")
|
47 |
+
word_embedder = WordEmbedder(embeddings, params["embeddings"]["dropout"])
|
48 |
+
query_encoder = RNNEncoder(
|
49 |
+
word_embedder,
|
50 |
+
batch_first=batch_first,
|
51 |
+
condition=False,
|
52 |
+
attention_mechanism=BahadanauAttention(word_embedder.output_dimension),
|
53 |
+
)
|
54 |
+
document_encoder = RNNEncoder(
|
55 |
+
word_embedder,
|
56 |
+
batch_first=batch_first,
|
57 |
+
condition=True,
|
58 |
+
attention_mechanism=BahadanauAttention(
|
59 |
+
word_embedder.output_dimension, query_size=query_encoder.output_dimension
|
60 |
+
),
|
61 |
+
)
|
62 |
+
evidence_identifier = AttentiveClassifier(
|
63 |
+
document_encoder,
|
64 |
+
query_encoder,
|
65 |
+
2,
|
66 |
+
params["evidence_identifier"]["mlp_size"],
|
67 |
+
params["evidence_identifier"]["dropout"],
|
68 |
+
)
|
69 |
+
query_encoder = RNNEncoder(
|
70 |
+
word_embedder,
|
71 |
+
batch_first=batch_first,
|
72 |
+
condition=False,
|
73 |
+
attention_mechanism=BahadanauAttention(word_embedder.output_dimension),
|
74 |
+
)
|
75 |
+
document_encoder = RNNEncoder(
|
76 |
+
word_embedder,
|
77 |
+
batch_first=batch_first,
|
78 |
+
condition=True,
|
79 |
+
attention_mechanism=BahadanauAttention(
|
80 |
+
word_embedder.output_dimension, query_size=query_encoder.output_dimension
|
81 |
+
),
|
82 |
+
)
|
83 |
+
evidence_classes = dict(
|
84 |
+
(y, x) for (x, y) in enumerate(params["evidence_classifier"]["classes"])
|
85 |
+
)
|
86 |
+
evidence_classifier = AttentiveClassifier(
|
87 |
+
document_encoder,
|
88 |
+
query_encoder,
|
89 |
+
len(evidence_classes),
|
90 |
+
params["evidence_classifier"]["mlp_size"],
|
91 |
+
params["evidence_classifier"]["dropout"],
|
92 |
+
)
|
93 |
+
return (
|
94 |
+
evidence_identifier,
|
95 |
+
evidence_classifier,
|
96 |
+
word_interner,
|
97 |
+
de_interner,
|
98 |
+
evidence_classes,
|
99 |
+
)
|
100 |
+
|
101 |
+
|
102 |
+
def main():
|
103 |
+
parser = argparse.ArgumentParser(
|
104 |
+
description="""Trains a pipeline model.
|
105 |
+
|
106 |
+
Step 1 is evidence identification, that is identify if a given sentence is evidence or not
|
107 |
+
Step 2 is evidence classification, that is given an evidence sentence, classify the final outcome for the final task (e.g. sentiment or significance).
|
108 |
+
|
109 |
+
These models should be separated into two separate steps, but at the moment:
|
110 |
+
* prep data (load, intern documents, load json)
|
111 |
+
* convert data for evidence identification - in the case of training data we take all the positives and sample some negatives
|
112 |
+
* side note: this sampling is *somewhat* configurable and is done on a per-batch/epoch basis in order to gain a broader sampling of negative values.
|
113 |
+
* train evidence identification
|
114 |
+
* convert data for evidence classification - take all rationales + decisions and use this as input
|
115 |
+
* train evidence classification
|
116 |
+
* decode first the evidence, then run classification for each split
|
117 |
+
|
118 |
+
""",
|
119 |
+
formatter_class=argparse.RawTextHelpFormatter,
|
120 |
+
)
|
121 |
+
parser.add_argument(
|
122 |
+
"--data_dir",
|
123 |
+
dest="data_dir",
|
124 |
+
required=True,
|
125 |
+
help="Which directory contains a {train,val,test}.jsonl file?",
|
126 |
+
)
|
127 |
+
parser.add_argument(
|
128 |
+
"--output_dir",
|
129 |
+
dest="output_dir",
|
130 |
+
required=True,
|
131 |
+
help="Where shall we write intermediate models + final data to?",
|
132 |
+
)
|
133 |
+
parser.add_argument(
|
134 |
+
"--model_params",
|
135 |
+
dest="model_params",
|
136 |
+
required=True,
|
137 |
+
help="JSoN file for loading arbitrary model parameters (e.g. optimizers, pre-saved files, etc.",
|
138 |
+
)
|
139 |
+
args = parser.parse_args()
|
140 |
+
BATCH_FIRST = True
|
141 |
+
|
142 |
+
with open(args.model_params, "r") as fp:
|
143 |
+
logging.debug(f"Loading model parameters from {args.model_params}")
|
144 |
+
model_params = json.load(fp)
|
145 |
+
train, val, test = load_datasets(args.data_dir)
|
146 |
+
docids = set(
|
147 |
+
e.docid
|
148 |
+
for e in chain.from_iterable(
|
149 |
+
chain.from_iterable(map(lambda ann: ann.evidences, chain(train, val, test)))
|
150 |
+
)
|
151 |
+
)
|
152 |
+
documents = load_documents(args.data_dir, docids)
|
153 |
+
document_vocab = set(chain.from_iterable(chain.from_iterable(documents.values())))
|
154 |
+
annotation_vocab = set(
|
155 |
+
chain.from_iterable(e.query.split() for e in chain(train, val, test))
|
156 |
+
)
|
157 |
+
logging.debug(
|
158 |
+
f"Loaded {len(documents)} documents with {len(document_vocab)} unique words"
|
159 |
+
)
|
160 |
+
# this ignores the case where annotations don't align perfectly with token boundaries, but this isn't that important
|
161 |
+
vocab = document_vocab | annotation_vocab
|
162 |
+
unk_token = "UNK"
|
163 |
+
(
|
164 |
+
evidence_identifier,
|
165 |
+
evidence_classifier,
|
166 |
+
word_interner,
|
167 |
+
de_interner,
|
168 |
+
evidence_classes,
|
169 |
+
) = initialize_models(
|
170 |
+
model_params, vocab, batch_first=BATCH_FIRST, unk_token=unk_token
|
171 |
+
)
|
172 |
+
logging.debug(
|
173 |
+
f"Including annotations, we have {len(vocab)} total words in the data, with embeddings for {len(word_interner)}"
|
174 |
+
)
|
175 |
+
interned_documents = intern_documents(documents, word_interner, unk_token)
|
176 |
+
interned_train = intern_annotations(train, word_interner, unk_token)
|
177 |
+
interned_val = intern_annotations(val, word_interner, unk_token)
|
178 |
+
interned_test = intern_annotations(test, word_interner, unk_token)
|
179 |
+
assert BATCH_FIRST # for correctness of the split dimension for DataParallel
|
180 |
+
evidence_identifier, evidence_ident_results = train_evidence_identifier(
|
181 |
+
evidence_identifier.cuda(),
|
182 |
+
args.output_dir,
|
183 |
+
interned_train,
|
184 |
+
interned_val,
|
185 |
+
interned_documents,
|
186 |
+
model_params,
|
187 |
+
tensorize_model_inputs=True,
|
188 |
+
)
|
189 |
+
evidence_classifier, evidence_class_results = train_evidence_classifier(
|
190 |
+
evidence_classifier.cuda(),
|
191 |
+
args.output_dir,
|
192 |
+
interned_train,
|
193 |
+
interned_val,
|
194 |
+
interned_documents,
|
195 |
+
model_params,
|
196 |
+
class_interner=evidence_classes,
|
197 |
+
tensorize_model_inputs=True,
|
198 |
+
)
|
199 |
+
pipeline_batch_size = min(
|
200 |
+
[
|
201 |
+
model_params["evidence_classifier"]["batch_size"],
|
202 |
+
model_params["evidence_identifier"]["batch_size"],
|
203 |
+
]
|
204 |
+
)
|
205 |
+
pipeline_results, train_decoded, val_decoded, test_decoded = decode(
|
206 |
+
evidence_identifier,
|
207 |
+
evidence_classifier,
|
208 |
+
interned_train,
|
209 |
+
interned_val,
|
210 |
+
interned_test,
|
211 |
+
interned_documents,
|
212 |
+
evidence_classes,
|
213 |
+
pipeline_batch_size,
|
214 |
+
tensorize_model_inputs=True,
|
215 |
+
)
|
216 |
+
write_jsonl(train_decoded, os.path.join(args.output_dir, "train_decoded.jsonl"))
|
217 |
+
write_jsonl(val_decoded, os.path.join(args.output_dir, "val_decoded.jsonl"))
|
218 |
+
write_jsonl(test_decoded, os.path.join(args.output_dir, "test_decoded.jsonl"))
|
219 |
+
with open(
|
220 |
+
os.path.join(args.output_dir, "identifier_results.json"), "w"
|
221 |
+
) as ident_output, open(
|
222 |
+
os.path.join(args.output_dir, "classifier_results.json"), "w"
|
223 |
+
) as class_output:
|
224 |
+
ident_output.write(json.dumps(evidence_ident_results))
|
225 |
+
class_output.write(json.dumps(evidence_class_results))
|
226 |
+
for k, v in pipeline_results.items():
|
227 |
+
if type(v) is dict:
|
228 |
+
for k1, v1 in v.items():
|
229 |
+
logging.info(f"Pipeline results for {k}, {k1}={v1}")
|
230 |
+
else:
|
231 |
+
logging.info(f"Pipeline results {k}\t={v}")
|
232 |
+
|
233 |
+
|
234 |
+
if __name__ == "__main__":
|
235 |
+
main()
|
Transformer-Explainability/BERT_rationale_benchmark/models/pipeline/pipeline_utils.py
ADDED
@@ -0,0 +1,1045 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import itertools
|
2 |
+
import logging
|
3 |
+
from collections import defaultdict, namedtuple
|
4 |
+
from itertools import chain
|
5 |
+
from typing import Any, Dict, List, Tuple
|
6 |
+
|
7 |
+
import numpy as np
|
8 |
+
import torch
|
9 |
+
import torch.nn as nn
|
10 |
+
from rationale_benchmark.metrics import (PositionScoredDocument, Rationale,
|
11 |
+
partial_match_score,
|
12 |
+
score_hard_rationale_predictions,
|
13 |
+
score_soft_tokens)
|
14 |
+
from rationale_benchmark.models.model_utils import PaddedSequence
|
15 |
+
from rationale_benchmark.utils import Annotation
|
16 |
+
from sklearn.metrics import accuracy_score, classification_report
|
17 |
+
|
18 |
+
SentenceEvidence = namedtuple(
|
19 |
+
"SentenceEvidence", "kls ann_id query docid index sentence"
|
20 |
+
)
|
21 |
+
|
22 |
+
|
23 |
+
def token_annotations_to_evidence_classification(
|
24 |
+
annotations: List[Annotation],
|
25 |
+
documents: Dict[str, List[List[Any]]],
|
26 |
+
class_interner: Dict[str, int],
|
27 |
+
) -> List[SentenceEvidence]:
|
28 |
+
ret = []
|
29 |
+
for ann in annotations:
|
30 |
+
docid_to_ev = defaultdict(list)
|
31 |
+
for evidence in ann.all_evidences():
|
32 |
+
docid_to_ev[evidence.docid].append(evidence)
|
33 |
+
for docid, evidences in docid_to_ev.items():
|
34 |
+
evidences = sorted(evidences, key=lambda ev: ev.start_token)
|
35 |
+
text = []
|
36 |
+
covered_tokens = set()
|
37 |
+
doc = list(chain.from_iterable(documents[docid]))
|
38 |
+
for evidence in evidences:
|
39 |
+
assert (
|
40 |
+
evidence.start_token >= 0
|
41 |
+
and evidence.end_token > evidence.start_token
|
42 |
+
)
|
43 |
+
assert evidence.start_token < len(doc) and evidence.end_token <= len(
|
44 |
+
doc
|
45 |
+
)
|
46 |
+
text.extend(evidence.text)
|
47 |
+
new_tokens = set(range(evidence.start_token, evidence.end_token))
|
48 |
+
if len(new_tokens & covered_tokens) > 0:
|
49 |
+
raise ValueError(
|
50 |
+
"Have overlapping token ranges covered in the evidence spans and the implementer was lazy; deal with it"
|
51 |
+
)
|
52 |
+
covered_tokens |= new_tokens
|
53 |
+
assert len(text) > 0
|
54 |
+
ret.append(
|
55 |
+
SentenceEvidence(
|
56 |
+
kls=class_interner[ann.classification],
|
57 |
+
query=ann.query,
|
58 |
+
ann_id=ann.annotation_id,
|
59 |
+
docid=docid,
|
60 |
+
index=-1,
|
61 |
+
sentence=tuple(text),
|
62 |
+
)
|
63 |
+
)
|
64 |
+
return ret
|
65 |
+
|
66 |
+
|
67 |
+
def annotations_to_evidence_classification(
|
68 |
+
annotations: List[Annotation],
|
69 |
+
documents: Dict[str, List[List[Any]]],
|
70 |
+
class_interner: Dict[str, int],
|
71 |
+
include_all: bool,
|
72 |
+
) -> List[SentenceEvidence]:
|
73 |
+
"""Converts Corpus-Level annotations to Sentence Level relevance judgments.
|
74 |
+
|
75 |
+
As this module is about a pipelined approach for evidence identification,
|
76 |
+
inputs to both an evidence identifier and evidence classifier need to be to
|
77 |
+
be on a sentence level, this module converts data to be that form.
|
78 |
+
|
79 |
+
The return type is of the form
|
80 |
+
annotation id -> docid -> [sentence level annotations]
|
81 |
+
"""
|
82 |
+
ret = []
|
83 |
+
for ann in annotations:
|
84 |
+
ann_id = ann.annotation_id
|
85 |
+
docids = set(ev.docid for ev in chain.from_iterable(ann.evidences))
|
86 |
+
annotations_for_doc = defaultdict(list)
|
87 |
+
for d in docids:
|
88 |
+
for index, sent in enumerate(documents[d]):
|
89 |
+
annotations_for_doc[d].append(
|
90 |
+
SentenceEvidence(
|
91 |
+
kls=class_interner[ann.classification],
|
92 |
+
query=ann.query,
|
93 |
+
ann_id=ann.annotation_id,
|
94 |
+
docid=d,
|
95 |
+
index=index,
|
96 |
+
sentence=tuple(sent),
|
97 |
+
)
|
98 |
+
)
|
99 |
+
if include_all:
|
100 |
+
ret.extend(chain.from_iterable(annotations_for_doc.values()))
|
101 |
+
else:
|
102 |
+
contributes = set()
|
103 |
+
for ev in chain.from_iterable(ann.evidences):
|
104 |
+
for index in range(ev.start_sentence, ev.end_sentence):
|
105 |
+
contributes.add(annotations_for_doc[ev.docid][index])
|
106 |
+
ret.extend(contributes)
|
107 |
+
assert len(ret) > 0
|
108 |
+
return ret
|
109 |
+
|
110 |
+
|
111 |
+
def annotations_to_evidence_identification(
|
112 |
+
annotations: List[Annotation], documents: Dict[str, List[List[Any]]]
|
113 |
+
) -> Dict[str, Dict[str, List[SentenceEvidence]]]:
|
114 |
+
"""Converts Corpus-Level annotations to Sentence Level relevance judgments.
|
115 |
+
|
116 |
+
As this module is about a pipelined approach for evidence identification,
|
117 |
+
inputs to both an evidence identifier and evidence classifier need to be to
|
118 |
+
be on a sentence level, this module converts data to be that form.
|
119 |
+
|
120 |
+
The return type is of the form
|
121 |
+
annotation id -> docid -> [sentence level annotations]
|
122 |
+
"""
|
123 |
+
ret = defaultdict(dict) # annotation id -> docid -> sentences
|
124 |
+
for ann in annotations:
|
125 |
+
ann_id = ann.annotation_id
|
126 |
+
for ev_group in ann.evidences:
|
127 |
+
for ev in ev_group:
|
128 |
+
if len(ev.text) == 0:
|
129 |
+
continue
|
130 |
+
if ev.docid not in ret[ann_id]:
|
131 |
+
ret[ann.annotation_id][ev.docid] = []
|
132 |
+
# populate the document with "not evidence"; to be filled in later
|
133 |
+
for index, sent in enumerate(documents[ev.docid]):
|
134 |
+
ret[ann.annotation_id][ev.docid].append(
|
135 |
+
SentenceEvidence(
|
136 |
+
kls=0,
|
137 |
+
query=ann.query,
|
138 |
+
ann_id=ann.annotation_id,
|
139 |
+
docid=ev.docid,
|
140 |
+
index=index,
|
141 |
+
sentence=sent,
|
142 |
+
)
|
143 |
+
)
|
144 |
+
# define the evidence sections of the document
|
145 |
+
for s in range(ev.start_sentence, ev.end_sentence):
|
146 |
+
ret[ann.annotation_id][ev.docid][s] = SentenceEvidence(
|
147 |
+
kls=1,
|
148 |
+
ann_id=ann.annotation_id,
|
149 |
+
query=ann.query,
|
150 |
+
docid=ev.docid,
|
151 |
+
index=ret[ann.annotation_id][ev.docid][s].index,
|
152 |
+
sentence=ret[ann.annotation_id][ev.docid][s].sentence,
|
153 |
+
)
|
154 |
+
return ret
|
155 |
+
|
156 |
+
|
157 |
+
def annotations_to_evidence_token_identification(
|
158 |
+
annotations: List[Annotation],
|
159 |
+
source_documents: Dict[str, List[List[str]]],
|
160 |
+
interned_documents: Dict[str, List[List[int]]],
|
161 |
+
token_mapping: Dict[str, List[List[Tuple[int, int]]]],
|
162 |
+
) -> Dict[str, Dict[str, List[SentenceEvidence]]]:
|
163 |
+
# TODO document
|
164 |
+
# TODO should we simplify to use only source text?
|
165 |
+
ret = defaultdict(lambda: defaultdict(list)) # annotation id -> docid -> sentences
|
166 |
+
positive_tokens = 0
|
167 |
+
negative_tokens = 0
|
168 |
+
for ann in annotations:
|
169 |
+
annid = ann.annotation_id
|
170 |
+
docids = set(ev.docid for ev in chain.from_iterable(ann.evidences))
|
171 |
+
sentence_offsets = defaultdict(list) # docid -> [(start, end)]
|
172 |
+
classes = defaultdict(list) # docid -> [token is yea or nay]
|
173 |
+
for docid in docids:
|
174 |
+
start = 0
|
175 |
+
assert len(source_documents[docid]) == len(interned_documents[docid])
|
176 |
+
for whole_token_sent, wordpiece_sent in zip(
|
177 |
+
source_documents[docid], interned_documents[docid]
|
178 |
+
):
|
179 |
+
classes[docid].extend([0 for _ in wordpiece_sent])
|
180 |
+
end = start + len(wordpiece_sent)
|
181 |
+
sentence_offsets[docid].append((start, end))
|
182 |
+
start = end
|
183 |
+
for ev in chain.from_iterable(ann.evidences):
|
184 |
+
if len(ev.text) == 0:
|
185 |
+
continue
|
186 |
+
flat_token_map = list(chain.from_iterable(token_mapping[ev.docid]))
|
187 |
+
if ev.start_token != -1:
|
188 |
+
# start, end = token_mapping[ev.docid][ev.start_token][0], token_mapping[ev.docid][ev.end_token][1]
|
189 |
+
start, end = (
|
190 |
+
flat_token_map[ev.start_token][0],
|
191 |
+
flat_token_map[ev.end_token - 1][1],
|
192 |
+
)
|
193 |
+
else:
|
194 |
+
start = flat_token_map[sentence_offsets[ev.start_sentence][0]][0]
|
195 |
+
end = flat_token_map[sentence_offsets[ev.end_sentence - 1][1]][1]
|
196 |
+
for i in range(start, end):
|
197 |
+
classes[ev.docid][i] = 1
|
198 |
+
for docid, offsets in sentence_offsets.items():
|
199 |
+
token_assignments = classes[docid]
|
200 |
+
positive_tokens += sum(token_assignments)
|
201 |
+
negative_tokens += len(token_assignments) - sum(token_assignments)
|
202 |
+
for s, (start, end) in enumerate(offsets):
|
203 |
+
sent = interned_documents[docid][s]
|
204 |
+
ret[annid][docid].append(
|
205 |
+
SentenceEvidence(
|
206 |
+
kls=tuple(token_assignments[start:end]),
|
207 |
+
query=ann.query,
|
208 |
+
ann_id=ann.annotation_id,
|
209 |
+
docid=docid,
|
210 |
+
index=s,
|
211 |
+
sentence=sent,
|
212 |
+
)
|
213 |
+
)
|
214 |
+
logging.info(
|
215 |
+
f"Have {positive_tokens} positive wordpiece tokens, {negative_tokens} negative wordpiece tokens"
|
216 |
+
)
|
217 |
+
return ret
|
218 |
+
|
219 |
+
|
220 |
+
def make_preds_batch(
|
221 |
+
classifier: nn.Module,
|
222 |
+
batch_elements: List[SentenceEvidence],
|
223 |
+
device=None,
|
224 |
+
criterion: nn.Module = None,
|
225 |
+
tensorize_model_inputs: bool = True,
|
226 |
+
) -> Tuple[float, List[float], List[int], List[int]]:
|
227 |
+
"""Batch predictions
|
228 |
+
|
229 |
+
Args:
|
230 |
+
classifier: a module that looks like an AttentiveClassifier
|
231 |
+
batch_elements: a list of elements to make predictions over. These must be SentenceEvidence objects.
|
232 |
+
device: Optional; what compute device this should run on
|
233 |
+
criterion: Optional; a loss function
|
234 |
+
tensorize_model_inputs: should we convert our data to tensors before passing it to the model? Useful if we have a model that performs its own tokenization
|
235 |
+
"""
|
236 |
+
# delete any "None" padding, if any (imposed by the use of the "grouper")
|
237 |
+
batch_elements = filter(lambda x: x is not None, batch_elements)
|
238 |
+
targets, queries, sentences = zip(
|
239 |
+
*[(s.kls, s.query, s.sentence) for s in batch_elements]
|
240 |
+
)
|
241 |
+
ids = [(s.ann_id, s.docid, s.index) for s in batch_elements]
|
242 |
+
targets = torch.tensor(targets, dtype=torch.long, device=device)
|
243 |
+
if tensorize_model_inputs:
|
244 |
+
queries = [torch.tensor(q, dtype=torch.long) for q in queries]
|
245 |
+
sentences = [torch.tensor(s, dtype=torch.long) for s in sentences]
|
246 |
+
preds = classifier(queries, ids, sentences)
|
247 |
+
targets = targets.to(device=preds.device)
|
248 |
+
if criterion:
|
249 |
+
loss = criterion(preds, targets)
|
250 |
+
else:
|
251 |
+
loss = None
|
252 |
+
# .float() because pytorch 1.3 introduces a bug where argmax is unsupported for float16
|
253 |
+
hard_preds = torch.argmax(preds.float(), dim=-1)
|
254 |
+
return loss, preds, hard_preds, targets
|
255 |
+
|
256 |
+
|
257 |
+
def make_preds_epoch(
|
258 |
+
classifier: nn.Module,
|
259 |
+
data: List[SentenceEvidence],
|
260 |
+
batch_size: int,
|
261 |
+
device=None,
|
262 |
+
criterion: nn.Module = None,
|
263 |
+
tensorize_model_inputs: bool = True,
|
264 |
+
):
|
265 |
+
"""Predictions for more than one batch.
|
266 |
+
|
267 |
+
Args:
|
268 |
+
classifier: a module that looks like an AttentiveClassifier
|
269 |
+
data: a list of elements to make predictions over. These must be SentenceEvidence objects.
|
270 |
+
batch_size: the biggest chunk we can fit in one batch.
|
271 |
+
device: Optional; what compute device this should run on
|
272 |
+
criterion: Optional; a loss function
|
273 |
+
tensorize_model_inputs: should we convert our data to tensors before passing it to the model? Useful if we have a model that performs its own tokenization
|
274 |
+
"""
|
275 |
+
epoch_loss = 0
|
276 |
+
epoch_soft_pred = []
|
277 |
+
epoch_hard_pred = []
|
278 |
+
epoch_truth = []
|
279 |
+
batches = _grouper(data, batch_size)
|
280 |
+
classifier.eval()
|
281 |
+
for batch in batches:
|
282 |
+
loss, soft_preds, hard_preds, targets = make_preds_batch(
|
283 |
+
classifier,
|
284 |
+
batch,
|
285 |
+
device,
|
286 |
+
criterion=criterion,
|
287 |
+
tensorize_model_inputs=tensorize_model_inputs,
|
288 |
+
)
|
289 |
+
if loss is not None:
|
290 |
+
epoch_loss += loss.sum().item()
|
291 |
+
epoch_hard_pred.extend(hard_preds)
|
292 |
+
epoch_soft_pred.extend(soft_preds.cpu())
|
293 |
+
epoch_truth.extend(targets)
|
294 |
+
epoch_loss /= len(data)
|
295 |
+
epoch_hard_pred = [x.item() for x in epoch_hard_pred]
|
296 |
+
epoch_truth = [x.item() for x in epoch_truth]
|
297 |
+
return epoch_loss, epoch_soft_pred, epoch_hard_pred, epoch_truth
|
298 |
+
|
299 |
+
|
300 |
+
def make_token_preds_batch(
|
301 |
+
classifier: nn.Module,
|
302 |
+
batch_elements: List[SentenceEvidence],
|
303 |
+
token_mapping: Dict[str, List[List[Tuple[int, int]]]],
|
304 |
+
device=None,
|
305 |
+
criterion: nn.Module = None,
|
306 |
+
tensorize_model_inputs: bool = True,
|
307 |
+
) -> Tuple[float, List[float], List[int], List[int]]:
|
308 |
+
"""Batch predictions
|
309 |
+
|
310 |
+
Args:
|
311 |
+
classifier: a module that looks like an AttentiveClassifier
|
312 |
+
batch_elements: a list of elements to make predictions over. These must be SentenceEvidence objects.
|
313 |
+
device: Optional; what compute device this should run on
|
314 |
+
criterion: Optional; a loss function
|
315 |
+
tensorize_model_inputs: should we convert our data to tensors before passing it to the model? Useful if we have a model that performs its own tokenization
|
316 |
+
"""
|
317 |
+
# delete any "None" padding, if any (imposed by the use of the "grouper")
|
318 |
+
batch_elements = filter(lambda x: x is not None, batch_elements)
|
319 |
+
targets, queries, sentences = zip(
|
320 |
+
*[(s.kls, s.query, s.sentence) for s in batch_elements]
|
321 |
+
)
|
322 |
+
ids = [(s.ann_id, s.docid, s.index) for s in batch_elements]
|
323 |
+
targets = PaddedSequence.autopad(
|
324 |
+
[torch.tensor(t, dtype=torch.long, device=device) for t in targets],
|
325 |
+
batch_first=True,
|
326 |
+
device=device,
|
327 |
+
)
|
328 |
+
aggregate_spans = [token_mapping[s.docid][s.index] for s in batch_elements]
|
329 |
+
if tensorize_model_inputs:
|
330 |
+
queries = [torch.tensor(q, dtype=torch.long) for q in queries]
|
331 |
+
sentences = [torch.tensor(s, dtype=torch.long) for s in sentences]
|
332 |
+
preds = classifier(queries, ids, sentences, aggregate_spans)
|
333 |
+
targets = targets.to(device=preds.device)
|
334 |
+
mask = targets.mask(on=1, off=0, device=preds.device, dtype=torch.float)
|
335 |
+
if criterion:
|
336 |
+
loss = criterion(
|
337 |
+
preds, (targets.data.to(device=preds.device) * mask).squeeze()
|
338 |
+
).sum()
|
339 |
+
else:
|
340 |
+
loss = None
|
341 |
+
hard_preds = [
|
342 |
+
torch.round(x).to(dtype=torch.int).cpu() for x in targets.unpad(preds)
|
343 |
+
]
|
344 |
+
targets = [[y.item() for y in x] for x in targets.unpad(targets.data.cpu())]
|
345 |
+
return loss, preds, hard_preds, targets # targets.unpad(targets.data.cpu())
|
346 |
+
|
347 |
+
|
348 |
+
# TODO fix the arguments
|
349 |
+
def make_token_preds_epoch(
|
350 |
+
classifier: nn.Module,
|
351 |
+
data: List[SentenceEvidence],
|
352 |
+
token_mapping: Dict[str, List[List[Tuple[int, int]]]],
|
353 |
+
batch_size: int,
|
354 |
+
device=None,
|
355 |
+
criterion: nn.Module = None,
|
356 |
+
tensorize_model_inputs: bool = True,
|
357 |
+
):
|
358 |
+
"""Predictions for more than one batch.
|
359 |
+
|
360 |
+
Args:
|
361 |
+
classifier: a module that looks like an AttentiveClassifier
|
362 |
+
data: a list of elements to make predictions over. These must be SentenceEvidence objects.
|
363 |
+
batch_size: the biggest chunk we can fit in one batch.
|
364 |
+
device: Optional; what compute device this should run on
|
365 |
+
criterion: Optional; a loss function
|
366 |
+
tensorize_model_inputs: should we convert our data to tensors before passing it to the model? Useful if we have a model that performs its own tokenization
|
367 |
+
"""
|
368 |
+
epoch_loss = 0
|
369 |
+
epoch_soft_pred = []
|
370 |
+
epoch_hard_pred = []
|
371 |
+
epoch_truth = []
|
372 |
+
batches = _grouper(data, batch_size)
|
373 |
+
classifier.eval()
|
374 |
+
for batch in batches:
|
375 |
+
loss, soft_preds, hard_preds, targets = make_token_preds_batch(
|
376 |
+
classifier,
|
377 |
+
batch,
|
378 |
+
token_mapping,
|
379 |
+
device,
|
380 |
+
criterion=criterion,
|
381 |
+
tensorize_model_inputs=tensorize_model_inputs,
|
382 |
+
)
|
383 |
+
if loss is not None:
|
384 |
+
epoch_loss += loss.sum().item()
|
385 |
+
epoch_hard_pred.extend(hard_preds)
|
386 |
+
epoch_soft_pred.extend(soft_preds.cpu().tolist())
|
387 |
+
epoch_truth.extend(targets)
|
388 |
+
epoch_loss /= len(data)
|
389 |
+
return epoch_loss, epoch_soft_pred, epoch_hard_pred, epoch_truth
|
390 |
+
|
391 |
+
|
392 |
+
# copied from https://docs.python.org/3/library/itertools.html#itertools-recipes
|
393 |
+
def _grouper(iterable, n, fillvalue=None):
|
394 |
+
"Collect data into fixed-length chunks or blocks"
|
395 |
+
# grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
|
396 |
+
args = [iter(iterable)] * n
|
397 |
+
return itertools.zip_longest(*args, fillvalue=fillvalue)
|
398 |
+
|
399 |
+
|
400 |
+
def score_rationales(
|
401 |
+
truth: List[Annotation],
|
402 |
+
documents: Dict[str, List[List[int]]],
|
403 |
+
input_data: List[SentenceEvidence],
|
404 |
+
scores: List[float],
|
405 |
+
) -> dict:
|
406 |
+
results = {}
|
407 |
+
doc_to_sent_scores = dict() # (annid, docid) -> [sentence scores]
|
408 |
+
for sent, score in zip(input_data, scores):
|
409 |
+
k = (sent.ann_id, sent.docid)
|
410 |
+
if k not in doc_to_sent_scores:
|
411 |
+
doc_to_sent_scores[k] = [0.0 for _ in range(len(documents[sent.docid]))]
|
412 |
+
if not isinstance(score[1], float):
|
413 |
+
score[1] = score[1].item()
|
414 |
+
doc_to_sent_scores[(sent.ann_id, sent.docid)][sent.index] = score[1]
|
415 |
+
# hard rationale scoring
|
416 |
+
best_sentence = {k: np.argmax(np.array(v)) for k, v in doc_to_sent_scores.items()}
|
417 |
+
predicted_rationales = []
|
418 |
+
for (ann_id, docid), sent_idx in best_sentence.items():
|
419 |
+
start_token = sum(len(s) for s in documents[docid][:sent_idx])
|
420 |
+
end_token = start_token + len(documents[docid][sent_idx])
|
421 |
+
predicted_rationales.append(Rationale(ann_id, docid, start_token, end_token))
|
422 |
+
true_rationales = list(
|
423 |
+
chain.from_iterable(Rationale.from_annotation(rat) for rat in truth)
|
424 |
+
)
|
425 |
+
|
426 |
+
results["hard_rationale_scores"] = score_hard_rationale_predictions(
|
427 |
+
true_rationales, predicted_rationales
|
428 |
+
)
|
429 |
+
results["hard_rationale_partial_match_scores"] = partial_match_score(
|
430 |
+
true_rationales, predicted_rationales, [0.5]
|
431 |
+
)
|
432 |
+
|
433 |
+
# soft rationale scoring
|
434 |
+
instance_format = []
|
435 |
+
for (ann_id, docid), sentences in doc_to_sent_scores.items():
|
436 |
+
soft_token_predictions = []
|
437 |
+
for sent_score, sent_text in zip(sentences, documents[docid]):
|
438 |
+
soft_token_predictions.extend(sent_score for _ in range(len(sent_text)))
|
439 |
+
instance_format.append(
|
440 |
+
{
|
441 |
+
"annotation_id": ann_id,
|
442 |
+
"rationales": [
|
443 |
+
{
|
444 |
+
"docid": docid,
|
445 |
+
"soft_rationale_predictions": soft_token_predictions,
|
446 |
+
"soft_sentence_predictions": sentences,
|
447 |
+
}
|
448 |
+
],
|
449 |
+
}
|
450 |
+
)
|
451 |
+
flattened_documents = {
|
452 |
+
k: list(chain.from_iterable(v)) for k, v in documents.items()
|
453 |
+
}
|
454 |
+
token_scoring_format = PositionScoredDocument.from_results(
|
455 |
+
instance_format, truth, flattened_documents, use_tokens=True
|
456 |
+
)
|
457 |
+
results["soft_token_scores"] = score_soft_tokens(token_scoring_format)
|
458 |
+
sentence_scoring_format = PositionScoredDocument.from_results(
|
459 |
+
instance_format, truth, documents, use_tokens=False
|
460 |
+
)
|
461 |
+
results["soft_sentence_scores"] = score_soft_tokens(sentence_scoring_format)
|
462 |
+
return results
|
463 |
+
|
464 |
+
|
465 |
+
def decode(
|
466 |
+
evidence_identifier: nn.Module,
|
467 |
+
evidence_classifier: nn.Module,
|
468 |
+
train: List[Annotation],
|
469 |
+
val: List[Annotation],
|
470 |
+
test: List[Annotation],
|
471 |
+
docs: Dict[str, List[List[int]]],
|
472 |
+
class_interner: Dict[str, int],
|
473 |
+
batch_size: int,
|
474 |
+
tensorize_model_inputs: bool,
|
475 |
+
decoding_docs: Dict[str, List[Any]] = None,
|
476 |
+
) -> dict:
|
477 |
+
"""Identifies and then classifies evidence
|
478 |
+
|
479 |
+
Args:
|
480 |
+
evidence_identifier: a module for identifying evidence statements
|
481 |
+
evidence_classifier: a module for making a classification based on evidence statements
|
482 |
+
train: A List of interned Annotations
|
483 |
+
val: A List of interned Annotations
|
484 |
+
test: A List of interned Annotations
|
485 |
+
docs: A Dict of Documents, which are interned sentences.
|
486 |
+
class_interner: Converts an Annotation's final class into ints
|
487 |
+
batch_size: how big should our batches be?
|
488 |
+
tensorize_model_inputs: should we convert our data to tensors before passing it to the model? Useful if we have a model that performs its own tokenization
|
489 |
+
"""
|
490 |
+
device = None
|
491 |
+
class_labels = [k for k, v in sorted(class_interner.items(), key=lambda x: x[1])]
|
492 |
+
if decoding_docs is None:
|
493 |
+
decoding_docs = docs
|
494 |
+
|
495 |
+
def prep(data: List[Annotation]) -> List[Tuple[SentenceEvidence, SentenceEvidence]]:
|
496 |
+
"""Prepares data for evidence identification and classification.
|
497 |
+
|
498 |
+
Creates paired evaluation data, wherein each (annotation, docid, sentence, kls)
|
499 |
+
tuplet appears first as the kls determining if the sentence is evidence, and
|
500 |
+
secondarily what the overall classification for the (annotation/docid) pair is.
|
501 |
+
This allows selection based on model scores of the evidence_identifier for
|
502 |
+
input to the evidence_classifier.
|
503 |
+
"""
|
504 |
+
identification_data = annotations_to_evidence_identification(data, docs)
|
505 |
+
classification_data = annotations_to_evidence_classification(
|
506 |
+
data, docs, class_interner, include_all=True
|
507 |
+
)
|
508 |
+
ann_doc_sents = defaultdict(
|
509 |
+
lambda: defaultdict(dict)
|
510 |
+
) # ann id -> docid -> sent idx -> sent data
|
511 |
+
ret = []
|
512 |
+
for sent_ev in classification_data:
|
513 |
+
id_data = identification_data[sent_ev.ann_id][sent_ev.docid][sent_ev.index]
|
514 |
+
ret.append((id_data, sent_ev))
|
515 |
+
assert id_data.ann_id == sent_ev.ann_id
|
516 |
+
assert id_data.docid == sent_ev.docid
|
517 |
+
assert id_data.index == sent_ev.index
|
518 |
+
assert len(ret) == len(classification_data)
|
519 |
+
return ret
|
520 |
+
|
521 |
+
def decode_batch(
|
522 |
+
data: List[Tuple[SentenceEvidence, SentenceEvidence]],
|
523 |
+
name: str,
|
524 |
+
score: bool = False,
|
525 |
+
annotations: List[Annotation] = None,
|
526 |
+
) -> dict:
|
527 |
+
"""Identifies evidence statements and then makes classifications based on it.
|
528 |
+
|
529 |
+
Args:
|
530 |
+
data: a paired list of SentenceEvidences, differing only in the kls field.
|
531 |
+
The first corresponds to whether or not something is evidence, and the second corresponds to an evidence class
|
532 |
+
name: a name for a results dict
|
533 |
+
"""
|
534 |
+
|
535 |
+
num_uniques = len(set((x.ann_id, x.docid) for x, _ in data))
|
536 |
+
logging.info(
|
537 |
+
f"Decoding dataset {name} with {len(data)} sentences, {num_uniques} annotations"
|
538 |
+
)
|
539 |
+
identifier_data, classifier_data = zip(*data)
|
540 |
+
results = dict()
|
541 |
+
IdentificationClassificationResult = namedtuple(
|
542 |
+
"IdentificationClassificationResult",
|
543 |
+
"identification_data classification_data soft_identification hard_identification soft_classification hard_classification",
|
544 |
+
)
|
545 |
+
with torch.no_grad():
|
546 |
+
# make predictions for the evidence_identifier
|
547 |
+
evidence_identifier.eval()
|
548 |
+
evidence_classifier.eval()
|
549 |
+
|
550 |
+
(
|
551 |
+
_,
|
552 |
+
soft_identification_preds,
|
553 |
+
hard_identification_preds,
|
554 |
+
_,
|
555 |
+
) = make_preds_epoch(
|
556 |
+
evidence_identifier,
|
557 |
+
identifier_data,
|
558 |
+
batch_size,
|
559 |
+
device,
|
560 |
+
tensorize_model_inputs=tensorize_model_inputs,
|
561 |
+
)
|
562 |
+
assert len(soft_identification_preds) == len(data)
|
563 |
+
identification_results = defaultdict(list)
|
564 |
+
for id_data, cls_data, soft_id_pred, hard_id_pred in zip(
|
565 |
+
identifier_data,
|
566 |
+
classifier_data,
|
567 |
+
soft_identification_preds,
|
568 |
+
hard_identification_preds,
|
569 |
+
):
|
570 |
+
res = IdentificationClassificationResult(
|
571 |
+
identification_data=id_data,
|
572 |
+
classification_data=cls_data,
|
573 |
+
# 1 is p(evidence|sent,query)
|
574 |
+
soft_identification=soft_id_pred[1].float().item(),
|
575 |
+
hard_identification=hard_id_pred,
|
576 |
+
soft_classification=None,
|
577 |
+
hard_classification=False,
|
578 |
+
)
|
579 |
+
identification_results[(id_data.ann_id, id_data.docid)].append(res)
|
580 |
+
|
581 |
+
best_identification_results = {
|
582 |
+
key: max(value, key=lambda x: x.soft_identification)
|
583 |
+
for key, value in identification_results.items()
|
584 |
+
}
|
585 |
+
logging.info(
|
586 |
+
f"Selected the best sentence for {len(identification_results)} examples from a total of {len(soft_identification_preds)} sentences"
|
587 |
+
)
|
588 |
+
ids, classification_data = zip(
|
589 |
+
*[
|
590 |
+
(k, v.classification_data)
|
591 |
+
for k, v in best_identification_results.items()
|
592 |
+
]
|
593 |
+
)
|
594 |
+
(
|
595 |
+
_,
|
596 |
+
soft_classification_preds,
|
597 |
+
hard_classification_preds,
|
598 |
+
classification_truth,
|
599 |
+
) = make_preds_epoch(
|
600 |
+
evidence_classifier,
|
601 |
+
classification_data,
|
602 |
+
batch_size,
|
603 |
+
device,
|
604 |
+
tensorize_model_inputs=tensorize_model_inputs,
|
605 |
+
)
|
606 |
+
classification_results = dict()
|
607 |
+
for eyeD, soft_class, hard_class in zip(
|
608 |
+
ids, soft_classification_preds, hard_classification_preds
|
609 |
+
):
|
610 |
+
input_id_result = best_identification_results[eyeD]
|
611 |
+
res = IdentificationClassificationResult(
|
612 |
+
identification_data=input_id_result.identification_data,
|
613 |
+
classification_data=input_id_result.classification_data,
|
614 |
+
soft_identification=input_id_result.soft_identification,
|
615 |
+
hard_identification=input_id_result.hard_identification,
|
616 |
+
soft_classification=soft_class,
|
617 |
+
hard_classification=hard_class,
|
618 |
+
)
|
619 |
+
classification_results[eyeD] = res
|
620 |
+
|
621 |
+
if score:
|
622 |
+
truth = []
|
623 |
+
pred = []
|
624 |
+
for res in classification_results.values():
|
625 |
+
truth.append(res.classification_data.kls)
|
626 |
+
pred.append(res.hard_classification)
|
627 |
+
# results[f'{name}_f1'] = classification_report(classification_truth, pred, target_names=class_labels, output_dict=True)
|
628 |
+
results[f"{name}_f1"] = classification_report(
|
629 |
+
classification_truth,
|
630 |
+
hard_classification_preds,
|
631 |
+
target_names=class_labels,
|
632 |
+
output_dict=True,
|
633 |
+
)
|
634 |
+
results[f"{name}_acc"] = accuracy_score(
|
635 |
+
classification_truth, hard_classification_preds
|
636 |
+
)
|
637 |
+
results[f"{name}_rationale"] = score_rationales(
|
638 |
+
annotations,
|
639 |
+
decoding_docs,
|
640 |
+
identifier_data,
|
641 |
+
soft_identification_preds,
|
642 |
+
)
|
643 |
+
|
644 |
+
# turn the above results into a format suitable for scoring via the rationale scorer
|
645 |
+
# n.b. the sentence-level evidence predictions (hard and soft) are
|
646 |
+
# broadcast to the token level for scoring. The comprehensiveness class
|
647 |
+
# score is also a lie since the pipeline model above is faithful by
|
648 |
+
# design.
|
649 |
+
decoded = dict()
|
650 |
+
decoded_scores = defaultdict(list)
|
651 |
+
for (ann_id, docid), pred in classification_results.items():
|
652 |
+
sentence_prediction_scores = [
|
653 |
+
x.soft_identification
|
654 |
+
for x in identification_results[(ann_id, docid)]
|
655 |
+
]
|
656 |
+
sentence_start_token = sum(
|
657 |
+
len(s)
|
658 |
+
for s in decoding_docs[docid][: pred.identification_data.index]
|
659 |
+
)
|
660 |
+
sentence_end_token = sentence_start_token + len(
|
661 |
+
decoding_docs[docid][pred.classification_data.index]
|
662 |
+
)
|
663 |
+
hard_rationale_predictions = [
|
664 |
+
{
|
665 |
+
"start_token": sentence_start_token,
|
666 |
+
"end_token": sentence_end_token,
|
667 |
+
}
|
668 |
+
]
|
669 |
+
soft_rationale_predictions = []
|
670 |
+
for sent_result in sorted(
|
671 |
+
identification_results[(ann_id, docid)],
|
672 |
+
key=lambda x: x.identification_data.index,
|
673 |
+
):
|
674 |
+
soft_rationale_predictions.extend(
|
675 |
+
sent_result.soft_identification
|
676 |
+
for _ in range(
|
677 |
+
len(
|
678 |
+
decoding_docs[sent_result.identification_data.docid][
|
679 |
+
sent_result.identification_data.index
|
680 |
+
]
|
681 |
+
)
|
682 |
+
)
|
683 |
+
)
|
684 |
+
if ann_id not in decoded:
|
685 |
+
decoded[ann_id] = {
|
686 |
+
"annotation_id": ann_id,
|
687 |
+
"rationales": [],
|
688 |
+
"classification": class_labels[pred.hard_classification],
|
689 |
+
"classification_scores": {
|
690 |
+
class_labels[i]: s.item()
|
691 |
+
for i, s in enumerate(pred.soft_classification)
|
692 |
+
},
|
693 |
+
# TODO this should turn into the data distribution for the predicted class
|
694 |
+
# "comprehensiveness_classification_scores": 0.0,
|
695 |
+
"truth": pred.classification_data.kls,
|
696 |
+
}
|
697 |
+
decoded[ann_id]["rationales"].append(
|
698 |
+
{
|
699 |
+
"docid": docid,
|
700 |
+
"hard_rationale_predictions": hard_rationale_predictions,
|
701 |
+
"soft_rationale_predictions": soft_rationale_predictions,
|
702 |
+
"soft_sentence_predictions": sentence_prediction_scores,
|
703 |
+
}
|
704 |
+
)
|
705 |
+
decoded_scores[ann_id].append(pred.soft_classification)
|
706 |
+
|
707 |
+
# in practice, this is always a single element operation:
|
708 |
+
# in evidence inference (prompt is really a prompt + document), fever (we split documents into two classifications), movies (you only have one opinion about a movie), or boolQ (single document prompts)
|
709 |
+
# this exists to support weird models we *might* implement for cose/esnli
|
710 |
+
for ann_id, scores_list in decoded_scores.items():
|
711 |
+
scores = torch.stack(scores_list)
|
712 |
+
score_avg = torch.mean(scores, dim=0)
|
713 |
+
# .float() because pytorch 1.3 introduces a bug where argmax is unsupported for float16
|
714 |
+
hard_pred = torch.argmax(score_avg.float()).item()
|
715 |
+
decoded[ann_id]["classification"] = class_labels[hard_pred]
|
716 |
+
decoded[ann_id]["classification_scores"] = {
|
717 |
+
class_labels[i]: s.item() for i, s in enumerate(score_avg)
|
718 |
+
}
|
719 |
+
return results, list(decoded.values())
|
720 |
+
|
721 |
+
test_results, test_decoded = decode_batch(prep(test), "test", score=False)
|
722 |
+
val_results, val_decoded = dict(), []
|
723 |
+
train_results, train_decoded = dict(), []
|
724 |
+
# val_results, val_decoded = decode_batch(prep(val), 'val', score=True, annotations=val)
|
725 |
+
# train_results, train_decoded = decode_batch(prep(train), 'train', score=True, annotations=train)
|
726 |
+
return (
|
727 |
+
dict(**train_results, **val_results, **test_results),
|
728 |
+
train_decoded,
|
729 |
+
val_decoded,
|
730 |
+
test_decoded,
|
731 |
+
)
|
732 |
+
|
733 |
+
|
734 |
+
def decode_evidence_tokens_and_classify(
|
735 |
+
evidence_token_identifier: nn.Module,
|
736 |
+
evidence_classifier: nn.Module,
|
737 |
+
train: List[Annotation],
|
738 |
+
val: List[Annotation],
|
739 |
+
test: List[Annotation],
|
740 |
+
docs: Dict[str, List[List[int]]],
|
741 |
+
source_documents: Dict[str, List[List[str]]],
|
742 |
+
token_mapping: Dict[str, List[List[Tuple[int, int]]]],
|
743 |
+
class_interner: Dict[str, int],
|
744 |
+
batch_size: int,
|
745 |
+
decoding_docs: Dict[str, List[Any]],
|
746 |
+
use_cose_hack: bool = False,
|
747 |
+
) -> dict:
|
748 |
+
"""Identifies and then classifies evidence
|
749 |
+
|
750 |
+
Args:
|
751 |
+
evidence_token_identifier: a module for identifying evidence statements
|
752 |
+
evidence_classifier: a module for making a classification based on evidence statements
|
753 |
+
train: A List of interned Annotations
|
754 |
+
val: A List of interned Annotations
|
755 |
+
test: A List of interned Annotations
|
756 |
+
docs: A Dict of Documents, which are interned sentences.
|
757 |
+
class_interner: Converts an Annotation's final class into ints
|
758 |
+
batch_size: how big should our batches be?
|
759 |
+
"""
|
760 |
+
device = None
|
761 |
+
class_labels = [k for k, v in sorted(class_interner.items(), key=lambda x: x[1])]
|
762 |
+
if decoding_docs is None:
|
763 |
+
decoding_docs = docs
|
764 |
+
|
765 |
+
def prep(data: List[Annotation]) -> List[Tuple[SentenceEvidence, SentenceEvidence]]:
|
766 |
+
"""Prepares data for evidence identification and classification.
|
767 |
+
|
768 |
+
Creates paired evaluation data, wherein each (annotation, docid, sentence, kls)
|
769 |
+
tuplet appears first as the kls determining if the sentence is evidence, and
|
770 |
+
secondarily what the overall classification for the (annotation/docid) pair is.
|
771 |
+
This allows selection based on model scores of the evidence_token_identifier for
|
772 |
+
input to the evidence_classifier.
|
773 |
+
"""
|
774 |
+
# identification_data = annotations_to_evidence_identification(data, docs)
|
775 |
+
classification_data = token_annotations_to_evidence_classification(
|
776 |
+
data, docs, class_interner
|
777 |
+
)
|
778 |
+
# annotation id -> docid -> [SentenceEvidence])
|
779 |
+
identification_data = annotations_to_evidence_token_identification(
|
780 |
+
data,
|
781 |
+
source_documents=decoding_docs,
|
782 |
+
interned_documents=docs,
|
783 |
+
token_mapping=token_mapping,
|
784 |
+
)
|
785 |
+
ann_doc_sents = defaultdict(
|
786 |
+
lambda: defaultdict(dict)
|
787 |
+
) # ann id -> docid -> sent idx -> sent data
|
788 |
+
ret = []
|
789 |
+
for sent_ev in classification_data:
|
790 |
+
id_data = identification_data[sent_ev.ann_id][sent_ev.docid][sent_ev.index]
|
791 |
+
ret.append((id_data, sent_ev))
|
792 |
+
assert id_data.ann_id == sent_ev.ann_id
|
793 |
+
assert id_data.docid == sent_ev.docid
|
794 |
+
# assert id_data.index == sent_ev.index
|
795 |
+
assert len(ret) == len(classification_data)
|
796 |
+
return ret
|
797 |
+
|
798 |
+
def decode_batch(
|
799 |
+
data: List[Tuple[SentenceEvidence, SentenceEvidence]],
|
800 |
+
name: str,
|
801 |
+
score: bool = False,
|
802 |
+
annotations: List[Annotation] = None,
|
803 |
+
class_labels: dict = class_labels,
|
804 |
+
) -> dict:
|
805 |
+
"""Identifies evidence statements and then makes classifications based on it.
|
806 |
+
|
807 |
+
Args:
|
808 |
+
data: a paired list of SentenceEvidences, differing only in the kls field.
|
809 |
+
The first corresponds to whether or not something is evidence, and the second corresponds to an evidence class
|
810 |
+
name: a name for a results dict
|
811 |
+
"""
|
812 |
+
|
813 |
+
num_uniques = len(set((x.ann_id, x.docid) for x, _ in data))
|
814 |
+
logging.info(
|
815 |
+
f"Decoding dataset {name} with {len(data)} sentences, {num_uniques} annotations"
|
816 |
+
)
|
817 |
+
identifier_data, classifier_data = zip(*data)
|
818 |
+
results = dict()
|
819 |
+
with torch.no_grad():
|
820 |
+
# make predictions for the evidence_token_identifier
|
821 |
+
evidence_token_identifier.eval()
|
822 |
+
evidence_classifier.eval()
|
823 |
+
|
824 |
+
(
|
825 |
+
_,
|
826 |
+
soft_identification_preds,
|
827 |
+
hard_identification_preds,
|
828 |
+
id_preds_truth,
|
829 |
+
) = make_token_preds_epoch(
|
830 |
+
evidence_token_identifier,
|
831 |
+
identifier_data,
|
832 |
+
token_mapping,
|
833 |
+
batch_size,
|
834 |
+
device,
|
835 |
+
tensorize_model_inputs=True,
|
836 |
+
)
|
837 |
+
assert len(soft_identification_preds) == len(data)
|
838 |
+
evidence_only_cls = []
|
839 |
+
for id_data, cls_data, soft_id_pred, hard_id_pred in zip(
|
840 |
+
identifier_data,
|
841 |
+
classifier_data,
|
842 |
+
soft_identification_preds,
|
843 |
+
hard_identification_preds,
|
844 |
+
):
|
845 |
+
            assert cls_data.ann_id == id_data.ann_id
            sent = []
            for start, end in token_mapping[cls_data.docid][0]:
                if bool(hard_id_pred[start]):
                    sent.extend(id_data.sentence[start:end])
            # assert len(sent) > 0
            new_cls_data = SentenceEvidence(
                cls_data.kls,
                cls_data.ann_id,
                cls_data.query,
                cls_data.docid,
                cls_data.index,
                tuple(sent),
            )
            evidence_only_cls.append(new_cls_data)
        (
            _,
            soft_classification_preds,
            hard_classification_preds,
            classification_truth,
        ) = make_preds_epoch(
            evidence_classifier,
            evidence_only_cls,
            batch_size,
            device,
            tensorize_model_inputs=True,
        )

        if use_cose_hack:
            logging.info(
                "Reformatting identification and classification results to fit COS-E"
            )
            grouping = 5
            new_soft_identification_preds = []
            new_hard_identification_preds = []
            new_id_preds_truth = []
            new_soft_classification_preds = []
            new_hard_classification_preds = []
            new_classification_truth = []
            new_identifier_data = []
            class_labels = []

            # TODO fix the labels for COS-E
            for i in range(0, len(soft_identification_preds), grouping):
                cls_scores = torch.stack(
                    soft_classification_preds[i : i + grouping]
                )
                cls_scores = nn.functional.softmax(cls_scores, dim=-1)
                cls_scores = cls_scores[:, 1]
                choice = torch.argmax(cls_scores)
                cls_labels = [
                    x.ann_id.split("_")[-1]
                    for x in evidence_only_cls[i : i + grouping]
                ]
                class_labels = cls_labels  # we need to update the class labels because of the terrible hackery used to train this
                cls_truths = [x.kls for x in evidence_only_cls[i : i + grouping]]
                # cls_choice = evidence_only_cls[i + choice].ann_id.split('_')[-1]
                cls_truth = np.argmax(cls_truths)
                new_soft_identification_preds.append(
                    soft_identification_preds[i + choice]
                )
                new_hard_identification_preds.append(
                    hard_identification_preds[i + choice]
                )
                new_id_preds_truth.append(id_preds_truth[i + choice])
                new_soft_classification_preds.append(
                    soft_classification_preds[i + choice]
                )
                new_hard_classification_preds.append(choice)
                new_identifier_data.append(identifier_data[i + choice])
                # new_hard_classification_preds.append(hard_classification_preds[i + choice])
                # new_classification_truth.append(classification_truth[i + choice])
                new_classification_truth.append(cls_truth)

            soft_identification_preds = new_soft_identification_preds
            hard_identification_preds = new_hard_identification_preds
            id_preds_truth = new_id_preds_truth
            soft_classification_preds = new_soft_classification_preds
            hard_classification_preds = new_hard_classification_preds
            classification_truth = new_classification_truth
            identifier_data = new_identifier_data
        if score:
            results[f"{name}_f1"] = classification_report(
                classification_truth,
                hard_classification_preds,
                target_names=class_labels,
                output_dict=True,
            )
            results[f"{name}_acc"] = accuracy_score(
                classification_truth, hard_classification_preds
            )
            results[f"{name}_token_pred_acc"] = accuracy_score(
                list(chain.from_iterable(id_preds_truth)),
                list(chain.from_iterable(hard_identification_preds)),
            )
            results[f"{name}_token_pred_f1"] = classification_report(
                list(chain.from_iterable(id_preds_truth)),
                list(chain.from_iterable(hard_identification_preds)),
                output_dict=True,
            )
            # TODO for token level stuff!
            soft_id_scores = [
                [1 - x, x] for x in chain.from_iterable(soft_identification_preds)
            ]
            results[f"{name}_rationale"] = score_rationales(
                annotations, decoding_docs, identifier_data, soft_id_scores
            )
            logging.info(f"Results: {results}")

        # turn the above results into a format suitable for scoring via the rationale scorer
        # n.b. the sentence-level evidence predictions (hard and soft) are
        # broadcast to the token level for scoring. The comprehensiveness class
        # score is also a lie since the pipeline model above is faithful by
        # design.
        decoded = dict()
        scores = []
        assert len(identifier_data) == len(soft_identification_preds)
        for (
            id_data,
            soft_id_pred,
            hard_id_pred,
            soft_cls_preds,
            hard_cls_pred,
        ) in zip(
            identifier_data,
            soft_identification_preds,
            hard_identification_preds,
            soft_classification_preds,
            hard_classification_preds,
        ):
            docid = id_data.docid
            if use_cose_hack:
                docid = "_".join(docid.split("_")[0:-1])
            assert len(docid) > 0
            rationales = {
                "docid": docid,
                "hard_rationale_predictions": [],
                # token level classifications, a value must be provided per-token
                # in an ideal world, these correspond to the hard-decoding above.
                "soft_rationale_predictions": [],
                # sentence level classifications, a value must be provided for every
                # sentence in each document, or not at all
                "soft_sentence_predictions": [1.0],
            }
            last = -1
            start_span = -1
            for pos, (start, _) in enumerate(token_mapping[id_data.docid][0]):
                rationales["soft_rationale_predictions"].append(soft_id_pred[start])
                if bool(hard_id_pred[start]):
                    if start_span == -1:
                        start_span = pos
                    last = pos
                else:
                    if start_span != -1:
                        rationales["hard_rationale_predictions"].append(
                            {
                                "start_token": start_span,
                                "end_token": last + 1,
                            }
                        )
                    last = -1
                    start_span = -1
            if start_span != -1:
                rationales["hard_rationale_predictions"].append(
                    {
                        "start_token": start_span,
                        "end_token": last + 1,
                    }
                )

            ann_id = id_data.ann_id
            if use_cose_hack:
                ann_id = "_".join(ann_id.split("_")[0:-1])
            soft_cls_preds = nn.functional.softmax(soft_cls_preds)
            decoded[id_data.ann_id] = {
                "annotation_id": ann_id,
                "rationales": [rationales],
                "classification": class_labels[hard_cls_pred],
                "classification_scores": {
                    class_labels[i]: score.item()
                    for i, score in enumerate(soft_cls_preds)
                },
            }
        return results, list(decoded.values())

    # test_results, test_decoded = dict(), []
    # val_results, val_decoded = dict(), []
    train_results, train_decoded = dict(), []
    val_results, val_decoded = decode_batch(
        prep(val), "val", score=True, annotations=val, class_labels=class_labels
    )
    test_results, test_decoded = decode_batch(
        prep(test), "test", score=False, class_labels=class_labels
    )
    # train_results, train_decoded = decode_batch(prep(train), 'train', score=True, annotations=train, class_labels=class_labels)
    return (
        dict(**train_results, **val_results, **test_results),
        train_decoded,
        val_decoded,
        test_decoded,
    )
Transformer-Explainability/BERT_rationale_benchmark/models/sequence_taggers.py
ADDED
@@ -0,0 +1,78 @@
from typing import Any, List, Tuple

import torch
import torch.nn as nn
from rationale_benchmark.models.model_utils import PaddedSequence
from transformers import BertModel


class BertTagger(nn.Module):
    def __init__(
        self,
        bert_dir: str,
        pad_token_id: int,
        cls_token_id: int,
        sep_token_id: int,
        max_length: int = 512,
        use_half_precision=True,
    ):
        super(BertTagger, self).__init__()
        self.sep_token_id = sep_token_id
        self.cls_token_id = cls_token_id
        self.pad_token_id = pad_token_id
        self.max_length = max_length
        bert = BertModel.from_pretrained(bert_dir)
        if use_half_precision:
            import apex

            bert = bert.half()
        self.bert = bert
        self.relevance_tagger = nn.Sequential(
            nn.Linear(self.bert.config.hidden_size, 1), nn.Sigmoid()
        )

    def forward(
        self,
        query: List[torch.tensor],
        docids: List[Any],
        document_batch: List[torch.tensor],
        aggregate_spans: List[Tuple[int, int]],
    ):
        assert len(query) == len(document_batch)
        # note about device management: since distributed training is enabled, the inputs to this module can be on
        # *any* device (preferably cpu, since we wrap and unwrap the module) we want to keep these params on the
        # input device (assuming CPU) for as long as possible for cheap memory access
        target_device = next(self.parameters()).device
        # cls_token = torch.tensor([self.cls_token_id]).to(device=document_batch[0].device)
        sep_token = torch.tensor([self.sep_token_id]).to(
            device=document_batch[0].device
        )
        input_tensors = []
        query_lengths = []
        for q, d in zip(query, document_batch):
            if len(q) + len(d) + 1 > self.max_length:
                d = d[: (self.max_length - len(q) - 1)]
            input_tensors.append(torch.cat([q, sep_token, d]))
            query_lengths.append(q.size()[0])
        bert_input = PaddedSequence.autopad(
            input_tensors,
            batch_first=True,
            padding_value=self.pad_token_id,
            device=target_device,
        )
        outputs = self.bert(
            bert_input.data,
            attention_mask=bert_input.mask(
                on=0.0, off=float("-inf"), dtype=torch.float, device=target_device
            ),
        )
        hidden = outputs[0]
        classes = self.relevance_tagger(hidden)
        ret = []
        for ql, cls, doc in zip(query_lengths, classes, document_batch):
            start = ql + 1
            end = start + len(doc)
            ret.append(cls[ql + 1 : end])
        return PaddedSequence.autopad(
            ret, batch_first=True, padding_value=0, device=target_device
        ).data.squeeze(dim=-1)
Transformer-Explainability/BERT_rationale_benchmark/utils.py
ADDED
@@ -0,0 +1,251 @@
import json
import os
from dataclasses import asdict, dataclass, is_dataclass
from itertools import chain
from typing import Dict, FrozenSet, List, Set, Tuple, Union


@dataclass(eq=True, frozen=True)
class Evidence:
    """
    (docid, start_token, end_token) form the only official Evidence; sentence level annotations are for convenience.
    Args:
        text: Some representation of the evidence text
        docid: Some identifier for the document
        start_token: The canonical start token, inclusive
        end_token: The canonical end token, exclusive
        start_sentence: Best guess start sentence, inclusive
        end_sentence: Best guess end sentence, exclusive
    """

    text: Union[str, Tuple[int], Tuple[str]]
    docid: str
    start_token: int = -1
    end_token: int = -1
    start_sentence: int = -1
    end_sentence: int = -1


@dataclass(eq=True, frozen=True)
class Annotation:
    """
    Args:
        annotation_id: unique ID for this annotation element
        query: some representation of a query string
        evidences: a set of "evidence groups".
            Each evidence group is:
                * sufficient to respond to the query (or justify an answer)
                * composed of one or more Evidences
                * may have multiple documents in it (depending on the dataset)
                    - e-snli has multiple documents
                    - other datasets do not
        classification: str
        query_type: Optional str, additional information about the query
        docids: a set of docids in which one may find evidence.
    """

    annotation_id: str
    query: Union[str, Tuple[int]]
    evidences: Union[Set[Tuple[Evidence]], FrozenSet[Tuple[Evidence]]]
    classification: str
    query_type: str = None
    docids: Set[str] = None

    def all_evidences(self) -> Tuple[Evidence]:
        return tuple(list(chain.from_iterable(self.evidences)))


def annotations_to_jsonl(annotations, output_file):
    with open(output_file, "w") as of:
        for ann in sorted(annotations, key=lambda x: x.annotation_id):
            as_json = _annotation_to_dict(ann)
            as_str = json.dumps(as_json, sort_keys=True)
            of.write(as_str)
            of.write("\n")


def _annotation_to_dict(dc):
    # convenience method
    if is_dataclass(dc):
        d = asdict(dc)
        ret = dict()
        for k, v in d.items():
            ret[k] = _annotation_to_dict(v)
        return ret
    elif isinstance(dc, dict):
        ret = dict()
        for k, v in dc.items():
            k = _annotation_to_dict(k)
            v = _annotation_to_dict(v)
            ret[k] = v
        return ret
    elif isinstance(dc, str):
        return dc
    elif isinstance(dc, (set, frozenset, list, tuple)):
        ret = []
        for x in dc:
            ret.append(_annotation_to_dict(x))
        return tuple(ret)
    else:
        return dc


def load_jsonl(fp: str) -> List[dict]:
    ret = []
    with open(fp, "r") as inf:
        for line in inf:
            content = json.loads(line)
            ret.append(content)
    return ret


def write_jsonl(jsonl, output_file):
    with open(output_file, "w") as of:
        for js in jsonl:
            as_str = json.dumps(js, sort_keys=True)
            of.write(as_str)
            of.write("\n")


def annotations_from_jsonl(fp: str) -> List[Annotation]:
    ret = []
    with open(fp, "r") as inf:
        for line in inf:
            content = json.loads(line)
            ev_groups = []
            for ev_group in content["evidences"]:
                ev_group = tuple([Evidence(**ev) for ev in ev_group])
                ev_groups.append(ev_group)
            content["evidences"] = frozenset(ev_groups)
            ret.append(Annotation(**content))
    return ret


def load_datasets(
    data_dir: str,
) -> Tuple[List[Annotation], List[Annotation], List[Annotation]]:
    """Loads a training, validation, and test dataset

    Each dataset is assumed to have been serialized by annotations_to_jsonl,
    that is it is a list of json-serialized Annotation instances.
    """
    train_data = annotations_from_jsonl(os.path.join(data_dir, "train.jsonl"))
    val_data = annotations_from_jsonl(os.path.join(data_dir, "val.jsonl"))
    test_data = annotations_from_jsonl(os.path.join(data_dir, "test.jsonl"))
    return train_data, val_data, test_data


def load_documents(
    data_dir: str, docids: Set[str] = None
) -> Dict[str, List[List[str]]]:
    """Loads a subset of available documents from disk.

    Each document is assumed to be serialized as newline ('\n') separated sentences.
    Each sentence is assumed to be space (' ') joined tokens.
    """
    if os.path.exists(os.path.join(data_dir, "docs.jsonl")):
        assert not os.path.exists(os.path.join(data_dir, "docs"))
        return load_documents_from_file(data_dir, docids)

    docs_dir = os.path.join(data_dir, "docs")
    res = dict()
    if docids is None:
        docids = sorted(os.listdir(docs_dir))
    else:
        docids = sorted(set(str(d) for d in docids))
    for d in docids:
        with open(os.path.join(docs_dir, d), "r") as inf:
            res[d] = inf.read()
    return res


def load_flattened_documents(data_dir: str, docids: Set[str]) -> Dict[str, List[str]]:
    """Loads a subset of available documents from disk.

    Returns a tokenized version of the document.
    """
    unflattened_docs = load_documents(data_dir, docids)
    flattened_docs = dict()
    for doc, unflattened in unflattened_docs.items():
        flattened_docs[doc] = list(chain.from_iterable(unflattened))
    return flattened_docs


def intern_documents(
    documents: Dict[str, List[List[str]]], word_interner: Dict[str, int], unk_token: str
):
    """
    Replaces every word with its index in an embeddings file.

    If a word is not found, uses the unk_token instead
    """
    ret = dict()
    unk = word_interner[unk_token]
    for docid, sentences in documents.items():
        ret[docid] = [[word_interner.get(w, unk) for w in s] for s in sentences]
    return ret


def intern_annotations(
    annotations: List[Annotation], word_interner: Dict[str, int], unk_token: str
):
    ret = []
    for ann in annotations:
        ev_groups = []
        for ev_group in ann.evidences:
            evs = []
            for ev in ev_group:
                evs.append(
                    Evidence(
                        text=tuple(
                            [
                                word_interner.get(t, word_interner[unk_token])
                                for t in ev.text.split()
                            ]
                        ),
                        docid=ev.docid,
                        start_token=ev.start_token,
                        end_token=ev.end_token,
                        start_sentence=ev.start_sentence,
                        end_sentence=ev.end_sentence,
                    )
                )
            ev_groups.append(tuple(evs))
        ret.append(
            Annotation(
                annotation_id=ann.annotation_id,
                query=tuple(
                    [
                        word_interner.get(t, word_interner[unk_token])
                        for t in ann.query.split()
                    ]
                ),
                evidences=frozenset(ev_groups),
                classification=ann.classification,
                query_type=ann.query_type,
            )
        )
    return ret


def load_documents_from_file(
    data_dir: str, docids: Set[str] = None
) -> Dict[str, List[List[str]]]:
    """Loads a subset of available documents from 'docs.jsonl' file on disk.

    Each document is assumed to be serialized as newline ('\n') separated sentences.
    Each sentence is assumed to be space (' ') joined tokens.
    """
    docs_file = os.path.join(data_dir, "docs.jsonl")
    documents = load_jsonl(docs_file)
    documents = {doc["docid"]: doc["document"] for doc in documents}
    # res = dict()
    # if docids is None:
    #     docids = sorted(list(documents.keys()))
    # else:
    #     docids = sorted(set(str(d) for d in docids))
    # for d in docids:
    #     lines = documents[d].split('\n')
    #     tokenized = [line.strip().split(' ') for line in lines]
    #     res[d] = tokenized
    return documents
Transformer-Explainability/DeiT.PNG
ADDED
Transformer-Explainability/DeiT_example.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Transformer-Explainability/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Hila Chefer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Transformer-Explainability/README.md
ADDED
@@ -0,0 +1,153 @@
# PyTorch Implementation of [Transformer Interpretability Beyond Attention Visualization](https://arxiv.org/abs/2012.09838) [CVPR 2021]

#### Check out our new advancements: [Generic Attention-model Explainability for Interpreting Bi-Modal and Encoder-Decoder Transformers](https://github.com/hila-chefer/Transformer-MM-Explainability)!
Faster, more general, and can be applied to *any* type of attention!
Among the features:
* We remove LRP for a simple and quick solution, and show that the strong results from our first paper still hold!
* We extend our work to *any* type of Transformer: not just self-attention-based encoders, but also co-attention encoders and encoder-decoders!
* We show that VQA models can actually understand both image and text and make connections between them!
* We use a DETR object detector and create segmentation masks from our explanations!
* We provide a Colab notebook with all the examples. You can very easily add images and questions of your own!

<p align="center">
  <img width="400" height="450" src="new_work.jpg">
</p>

---
## ViT explainability notebook:
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hila-chefer/Transformer-Explainability/blob/main/Transformer_explainability.ipynb)

## BERT explainability notebook:
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hila-chefer/Transformer-Explainability/blob/main/BERT_explainability.ipynb)
---

## Updates
April 5 2021: Check out this new [post](https://analyticsindiamag.com/compute-relevancy-of-transformer-networks-via-novel-interpretable-transformer/) about our paper! A great resource for understanding the main concepts behind our work.

March 15 2021: [A Colab notebook for BERT sentiment analysis added!](https://colab.research.google.com/github/hila-chefer/Transformer-Explainability/blob/main/BERT_explainability.ipynb)

Feb 28 2021: Our paper was accepted to CVPR 2021!

Feb 17 2021: [A Colab notebook with all examples added!](https://github.com/hila-chefer/Transformer-Explainability/blob/main/Transformer_explainability.ipynb)

Jan 5 2021: [A Jupyter notebook for DeiT added!](https://github.com/hila-chefer/Transformer-Explainability/blob/main/DeiT_example.ipynb)


<p align="center">
  <img width="300" height="460" src="https://github.com/hila-chefer/Transformer-Explainability/blob/main/DeiT.PNG">
</p>


## Introduction
Official implementation of [Transformer Interpretability Beyond Attention Visualization](https://arxiv.org/abs/2012.09838).

We introduce a novel method for visualizing the classifications made by a Transformer-based model, for both vision and NLP tasks.
Our method also allows visualizing explanations per class.

<p align="center">
  <img width="600" height="200" src="https://github.com/hila-chefer/Transformer-Explainability/blob/main/method-page-001.jpg">
</p>
The method consists of 3 phases:

1. Calculating relevance for each attention matrix using our novel formulation of LRP.

2. Backpropagation of gradients for each attention matrix w.r.t. the visualized class. The gradients are used to average the attention heads.

3. Layer aggregation with rollout (a condensed sketch of how the three phases combine is shown below).
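
The following is a minimal sketch, not code from this repository, of how the three phases fit together for ViT. It assumes a model built from `baselines/ViT/ViT_LRP.py`, whose attention blocks expose `get_attn_cam()` and `get_attn_gradients()`; the free function `transformer_attribution_sketch` is purely illustrative.

```
import torch

def transformer_attribution_sketch(blocks, start_layer=0):
    # Phase 1 relevance and phase 2 gradients are read from each attention block,
    # combined per head, and clamped so that only positive evidence is kept.
    cams = []
    for blk in blocks:
        grad = blk.attn.get_attn_gradients()
        cam = blk.attn.get_attn_cam()
        cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
        grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
        cam = (grad * cam).clamp(min=0).mean(dim=0)  # gradient-weighted head average
        cams.append(cam.unsqueeze(0))
    # Phase 3: rollout across layers, adding the identity to account for residual connections.
    num_tokens = cams[0].shape[-1]
    eye = torch.eye(num_tokens, device=cams[0].device)
    joint = cams[start_layer] + eye
    for cam in cams[start_layer + 1:]:
        joint = (cam + eye).bmm(joint)
    return joint[:, 0, 1:]  # relevance of every patch token w.r.t. the CLS token
```

This mirrors the `transformer_attribution` branch of `VisionTransformer.relprop` and `compute_rollout_attention` in `baselines/ViT/ViT_LRP.py`, which are the authoritative implementations.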

Please notice our [Jupyter notebook](https://github.com/hila-chefer/Transformer-Explainability/blob/main/example.ipynb) where you can run the two class-specific examples from the paper.



To add another input image, simply add the image to the [samples folder](https://github.com/hila-chefer/Transformer-Explainability/tree/main/samples) and use the `generate_visualization` function for your selected class of interest (using `class_index={class_idx}`); if no index is specified, the top class is visualized.
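
As a rough illustration only: `generate_visualization` is defined inside the notebooks, so its exact signature may differ, and the preprocessing, sample filename, and class index below are assumptions.

```
from PIL import Image
from torchvision import transforms

# Assumed preprocessing for the pretrained ViT (mean/std 0.5, 224x224 input).
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
image = transform(Image.open("samples/your_image.png"))  # hypothetical file name

vis_top = generate_visualization(image)                   # visualize the top predicted class
vis_cls = generate_visualization(image, class_index=243)  # visualize a chosen class index
```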

## Credits
ViT implementation is based on:
- https://github.com/rwightman/pytorch-image-models
- https://github.com/lucidrains/vit-pytorch
- pretrained weights from: https://github.com/google-research/vision_transformer

BERT implementation is taken from the huggingface Transformers library:
https://huggingface.co/transformers/

ERASER benchmark code adapted from the ERASER GitHub implementation: https://github.com/jayded/eraserbenchmark

Text visualizations in the supplementary were created using the TAHV heatmap generator for text: https://github.com/jiesutd/Text-Attention-Heatmap-Visualization

## Reproducing results on ViT

### Section A. Segmentation Results

Example:
```
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=./:$PYTHONPATH python3 baselines/ViT/imagenet_seg_eval.py --method transformer_attribution --imagenet-seg-path /path/to/gtsegs_ijcv.mat
```
[Link to download dataset](http://calvin-vision.net/bigstuff/proj-imagenet/data/gtsegs_ijcv.mat).

In the example above we run a segmentation test with our method. Notice that you can choose which method you wish to run using the `--method` argument.
You must provide a path to the ImageNet segmentation data in `--imagenet-seg-path`.

### Section B. Perturbation Results

Example:
```
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=./:$PYTHONPATH python3 baselines/ViT/generate_visualizations.py --method transformer_attribution --imagenet-validation-path /path/to/imagenet_validation_directory
```

Notice that you can choose to visualize by target or top class by using the `--vis-cls` argument.

Now, to run the perturbation test, run the following command:
```
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=./:$PYTHONPATH python3 baselines/ViT/pertubation_eval_from_hdf5.py --method transformer_attribution
```

Notice that you can use the `--neg` argument to run either positive or negative perturbation.

## Reproducing results on BERT

1. Download the pretrained weights:

- Download `classifier.zip` from https://drive.google.com/file/d/1kGMTr69UWWe70i-o2_JfjmWDQjT66xwQ/view?usp=sharing
- mkdir -p `./bert_models/movies`
- unzip classifier.zip -d ./bert_models/movies/

2. Download the dataset pkl file:

- Download `preprocessed.pkl` from https://drive.google.com/file/d/1-gfbTj6D87KIm_u1QMHGLKSL3e93hxBH/view?usp=sharing
- mv preprocessed.pkl ./bert_models/movies

3. Download the dataset:

- Download `movies.zip` from https://drive.google.com/file/d/11faFLGkc0hkw3wrGTYJBr1nIvkRb189F/view?usp=sharing
- unzip movies.zip -d ./data/

4. Now you can run the model.

Example:
```
CUDA_VISIBLE_DEVICES=0 PYTHONPATH=./:$PYTHONPATH python3 BERT_rationale_benchmark/models/pipeline/bert_pipeline.py --data_dir data/movies/ --output_dir bert_models/movies/ --model_params BERT_params/movies_bert.json
```
To control which algorithm is used for explanations, change the `method` variable in `BERT_rationale_benchmark/models/pipeline/bert_pipeline.py` (defaults to 'transformer_attribution', which is our method).
Running this command will create a directory for the method in `bert_models/movies/<method_name>`.

To run the F1 test with k, run the following command:
```
PYTHONPATH=./:$PYTHONPATH python3 BERT_rationale_benchmark/metrics.py --data_dir data/movies/ --split test --results bert_models/movies/<method_name>/identifier_results_k.json
```

In addition, the method directory will contain generated `.tex` files with the explanations extracted for each example. These correspond to our visualizations in the supplementary.

## Citing our paper
If you make use of our work, please cite our paper:
```
@InProceedings{Chefer_2021_CVPR,
    author    = {Chefer, Hila and Gur, Shir and Wolf, Lior},
    title     = {Transformer Interpretability Beyond Attention Visualization},
    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
    month     = {June},
    year      = {2021},
    pages     = {782-791}
}
```
Transformer-Explainability/Transformer_explainability.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
Transformer-Explainability/baselines/ViT/ViT_LRP.py
ADDED
@@ -0,0 +1,535 @@
""" Vision Transformer (ViT) in PyTorch
Hacked together by / Copyright 2020 Ross Wightman
"""
import torch
import torch.nn as nn
from baselines.ViT.helpers import load_pretrained
from baselines.ViT.layer_helpers import to_2tuple
from baselines.ViT.weight_init import trunc_normal_
from einops import rearrange
from modules.layers_ours import *


def _cfg(url="", **kwargs):
    return {
        "url": url,
        "num_classes": 1000,
        "input_size": (3, 224, 224),
        "pool_size": None,
        "crop_pct": 0.9,
        "interpolation": "bicubic",
        "first_conv": "patch_embed.proj",
        "classifier": "head",
        **kwargs,
    }


default_cfgs = {
    # patch models
    "vit_small_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth",
    ),
    "vit_base_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
    "vit_large_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
}


def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
    #                       for i in range(len(all_layer_matrices))]
    joint_attention = all_layer_matrices[start_layer]
    for i in range(start_layer + 1, len(all_layer_matrices)):
        joint_attention = all_layer_matrices[i].bmm(joint_attention)
    return joint_attention


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.0):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = Linear(in_features, hidden_features)
        self.act = GELU()
        self.fc2 = Linear(hidden_features, out_features)
        self.drop = Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

    def relprop(self, cam, **kwargs):
        cam = self.drop.relprop(cam, **kwargs)
        cam = self.fc2.relprop(cam, **kwargs)
        cam = self.act.relprop(cam, **kwargs)
        cam = self.fc1.relprop(cam, **kwargs)
        return cam


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0.0, proj_drop=0.0):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = head_dim**-0.5

        # A = Q*K^T
        self.matmul1 = einsum("bhid,bhjd->bhij")
        # attn = A*V
        self.matmul2 = einsum("bhij,bhjd->bhid")

        self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = Dropout(attn_drop)
        self.proj = Linear(dim, dim)
        self.proj_drop = Dropout(proj_drop)
        self.softmax = Softmax(dim=-1)

        self.attn_cam = None
        self.attn = None
        self.v = None
        self.v_cam = None
        self.attn_gradients = None

    def get_attn(self):
        return self.attn

    def save_attn(self, attn):
        self.attn = attn

    def save_attn_cam(self, cam):
        self.attn_cam = cam

    def get_attn_cam(self):
        return self.attn_cam

    def get_v(self):
        return self.v

    def save_v(self, v):
        self.v = v

    def save_v_cam(self, cam):
        self.v_cam = cam

    def get_v_cam(self):
        return self.v_cam

    def save_attn_gradients(self, attn_gradients):
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        return self.attn_gradients

    def forward(self, x):
        b, n, _, h = *x.shape, self.num_heads
        qkv = self.qkv(x)
        q, k, v = rearrange(qkv, "b n (qkv h d) -> qkv b h n d", qkv=3, h=h)

        self.save_v(v)

        dots = self.matmul1([q, k]) * self.scale

        attn = self.softmax(dots)
        attn = self.attn_drop(attn)

        self.save_attn(attn)
        attn.register_hook(self.save_attn_gradients)

        out = self.matmul2([attn, v])
        out = rearrange(out, "b h n d -> b n (h d)")

        out = self.proj(out)
        out = self.proj_drop(out)
        return out

    def relprop(self, cam, **kwargs):
        cam = self.proj_drop.relprop(cam, **kwargs)
        cam = self.proj.relprop(cam, **kwargs)
        cam = rearrange(cam, "b n (h d) -> b h n d", h=self.num_heads)

        # attn = A*V
        (cam1, cam_v) = self.matmul2.relprop(cam, **kwargs)
        cam1 /= 2
        cam_v /= 2

        self.save_v_cam(cam_v)
        self.save_attn_cam(cam1)

        cam1 = self.attn_drop.relprop(cam1, **kwargs)
        cam1 = self.softmax.relprop(cam1, **kwargs)

        # A = Q*K^T
        (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
        cam_q /= 2
        cam_k /= 2

        cam_qkv = rearrange(
            [cam_q, cam_k, cam_v],
            "qkv b h n d -> b n (qkv h d)",
            qkv=3,
            h=self.num_heads,
        )

        return self.qkv.relprop(cam_qkv, **kwargs)


class Block(nn.Module):
    def __init__(
        self, dim, num_heads, mlp_ratio=4.0, qkv_bias=False, drop=0.0, attn_drop=0.0
    ):
        super().__init__()
        self.norm1 = LayerNorm(dim, eps=1e-6)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.norm2 = LayerNorm(dim, eps=1e-6)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)

        self.add1 = Add()
        self.add2 = Add()
        self.clone1 = Clone()
        self.clone2 = Clone()

    def forward(self, x):
        x1, x2 = self.clone1(x, 2)
        x = self.add1([x1, self.attn(self.norm1(x2))])
        x1, x2 = self.clone2(x, 2)
        x = self.add2([x1, self.mlp(self.norm2(x2))])
        return x

    def relprop(self, cam, **kwargs):
        (cam1, cam2) = self.add2.relprop(cam, **kwargs)
        cam2 = self.mlp.relprop(cam2, **kwargs)
        cam2 = self.norm2.relprop(cam2, **kwargs)
        cam = self.clone2.relprop((cam1, cam2), **kwargs)

        (cam1, cam2) = self.add1.relprop(cam, **kwargs)
        cam2 = self.attn.relprop(cam2, **kwargs)
        cam2 = self.norm1.relprop(cam2, **kwargs)
        cam = self.clone1.relprop((cam1, cam2), **kwargs)
        return cam


class PatchEmbed(nn.Module):
    """Image to Patch Embedding"""

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        self.proj = Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert (
            H == self.img_size[0] and W == self.img_size[1]
        ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x

    def relprop(self, cam, **kwargs):
        cam = cam.transpose(1, 2)
        cam = cam.reshape(
            cam.shape[0],
            cam.shape[1],
            (self.img_size[0] // self.patch_size[0]),
            (self.img_size[1] // self.patch_size[1]),
        )
        return self.proj.relprop(cam, **kwargs)


class VisionTransformer(nn.Module):
    """Vision Transformer with support for patch or hybrid CNN input stage"""

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        num_classes=1000,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        mlp_head=False,
        drop_rate=0.0,
        attn_drop_rate=0.0,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = (
            self.embed_dim
        ) = embed_dim  # num_features for consistency with other models
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = self.patch_embed.num_patches

        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))

        self.blocks = nn.ModuleList(
            [
                Block(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    drop=drop_rate,
                    attn_drop=attn_drop_rate,
                )
                for i in range(depth)
            ]
        )

        self.norm = LayerNorm(embed_dim)
        if mlp_head:
            # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
            self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
        else:
            # with a single Linear layer as head, the param count within rounding of paper
            self.head = Linear(embed_dim, num_classes)

        # FIXME not quite sure what the proper weight init is supposed to be,
        # normal / trunc normal w/ std == .02 similar to other Bert like transformers
        trunc_normal_(self.pos_embed, std=0.02)  # embeddings same as weights?
        trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)

        self.pool = IndexSelect()
        self.add = Add()

        self.inp_grad = None

    def save_inp_grad(self, grad):
        self.inp_grad = grad

    def get_inp_grad(self):
        return self.inp_grad

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @property
    def no_weight_decay(self):
        return {"pos_embed", "cls_token"}

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(
            B, -1, -1
        )  # stole cls_tokens impl from Phil Wang, thanks
        x = torch.cat((cls_tokens, x), dim=1)
        x = self.add([x, self.pos_embed])

        x.register_hook(self.save_inp_grad)

        for blk in self.blocks:
            x = blk(x)

        x = self.norm(x)
        x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
        x = x.squeeze(1)
        x = self.head(x)
        return x

    def relprop(
        self,
        cam=None,
        method="transformer_attribution",
        is_ablation=False,
        start_layer=0,
        **kwargs,
    ):
        # print(kwargs)
        # print("conservation 1", cam.sum())
        cam = self.head.relprop(cam, **kwargs)
        cam = cam.unsqueeze(1)
        cam = self.pool.relprop(cam, **kwargs)
        cam = self.norm.relprop(cam, **kwargs)
        for blk in reversed(self.blocks):
            cam = blk.relprop(cam, **kwargs)

        # print("conservation 2", cam.sum())
        # print("min", cam.min())

        if method == "full":
            (cam, _) = self.add.relprop(cam, **kwargs)
            cam = cam[:, 1:]
            cam = self.patch_embed.relprop(cam, **kwargs)
            # sum on channels
            cam = cam.sum(dim=1)
            return cam

        elif method == "rollout":
            # cam rollout
            attn_cams = []
            for blk in self.blocks:
                attn_heads = blk.attn.get_attn_cam().clamp(min=0)
                avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
                attn_cams.append(avg_heads)
            cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
            cam = cam[:, 0, 1:]
            return cam

        # our method, method name grad is legacy
        elif method == "transformer_attribution" or method == "grad":
            cams = []
            for blk in self.blocks:
                grad = blk.attn.get_attn_gradients()
                cam = blk.attn.get_attn_cam()
                cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
                cam = cam.clamp(min=0).mean(dim=0)
                cams.append(cam.unsqueeze(0))
            rollout = compute_rollout_attention(cams, start_layer=start_layer)
            cam = rollout[:, 0, 1:]
            return cam

        elif method == "last_layer":
            cam = self.blocks[-1].attn.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            if is_ablation:
                grad = self.blocks[-1].attn.get_attn_gradients()
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam

        elif method == "last_layer_attn":
            cam = self.blocks[-1].attn.get_attn()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam

        elif method == "second_layer":
            cam = self.blocks[1].attn.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            if is_ablation:
                grad = self.blocks[1].attn.get_attn_gradients()
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam


def _conv_filter(state_dict, patch_size=16):
    """convert patch embedding weight from manual patchify + linear proj to conv"""
    out_dict = {}
    for k, v in state_dict.items():
        if "patch_embed.proj.weight" in k:
            v = v.reshape((v.shape[0], 3, patch_size, patch_size))
        out_dict[k] = v
    return out_dict


def vit_base_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_base_patch16_224"]
    if pretrained:
        load_pretrained(
            model,
            num_classes=model.num_classes,
            in_chans=kwargs.get("in_chans", 3),
            filter_fn=_conv_filter,
        )
    return model


def vit_large_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4,
        qkv_bias=True,
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_large_patch16_224"]
    if pretrained:
        load_pretrained(
            model, num_classes=model.num_classes, in_chans=kwargs.get("in_chans", 3)
        )
    return model


def deit_base_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        **kwargs,
    )
    model.default_cfg = _cfg()
    if pretrained:
        checkpoint = torch.hub.load_state_dict_from_url(
            url="https://dl.fbaipublicfiles.com/deit/deit_base_patch16_224-b5f2ef4d.pth",
            map_location="cpu",
            check_hash=True,
        )
        model.load_state_dict(checkpoint["model"])
    return model
Transformer-Explainability/baselines/ViT/ViT_explanation_generator.py
ADDED
@@ -0,0 +1,107 @@
import argparse

import numpy as np
import torch
from numpy import *


# compute rollout between attention layers
def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration- code adapted from https://github.com/samiraabnar/attention_flow
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    matrices_aug = [
        all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
        for i in range(len(all_layer_matrices))
    ]
    joint_attention = matrices_aug[start_layer]
    for i in range(start_layer + 1, len(matrices_aug)):
        joint_attention = matrices_aug[i].bmm(joint_attention)
    return joint_attention


class LRP:
    def __init__(self, model):
        self.model = model
        self.model.eval()

    def generate_LRP(
        self,
        input,
        index=None,
        method="transformer_attribution",
        is_ablation=False,
        start_layer=0,
    ):
        output = self.model(input)
        kwargs = {"alpha": 1}
        if index == None:
            index = np.argmax(output.cpu().data.numpy(), axis=-1)

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0, index] = 1
        one_hot_vector = one_hot
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)

        return self.model.relprop(
            torch.tensor(one_hot_vector).to(input.device),
            method=method,
            is_ablation=is_ablation,
            start_layer=start_layer,
            **kwargs
        )


class Baselines:
    def __init__(self, model):
        self.model = model
        self.model.eval()

    def generate_cam_attn(self, input, index=None):
        output = self.model(input.cuda(), register_hook=True)
        if index == None:
            index = np.argmax(output.cpu().data.numpy())

        one_hot = np.zeros((1, output.size()[-1]), dtype=np.float32)
        one_hot[0][index] = 1
        one_hot = torch.from_numpy(one_hot).requires_grad_(True)
        one_hot = torch.sum(one_hot.cuda() * output)

        self.model.zero_grad()
        one_hot.backward(retain_graph=True)
        #################### attn
        grad = self.model.blocks[-1].attn.get_attn_gradients()
        cam = self.model.blocks[-1].attn.get_attention_map()
        cam = cam[0, :, 0, 1:].reshape(-1, 14, 14)
        grad = grad[0, :, 0, 1:].reshape(-1, 14, 14)
        grad = grad.mean(dim=[1, 2], keepdim=True)
        cam = (cam * grad).mean(0).clamp(min=0)
        cam = (cam - cam.min()) / (cam.max() - cam.min())

        return cam
        #################### attn

    def generate_rollout(self, input, start_layer=0):
        self.model(input)
        blocks = self.model.blocks
        all_layer_attentions = []
        for blk in blocks:
            attn_heads = blk.attn.get_attention_map()
            avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
            all_layer_attentions.append(avg_heads)
        rollout = compute_rollout_attention(
            all_layer_attentions, start_layer=start_layer
        )
        return rollout[:, 0, 1:]
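A short, illustrative usage sketch for the generators defined above; the random tensor stands in for a preprocessed image batch, and a CUDA device is assumed because `generate_LRP` moves the one-hot target to the GPU.
```
import torch
from baselines.ViT.ViT_LRP import vit_base_patch16_224
from baselines.ViT.ViT_explanation_generator import LRP

# Build the LRP-enabled ViT and wrap it in the explanation generator.
model = vit_base_patch16_224(pretrained=True).cuda()
explainer = LRP(model)

image = torch.randn(1, 3, 224, 224).cuda()  # stand-in for a preprocessed 224x224 image
# Per-token relevance for the top predicted class; pass index=... for a specific class.
relevance = explainer.generate_LRP(image, method="transformer_attribution", start_layer=0)
heatmap = relevance.reshape(1, 1, 14, 14)   # 224 / 16 = 14 patches per side
```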
Transformer-Explainability/baselines/ViT/ViT_new.py
ADDED
@@ -0,0 +1,329 @@
""" Vision Transformer (ViT) in PyTorch
Hacked together by / Copyright 2020 Ross Wightman
"""
from functools import partial

import torch
import torch.nn as nn
from baselines.ViT.helpers import load_pretrained
from baselines.ViT.layer_helpers import to_2tuple
from baselines.ViT.weight_init import trunc_normal_
from einops import rearrange


def _cfg(url="", **kwargs):
    return {
        "url": url,
        "num_classes": 1000,
        "input_size": (3, 224, 224),
        "pool_size": None,
        "crop_pct": 0.9,
        "interpolation": "bicubic",
        "first_conv": "patch_embed.proj",
        "classifier": "head",
        **kwargs,
    }


default_cfgs = {
    # patch models
    "vit_small_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth",
    ),
    "vit_base_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
    "vit_large_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
}


class Mlp(nn.Module):
    def __init__(
        self,
        in_features,
        hidden_features=None,
        out_features=None,
        act_layer=nn.GELU,
        drop=0.0,
    ):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0.0, proj_drop=0.0):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

        self.attn_gradients = None
        self.attention_map = None

    def save_attn_gradients(self, attn_gradients):
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        return self.attn_gradients

    def save_attention_map(self, attention_map):
        self.attention_map = attention_map

    def get_attention_map(self):
        return self.attention_map

    def forward(self, x, register_hook=False):
        b, n, _, h = *x.shape, self.num_heads

        # self.save_output(x)
        # x.register_hook(self.save_output_grad)

        qkv = self.qkv(x)
        q, k, v = rearrange(qkv, "b n (qkv h d) -> qkv b h n d", qkv=3, h=h)

        dots = torch.einsum("bhid,bhjd->bhij", q, k) * self.scale

        attn = dots.softmax(dim=-1)
        attn = self.attn_drop(attn)

        out = torch.einsum("bhij,bhjd->bhid", attn, v)

        self.save_attention_map(attn)
        if register_hook:
            attn.register_hook(self.save_attn_gradients)

        out = rearrange(out, "b h n d -> b n (h d)")
        out = self.proj(out)
        out = self.proj_drop(out)
        return out


class Block(nn.Module):
    def __init__(
        self,
        dim,
        num_heads,
        mlp_ratio=4.0,
        qkv_bias=False,
        drop=0.0,
        attn_drop=0.0,
        act_layer=nn.GELU,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        self.norm1 = norm_layer(dim)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(
            in_features=dim,
            hidden_features=mlp_hidden_dim,
            act_layer=act_layer,
            drop=drop,
        )

    def forward(self, x, register_hook=False):
        x = x + self.attn(self.norm1(x), register_hook=register_hook)
        x = x + self.mlp(self.norm2(x))
        return x


class PatchEmbed(nn.Module):
    """Image to Patch Embedding"""

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        self.proj = nn.Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert (
            H == self.img_size[0] and W == self.img_size[1]
        ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x


class VisionTransformer(nn.Module):
    """Vision Transformer"""

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        num_classes=1000,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        norm_layer=nn.LayerNorm,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = (
            self.embed_dim
        ) = embed_dim  # num_features for consistency with other models
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = self.patch_embed.num_patches

        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        self.blocks = nn.ModuleList(
            [
                Block(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    drop=drop_rate,
                    attn_drop=attn_drop_rate,
                    norm_layer=norm_layer,
                )
                for i in range(depth)
            ]
        )
        self.norm = norm_layer(embed_dim)

        # Classifier head
        self.head = (
            nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity()
        )

        trunc_normal_(self.pos_embed, std=0.02)
        trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @torch.jit.ignore
    def no_weight_decay(self):
        return {"pos_embed", "cls_token"}

    def forward(self, x, register_hook=False):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(
            B, -1, -1
        )  # stole cls_tokens impl from Phil Wang, thanks
        x = torch.cat((cls_tokens, x), dim=1)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        for blk in self.blocks:
            x = blk(x, register_hook=register_hook)

        x = self.norm(x)
        x = x[:, 0]
        x = self.head(x)
        return x


def _conv_filter(state_dict, patch_size=16):
    """convert patch embedding weight from manual patchify + linear proj to conv"""
    out_dict = {}
    for k, v in state_dict.items():
        if "patch_embed.proj.weight" in k:
            v = v.reshape((v.shape[0], 3, patch_size, patch_size))
        out_dict[k] = v
    return out_dict


def vit_base_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_base_patch16_224"]
    if pretrained:
        load_pretrained(
            model,
            num_classes=model.num_classes,
            in_chans=kwargs.get("in_chans", 3),
            filter_fn=_conv_filter,
        )
    return model


def vit_large_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4,
        qkv_bias=True,
        norm_layer=partial(nn.LayerNorm, eps=1e-6),
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_large_patch16_224"]
    if pretrained:
        load_pretrained(
            model, num_classes=model.num_classes, in_chans=kwargs.get("in_chans", 3)
        )
    return model
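ViT_new.py exposes the raw hooks used by the gradient-based baselines: Attention.forward stores the softmaxed attention map and, when register_hook=True, registers a hook that captures its gradient. The sketch below shows one way to read those buffers back after a backward pass; it illustrates the hook API only and is not the repository's own explanation routine.

import torch

from baselines.ViT.ViT_new import vit_base_patch16_224

model = vit_base_patch16_224(pretrained=True)
model.eval()

image = torch.randn(1, 3, 224, 224, requires_grad=True)   # stand-in input
logits = model(image, register_hook=True)                 # hooks save attention + gradients
logits[0, logits.argmax()].backward()                     # backprop the predicted class

last_attn = model.blocks[-1].attn
attn = last_attn.get_attention_map()           # (1, heads, tokens, tokens)
grad = last_attn.get_attn_gradients()          # same shape, filled by the registered hook
cam = (grad * attn).clamp(min=0).mean(dim=1)   # gradient-weighted, averaged over heads
cls_to_patches = cam[0, 0, 1:]                 # CLS row without the CLS column: 196 patch scores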
Transformer-Explainability/baselines/ViT/ViT_orig_LRP.py
ADDED
@@ -0,0 +1,508 @@
""" Vision Transformer (ViT) in PyTorch
Hacked together by / Copyright 2020 Ross Wightman
"""
import torch
import torch.nn as nn
from baselines.ViT.helpers import load_pretrained
from baselines.ViT.layer_helpers import to_2tuple
from baselines.ViT.weight_init import trunc_normal_
from einops import rearrange
from modules.layers_lrp import *


def _cfg(url="", **kwargs):
    return {
        "url": url,
        "num_classes": 1000,
        "input_size": (3, 224, 224),
        "pool_size": None,
        "crop_pct": 0.9,
        "interpolation": "bicubic",
        "first_conv": "patch_embed.proj",
        "classifier": "head",
        **kwargs,
    }


default_cfgs = {
    # patch models
    "vit_small_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/vit_small_p16_224-15ec54c9.pth",
    ),
    "vit_base_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_base_p16_224-80ecf9dd.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
    "vit_large_patch16_224": _cfg(
        url="https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-vitjx/jx_vit_large_p16_224-4ee7a4dc.pth",
        mean=(0.5, 0.5, 0.5),
        std=(0.5, 0.5, 0.5),
    ),
}


def compute_rollout_attention(all_layer_matrices, start_layer=0):
    # adding residual consideration
    num_tokens = all_layer_matrices[0].shape[1]
    batch_size = all_layer_matrices[0].shape[0]
    eye = (
        torch.eye(num_tokens)
        .expand(batch_size, num_tokens, num_tokens)
        .to(all_layer_matrices[0].device)
    )
    all_layer_matrices = [
        all_layer_matrices[i] + eye for i in range(len(all_layer_matrices))
    ]
    # all_layer_matrices = [all_layer_matrices[i] / all_layer_matrices[i].sum(dim=-1, keepdim=True)
    #                       for i in range(len(all_layer_matrices))]
    joint_attention = all_layer_matrices[start_layer]
    for i in range(start_layer + 1, len(all_layer_matrices)):
        joint_attention = all_layer_matrices[i].bmm(joint_attention)
    return joint_attention


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.0):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = Linear(in_features, hidden_features)
        self.act = GELU()
        self.fc2 = Linear(hidden_features, out_features)
        self.drop = Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

    def relprop(self, cam, **kwargs):
        cam = self.drop.relprop(cam, **kwargs)
        cam = self.fc2.relprop(cam, **kwargs)
        cam = self.act.relprop(cam, **kwargs)
        cam = self.fc1.relprop(cam, **kwargs)
        return cam


class Attention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0.0, proj_drop=0.0):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = head_dim**-0.5

        # A = Q*K^T
        self.matmul1 = einsum("bhid,bhjd->bhij")
        # attn = A*V
        self.matmul2 = einsum("bhij,bhjd->bhid")

        self.qkv = Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = Dropout(attn_drop)
        self.proj = Linear(dim, dim)
        self.proj_drop = Dropout(proj_drop)
        self.softmax = Softmax(dim=-1)

        self.attn_cam = None
        self.attn = None
        self.v = None
        self.v_cam = None
        self.attn_gradients = None

    def get_attn(self):
        return self.attn

    def save_attn(self, attn):
        self.attn = attn

    def save_attn_cam(self, cam):
        self.attn_cam = cam

    def get_attn_cam(self):
        return self.attn_cam

    def get_v(self):
        return self.v

    def save_v(self, v):
        self.v = v

    def save_v_cam(self, cam):
        self.v_cam = cam

    def get_v_cam(self):
        return self.v_cam

    def save_attn_gradients(self, attn_gradients):
        self.attn_gradients = attn_gradients

    def get_attn_gradients(self):
        return self.attn_gradients

    def forward(self, x):
        b, n, _, h = *x.shape, self.num_heads
        qkv = self.qkv(x)
        q, k, v = rearrange(qkv, "b n (qkv h d) -> qkv b h n d", qkv=3, h=h)

        self.save_v(v)

        dots = self.matmul1([q, k]) * self.scale

        attn = self.softmax(dots)
        attn = self.attn_drop(attn)

        self.save_attn(attn)
        attn.register_hook(self.save_attn_gradients)

        out = self.matmul2([attn, v])
        out = rearrange(out, "b h n d -> b n (h d)")

        out = self.proj(out)
        out = self.proj_drop(out)
        return out

    def relprop(self, cam, **kwargs):
        cam = self.proj_drop.relprop(cam, **kwargs)
        cam = self.proj.relprop(cam, **kwargs)
        cam = rearrange(cam, "b n (h d) -> b h n d", h=self.num_heads)

        # attn = A*V
        (cam1, cam_v) = self.matmul2.relprop(cam, **kwargs)
        cam1 /= 2
        cam_v /= 2

        self.save_v_cam(cam_v)
        self.save_attn_cam(cam1)

        cam1 = self.attn_drop.relprop(cam1, **kwargs)
        cam1 = self.softmax.relprop(cam1, **kwargs)

        # A = Q*K^T
        (cam_q, cam_k) = self.matmul1.relprop(cam1, **kwargs)
        cam_q /= 2
        cam_k /= 2

        cam_qkv = rearrange(
            [cam_q, cam_k, cam_v],
            "qkv b h n d -> b n (qkv h d)",
            qkv=3,
            h=self.num_heads,
        )

        return self.qkv.relprop(cam_qkv, **kwargs)


class Block(nn.Module):
    def __init__(
        self, dim, num_heads, mlp_ratio=4.0, qkv_bias=False, drop=0.0, attn_drop=0.0
    ):
        super().__init__()
        self.norm1 = LayerNorm(dim, eps=1e-6)
        self.attn = Attention(
            dim,
            num_heads=num_heads,
            qkv_bias=qkv_bias,
            attn_drop=attn_drop,
            proj_drop=drop,
        )
        self.norm2 = LayerNorm(dim, eps=1e-6)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, drop=drop)

        self.add1 = Add()
        self.add2 = Add()
        self.clone1 = Clone()
        self.clone2 = Clone()

    def forward(self, x):
        x1, x2 = self.clone1(x, 2)
        x = self.add1([x1, self.attn(self.norm1(x2))])
        x1, x2 = self.clone2(x, 2)
        x = self.add2([x1, self.mlp(self.norm2(x2))])
        return x

    def relprop(self, cam, **kwargs):
        (cam1, cam2) = self.add2.relprop(cam, **kwargs)
        cam2 = self.mlp.relprop(cam2, **kwargs)
        cam2 = self.norm2.relprop(cam2, **kwargs)
        cam = self.clone2.relprop((cam1, cam2), **kwargs)

        (cam1, cam2) = self.add1.relprop(cam, **kwargs)
        cam2 = self.attn.relprop(cam2, **kwargs)
        cam2 = self.norm1.relprop(cam2, **kwargs)
        cam = self.clone1.relprop((cam1, cam2), **kwargs)
        return cam


class PatchEmbed(nn.Module):
    """Image to Patch Embedding"""

    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
        super().__init__()
        img_size = to_2tuple(img_size)
        patch_size = to_2tuple(patch_size)
        num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0])
        self.img_size = img_size
        self.patch_size = patch_size
        self.num_patches = num_patches

        self.proj = Conv2d(
            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
        )

    def forward(self, x):
        B, C, H, W = x.shape
        # FIXME look at relaxing size constraints
        assert (
            H == self.img_size[0] and W == self.img_size[1]
        ), f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose(1, 2)
        return x

    def relprop(self, cam, **kwargs):
        cam = cam.transpose(1, 2)
        cam = cam.reshape(
            cam.shape[0],
            cam.shape[1],
            (self.img_size[0] // self.patch_size[0]),
            (self.img_size[1] // self.patch_size[1]),
        )
        return self.proj.relprop(cam, **kwargs)


class VisionTransformer(nn.Module):
    """Vision Transformer with support for patch or hybrid CNN input stage"""

    def __init__(
        self,
        img_size=224,
        patch_size=16,
        in_chans=3,
        num_classes=1000,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        mlp_head=False,
        drop_rate=0.0,
        attn_drop_rate=0.0,
    ):
        super().__init__()
        self.num_classes = num_classes
        self.num_features = (
            self.embed_dim
        ) = embed_dim  # num_features for consistency with other models
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
        )
        num_patches = self.patch_embed.num_patches

        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))

        self.blocks = nn.ModuleList(
            [
                Block(
                    dim=embed_dim,
                    num_heads=num_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    drop=drop_rate,
                    attn_drop=attn_drop_rate,
                )
                for i in range(depth)
            ]
        )

        self.norm = LayerNorm(embed_dim)
        if mlp_head:
            # paper diagram suggests 'MLP head', but results in 4M extra parameters vs paper
            self.head = Mlp(embed_dim, int(embed_dim * mlp_ratio), num_classes)
        else:
            # with a single Linear layer as head, the param count within rounding of paper
            self.head = Linear(embed_dim, num_classes)

        # FIXME not quite sure what the proper weight init is supposed to be,
        # normal / trunc normal w/ std == .02 similar to other Bert like transformers
        trunc_normal_(self.pos_embed, std=0.02)  # embeddings same as weights?
        trunc_normal_(self.cls_token, std=0.02)
        self.apply(self._init_weights)

        self.pool = IndexSelect()
        self.add = Add()

        self.inp_grad = None

    def save_inp_grad(self, grad):
        self.inp_grad = grad

    def get_inp_grad(self):
        return self.inp_grad

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=0.02)
            if isinstance(m, nn.Linear) and m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    @property
    def no_weight_decay(self):
        return {"pos_embed", "cls_token"}

    def forward(self, x):
        B = x.shape[0]
        x = self.patch_embed(x)

        cls_tokens = self.cls_token.expand(
            B, -1, -1
        )  # stole cls_tokens impl from Phil Wang, thanks
        x = torch.cat((cls_tokens, x), dim=1)
        x = self.add([x, self.pos_embed])

        x.register_hook(self.save_inp_grad)

        for blk in self.blocks:
            x = blk(x)

        x = self.norm(x)
        x = self.pool(x, dim=1, indices=torch.tensor(0, device=x.device))
        x = x.squeeze(1)
        x = self.head(x)
        return x

    def relprop(
        self, cam=None, method="grad", is_ablation=False, start_layer=0, **kwargs
    ):
        # print(kwargs)
        # print("conservation 1", cam.sum())
        cam = self.head.relprop(cam, **kwargs)
        cam = cam.unsqueeze(1)
        cam = self.pool.relprop(cam, **kwargs)
        cam = self.norm.relprop(cam, **kwargs)
        for blk in reversed(self.blocks):
            cam = blk.relprop(cam, **kwargs)

        # print("conservation 2", cam.sum())
        # print("min", cam.min())

        if method == "full":
            (cam, _) = self.add.relprop(cam, **kwargs)
            cam = cam[:, 1:]
            cam = self.patch_embed.relprop(cam, **kwargs)
            # sum on channels
            cam = cam.sum(dim=1)
            return cam

        elif method == "rollout":
            # cam rollout
            attn_cams = []
            for blk in self.blocks:
                attn_heads = blk.attn.get_attn_cam().clamp(min=0)
                avg_heads = (attn_heads.sum(dim=1) / attn_heads.shape[1]).detach()
                attn_cams.append(avg_heads)
            cam = compute_rollout_attention(attn_cams, start_layer=start_layer)
            cam = cam[:, 0, 1:]
            return cam

        elif method == "grad":
            cams = []
            for blk in self.blocks:
                grad = blk.attn.get_attn_gradients()
                cam = blk.attn.get_attn_cam()
                cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
                cam = cam.clamp(min=0).mean(dim=0)
                cams.append(cam.unsqueeze(0))
            rollout = compute_rollout_attention(cams, start_layer=start_layer)
            cam = rollout[:, 0, 1:]
            return cam

        elif method == "last_layer":
            cam = self.blocks[-1].attn.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            if is_ablation:
                grad = self.blocks[-1].attn.get_attn_gradients()
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam

        elif method == "last_layer_attn":
            cam = self.blocks[-1].attn.get_attn()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam

        elif method == "second_layer":
            cam = self.blocks[1].attn.get_attn_cam()
            cam = cam[0].reshape(-1, cam.shape[-1], cam.shape[-1])
            if is_ablation:
                grad = self.blocks[1].attn.get_attn_gradients()
                grad = grad[0].reshape(-1, grad.shape[-1], grad.shape[-1])
                cam = grad * cam
            cam = cam.clamp(min=0).mean(dim=0)
            cam = cam[0, 1:]
            return cam


def _conv_filter(state_dict, patch_size=16):
    """convert patch embedding weight from manual patchify + linear proj to conv"""
    out_dict = {}
    for k, v in state_dict.items():
        if "patch_embed.proj.weight" in k:
            v = v.reshape((v.shape[0], 3, patch_size, patch_size))
        out_dict[k] = v
    return out_dict


def vit_base_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4,
        qkv_bias=True,
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_base_patch16_224"]
    if pretrained:
        load_pretrained(
            model,
            num_classes=model.num_classes,
            in_chans=kwargs.get("in_chans", 3),
            filter_fn=_conv_filter,
        )
    return model


def vit_large_patch16_224(pretrained=False, **kwargs):
    model = VisionTransformer(
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4,
        qkv_bias=True,
        **kwargs,
    )
    model.default_cfg = default_cfgs["vit_large_patch16_224"]
    if pretrained:
        load_pretrained(
            model, num_classes=model.num_classes, in_chans=kwargs.get("in_chans", 3)
        )
    return model
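VisionTransformer.relprop in ViT_orig_LRP.py expects the relevance of the classifier output (typically a one-hot vector for the target class) and assumes a backward pass has already populated the attention gradients; the ExplanationGenerator module normally orchestrates this. The following is a hand-rolled sketch under those assumptions, with alpha=1.0 mirroring what the generator appears to pass as its default kwargs:

import torch

from baselines.ViT.ViT_orig_LRP import vit_base_patch16_224

model = vit_base_patch16_224(pretrained=True)
model.eval()

image = torch.randn(1, 3, 224, 224, requires_grad=True)   # stand-in input
logits = model(image)
target = logits.argmax(dim=-1).item()

# Backpropagate the chosen class so the attention gradients used by method="grad" exist.
one_hot = torch.zeros_like(logits)
one_hot[0, target] = 1.0
model.zero_grad()
(one_hot * logits).sum().backward(retain_graph=True)

# Relevance propagation followed by gradient-weighted rollout over all blocks.
cam = model.relprop(cam=one_hot, method="grad", start_layer=0, alpha=1.0)
# cam has shape (1, 196): one relevance score per 16x16 patch of the 224x224 input.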