Spaces:

markytools
/

strexp

Build error

App Files Files Community

markytools committed on Jun 3, 2023

Commit

7978529

1 Parent(s): d61b9c7

updated app py

Browse files

Files changed (8) hide show

.gitignore +5 -0
app.py +177 -2
model.py +5 -0
requirements.txt +175 -0
settings.py +1 -1
str_exp_demo.py +2 -2
str_exp_demo_huggingface.py +513 -0
utils.py +6 -7

.gitignore CHANGED Viewed

@@ -21,12 +21,17 @@
 *.sh
 **/__pycache__
 workdir/
 .remote-sync.json
 *.png
 pretrained/
 attributionImgs/
 attributionImgsOld/
 attrSelectivityOld/
 ### Linux ###
 *~

 *.sh
 **/__pycache__
 workdir/
+datasets/
 .remote-sync.json
 *.png
+demo_image_output/
 pretrained/
+attributionData/
 attributionImgs/
 attributionImgsOld/
 attrSelectivityOld/
+pretrained.zip
+datasets.zip
 ### Linux ###
 *~

app.py CHANGED Viewed

@@ -1,4 +1,179 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+from PIL import Image
+import settings
+import captum
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.backends.cudnn as cudnn
+from utils import get_args
+from utils import CTCLabelConverter, AttnLabelConverter, Averager, TokenLabelConverter
+import string
+import time
+import sys
+from dataset import hierarchical_dataset, AlignCollate
+import validators
+from model import Model, STRScore
+from PIL import Image
+from lime.wrappers.scikit_image import SegmentationAlgorithm
+from captum._utils.models.linear_model import SkLearnLinearModel, SkLearnRidge
+import random
+import os
+from skimage.color import gray2rgb
+import pickle
+from train_shap_corr import getPredAndConf
+import re
+from captum_test import acquire_average_auc, saveAttrData
+import copy
+from skimage.color import gray2rgb
+from matplotlib import pyplot as plt
+from torchvision import transforms
+device = torch.device('cpu')
+opt = get_args(is_train=False)
+# Vocabulary / character-set configuration.
+if opt.sensitive:
+    # Same as the ASTER setting: keep the first 94 printable characters.
+    opt.character = string.printable[:-6]
+cudnn.benchmark = True
+cudnn.deterministic = True
+# opt.num_gpu = torch.cuda.device_count()
+# combineBestDataXAI(opt)
+# acquire_average_auc(opt)
+# acquireSingleCharAttrAve(opt)
+modelName = "parseq"
+opt.modelName = modelName
+# opt.eval_data = "datasets/data_lmdb_release/evaluation"
+# Option overrides applied for every supported recognizer.
+_commonOverrides = {
+    "benchmark_all_eval": True,
+    "Transformation": "None",
+    "FeatureExtraction": "None",
+    "SequenceModeling": "None",
+    "Prediction": "None",
+    "Transformer": True,
+    "sensitive": True,
+    "data_filtering_off": True,
+    "batch_size": 1,
+    "workers": 0,
+    "scorer": "mean",
+    "blackbg": True,
+}
+# Overrides that differ between recognizers: input size and, for ViTSTR,
+# the backbone name and checkpoint path.
+_modelOverrides = {
+    "vitstr": {
+        "imgH": 224,
+        "imgW": 224,
+        "TransformerModel": "vitstr_base_patch16_224",
+        "saved_model": "pretrained/vitstr_base_patch16_224_aug.pth",
+    },
+    "parseq": {
+        "imgH": 32,
+        "imgW": 128,
+    },
+}
+# Any other model name leaves the parsed options untouched.
+if modelName in _modelOverrides:
+    for _optName, _optValue in {**_commonOverrides, **_modelOverrides[modelName]}.items():
+        setattr(opt, _optName, _optValue)
+# Page header: a styled title in a wide left column and an image in a
+# narrow right column.
+# The two lines below are the previous placeholder app body, kept disabled.
+# x = st.slider('Select a value')
+# st.write(x, 'squared is', x * x)
+image = Image.open('demo_image/demo_ballys.jpg') # Image shown in the right column (the author uses it as an optional brand logo); path is relative to the working directory
+# Create two columns with relative widths 0.8 and 0.2
+col1, col2 = st.columns( [0.8, 0.2])
+with col1:               # Left column: inject a CSS class, then render the header text with it
+    st.markdown(""" <style> .font {
+    font-size:35px ; font-family: 'Cooper Black'; color: #FF9633;}
+    </style> """, unsafe_allow_html=True)
+    st.markdown('<p class="font">Upload your photo here...</p>', unsafe_allow_html=True)
+with col2:               # Right column: show the image at a fixed width of 150
+    st.image(image,  width=150)
+uploaded_file = st.file_uploader("Choose a file", type=["png", "jpg"])
+# NOTE(review): this handler uses segmentation_fn, converter, labels,
+# scoring_singlechar, super_pixel_model_singlechar, super_pixel_model,
+# GradientShap, rankedAttributionsBySegm and visualize_image_attr, none of
+# which are defined or imported in the visible part of app.py. They must be
+# created/imported before this point, otherwise the first upload raises
+# NameError.
+if uploaded_file is not None:
+    # Force three channels and the recognizer's input size so the tensor
+    # matches the (1, 3, imgH, imgW) GradientShap baseline built below.
+    pilImg = Image.open(uploaded_file).convert("RGB")
+    pilImg = pilImg.resize((opt.imgW, opt.imgH))
+    orig_img_tensors = transforms.ToTensor()(pilImg).unsqueeze(0)
+    img1 = orig_img_tensors.to(device)
+    # Channel-averaged copy of the image, expanded back to RGB; it is only
+    # used to compute the segmentation.
+    image_tensors = torch.mean(orig_img_tensors, dim=1).unsqueeze(0).unsqueeze(0)
+    img_numpy = image_tensors.cpu().detach().numpy()[0] ### Need to set batch size to 1 only
+    if img_numpy.shape[0] == 1:
+        img_numpy = gray2rgb(img_numpy[0])
+    segmDataNP = segmentation_fn(img_numpy[0])
+    img1.requires_grad = True
+    # Tensor handed to the attribution calls (named so it does not shadow
+    # the builtin input()).
+    inputTensor = img1
+    # First channel of the uploaded image, expanded to RGB, used as the
+    # background of the heat-map overlay.
+    origImgNP = torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0]
+    origImgNP = gray2rgb(origImgNP)
+    charOffset = 0
+    # NOTE(review): the normalized tensor is bound to img1 only; the
+    # attribution calls below still receive the un-normalized inputTensor.
+    # Confirm which of the two the model is supposed to see.
+    img1 = transforms.Normalize(0.5, 0.5)(img1) # Between -1 to 1
+    # NOTE(review): target is not used below because every attribute() call
+    # passes target=0; kept so an invalid label still fails in encode().
+    target = converter.encode([labels])
+    # The same all-zero baseline is used for every attribution call.
+    baseline_dist = torch.zeros((1, 3, opt.imgH, opt.imgW)).to(device)
+    ### Local explanations only: one attribution per character position
+    collectedAttributions = []
+    gsSingleChar = GradientShap(super_pixel_model_singlechar)
+    for charIdx in range(len(labels)):
+        scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
+        attributions = gsSingleChar.attribute(inputTensor, baselines=baseline_dist, target=0)
+        collectedAttributions.append(attributions)
+    ### Global explanation over the whole predicted string
+    gs = GradientShap(super_pixel_model)
+    attributions = gs.attribute(inputTensor, baselines=baseline_dist, target=0)
+    # Include the global attribution in the average, as the directory demo
+    # does; previously this append was commented out together with the
+    # figure-saving code, so the "global + local" map was local-only.
+    if not torch.isnan(attributions).any():
+        collectedAttributions.append(attributions)
+    ### Global + Local context
+    if collectedAttributions:
+        aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
+        if not torch.isnan(aveAttributions).any():
+            rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
+            rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+            rankedAttr = gray2rgb(rankedAttr)
+            mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+            # visualize_image_attr already returns the Figure; resize it and
+            # hand that Figure to Streamlit instead of calling it like
+            # plt.figure().
+            mplotfig.set_size_inches(8, 8)
+            st.pyplot(mplotfig)
+            # Release the figure so reruns of the script do not pile up
+            # open matplotlib figures.
+            plt.close(mplotfig)

model.py CHANGED Viewed

@@ -33,8 +33,11 @@ import settings
 class STRScore(nn.Module):
     def __init__(self, opt, converter, device, gtStr="", enableSingleCharAttrAve=False, model=None):
         super(STRScore, self).__init__()
         self.enableSingleCharAttrAve = enableSingleCharAttrAve
         self.singleChar = -1
         self.opt = opt
         self.converter = converter
         self.device = device
@@ -75,6 +78,8 @@ class STRScore(nn.Module):
                 preds_str = self.converter.decode(preds_index[:, 1:], length_for_pred)
             elif settings.MODEL == 'parseq':
                 preds_str, confidence = self.model.tokenizer.decode(preds)
             # print("preds_str: ", preds_str)
         else:
             preds = preds[:, :text_for_loss_length, :]

 class STRScore(nn.Module):
     def __init__(self, opt, converter, device, gtStr="", enableSingleCharAttrAve=False, model=None):
         super(STRScore, self).__init__()
+        if opt.modelName:
+            settings.MODEL = opt.modelName
         self.enableSingleCharAttrAve = enableSingleCharAttrAve
         self.singleChar = -1
+        self.recentlyPredStr = None
         self.opt = opt
         self.converter = converter
         self.device = device
                 preds_str = self.converter.decode(preds_index[:, 1:], length_for_pred)
             elif settings.MODEL == 'parseq':
                 preds_str, confidence = self.model.tokenizer.decode(preds)
+                self.recentlyPredStr = preds_str[-1]
+                # print("preds_str: ", preds_str)
             # print("preds_str: ", preds_str)
         else:
             preds = preds[:, :text_for_loss_length, :]

requirements.txt ADDED Viewed

	@@ -0,0 +1,175 @@

+absl-py==1.2.0
+aiohttp==3.8.1
+aiosignal==1.2.0
+anyio==3.5.0
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+asttokens==2.0.5
+async-timeout==4.0.2
+attrs==21.4.0
+Babel==2.9.1
+backcall==0.2.0
+beautifulsoup4==4.11.1
+bleach==4.1.0
+blinker==1.4
+Bottleneck==1.3.5
+brotlipy==0.7.0
+cachetools==5.2.0
+certifi==2022.6.15
+cffi==1.15.0
+charset-normalizer==2.0.4
+click==8.1.3
+cloudpickle==2.0.0
+colorama==0.4.5
+cryptography==37.0.1
+cycler==0.11.0
+cytoolz==0.11.0
+dask==2022.7.0
+debugpy==1.5.1
+decorator==5.1.1
+defusedxml==0.7.1
+einops==0.4.1
+entrypoints==0.4
+executing==0.8.3
+fastjsonschema==2.15.1
+fonttools==4.25.0
+frozenlist==1.3.1
+fsspec==2022.3.0
+future==0.18.2
+google-auth==2.11.0
+google-auth-oauthlib==0.4.6
+grpcio==1.48.1
+idna==3.3
+imageio==2.19.3
+importlib-metadata==4.11.4
+importlib-resources==5.2.0
+ipykernel==6.9.1
+ipython==8.4.0
+ipython-genutils==0.2.0
+ipywidgets==7.6.5
+jedi==0.18.1
+Jinja2==3.0.3
+joblib==1.1.0
+json5==0.9.6
+jsonschema==4.4.0
+jupyter==1.0.0
+jupyter-client==7.2.2
+jupyter-console==6.4.3
+jupyter-core==4.10.0
+jupyter-server==1.18.1
+jupyterlab==3.4.4
+jupyterlab-pygments==0.1.2
+jupyterlab-server==2.12.0
+jupyterlab-widgets==1.0.0
+kiwisolver==1.4.2
+llvmlite==0.38.1
+lmdb==1.3.0
+locket==1.0.0
+Markdown==3.4.1
+MarkupSafe==2.1.1
+matplotlib==3.5.1
+matplotlib-inline==0.1.2
+mistune==0.8.4
+mkl-fft==1.3.1
+mkl-random==1.2.2
+mkl-service==2.4.0
+multidict==6.0.2
+munkres==1.1.4
+natsort==8.1.0
+nb-conda-kernels==2.3.1
+nbclassic==0.3.5
+nbclient==0.5.13
+nbconvert==6.4.4
+nbformat==5.3.0
+nest-asyncio==1.5.5
+networkx==2.8.4
+nltk==3.6.7
+notebook==6.4.12
+numba==0.55.2
+numexpr==2.8.3
+numpy==1.22.3
+oauthlib==3.2.0
+packaging==21.3
+pandas==1.4.3
+pandocfilters==1.5.0
+parso==0.8.3
+partd==1.2.0
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==9.2.0
+pip==22.1.2
+ply==3.11
+prometheus-client==0.13.1
+prompt-toolkit==3.0.20
+protobuf==4.21.5
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyasn1==0.4.8
+pyasn1-modules==0.2.7
+pycparser==2.21
+pyDeprecate==0.3.2
+Pygments==2.11.2
+PyJWT==2.4.0
+pyOpenSSL==22.0.0
+pyparsing==3.0.4
+PyQt5==5.12.3
+PyQt5-sip==12.11.0
+PyQtChart==5.12
+PyQtWebEngine==5.12.1
+pyrsistent==0.18.0
+PySocks==1.7.1
+python-dateutil==2.8.2
+pytorch-lightning==1.6.3
+pytorch-wavelets==1.3.0
+pytz==2022.1
+pyu2f==0.1.5
+PyWavelets==1.3.0
+PyYAML==6.0
+pyzmq==23.2.0
+qtconsole==5.3.1
+QtPy==2.0.1
+regex==2022.7.25
+requests==2.28.1
+requests-oauthlib==1.3.1
+rsa==4.9
+scikit-image==0.19.2
+scikit-learn==1.1.1
+scipy==1.7.3
+Send2Trash==1.8.0
+setuptools==59.5.0
+sip==6.6.2
+six==1.16.0
+slicer==0.0.7
+sniffio==1.2.0
+soupsieve==2.3.1
+stack-data==0.2.0
+tensorboard==2.10.0
+tensorboard-data-server==0.6.0
+tensorboard-plugin-wit==1.8.1
+terminado==0.13.1
+testpath==0.6.0
+threadpoolctl==2.2.0
+tifffile==2020.10.1
+timm==0.6.7
+toml==0.10.2
+toolz==0.11.2
+torch==1.10.1
+torch-summary==1.4.5
+torchaudio==0.10.1
+torchmetrics==0.9.3
+torchvision==0.11.2
+tornado==6.1
+tqdm==4.64.0
+traitlets==5.1.1
+typing_extensions==4.1.1
+urllib3==1.26.11
+validators==0.18.2
+Wand==0.6.7
+wcwidth==0.2.5
+webencodings==0.5.1
+websocket-client==0.58.0
+Werkzeug==2.2.2
+wheel==0.37.1
+widgetsnbextension==3.5.2
+yarl==1.7.2
+zipp==3.8.0

settings.py CHANGED Viewed

@@ -1,4 +1,4 @@
 ######### global settings  #########
-MODEL = 'vitstr'                          # model arch: vitstr, parseq, srn, abinet, trba, matrn
 SEGM_DIR = "./datasets/segmentations" # segmentation directory of the real test sets
 TARGET_DATASET = "SVTP" # 'IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', 'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'

 ######### global settings  #########
+MODEL = 'parseq'                          # model arch: vitstr, parseq, srn, abinet, trba, matrn
 SEGM_DIR = "./datasets/segmentations" # segmentation directory of the real test sets
 TARGET_DATASET = "SVTP" # 'IIIT5k_3000', 'SVT', 'IC03_860', 'IC03_867', 'IC13_857', 'IC13_1015', 'IC15_1811', 'IC15_2077', 'SVTP', 'CUTE80'

str_exp_demo.py CHANGED Viewed

@@ -154,7 +154,7 @@ def acquireSelectivityHit(origImg, attributions, segmentations, model, converter
             pred = pred.lower()
             gt = gt.lower()
             alphanumeric_case_insensitve = '0123456789abcdefghijklmnopqrstuvwxyz'
-            out_of_alphanumeric_case_insensitve = f'[^{alphanumeric_case_insensitve}]'
             pred = re.sub(out_of_alphanumeric_case_insensitve, '', pred)
             gt = re.sub(out_of_alphanumeric_case_insensitve, '', gt)
         if pred == gt:
@@ -189,7 +189,7 @@ def acquire_selectivity_auc(opt, pkl_filename=None):
 def sampleDemo(opt):
     targetDataset = "SVTP"
     demoImgDir = "demo_image/"
-    outputDir = "/data/goo/demo_image_output/"
     if not os.path.exists(outputDir):
         os.makedirs(outputDir)

             pred = pred.lower()
             gt = gt.lower()
             alphanumeric_case_insensitve = '0123456789abcdefghijklmnopqrstuvwxyz'
+            out_of_alphanumeric_case_insensitve = f"[^{alphanumeric_case_insensitve}]"
             pred = re.sub(out_of_alphanumeric_case_insensitve, '', pred)
             gt = re.sub(out_of_alphanumeric_case_insensitve, '', gt)
         if pred == gt:
 def sampleDemo(opt):
     targetDataset = "SVTP"
     demoImgDir = "demo_image/"
+    outputDir = "demo_image_output/"
     if not os.path.exists(outputDir):
         os.makedirs(outputDir)

str_exp_demo_huggingface.py ADDED Viewed

	@@ -0,0 +1,513 @@

+import settings
+import captum
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torch.backends.cudnn as cudnn
+from utils import get_args
+from utils import CTCLabelConverter, AttnLabelConverter, Averager, TokenLabelConverter
+import string
+import time
+import sys
+from dataset import hierarchical_dataset, AlignCollate
+import validators
+from model import Model, STRScore
+from PIL import Image
+from lime.wrappers.scikit_image import SegmentationAlgorithm
+from captum._utils.models.linear_model import SkLearnLinearModel, SkLearnRidge
+import random
+import os
+from skimage.color import gray2rgb
+import pickle
+from train_shap_corr import getPredAndConf
+import re
+from captum_test import acquire_average_auc, saveAttrData
+import copy
+from skimage.color import gray2rgb
+from matplotlib import pyplot as plt
+from torchvision import transforms
+device = torch.device('cpu')
+from captum.attr import (
+    GradientShap,
+    DeepLift,
+    DeepLiftShap,
+    IntegratedGradients,
+    LayerConductance,
+    NeuronConductance,
+    NoiseTunnel,
+    Saliency,
+    InputXGradient,
+    GuidedBackprop,
+    Deconvolution,
+    GuidedGradCam,
+    FeatureAblation,
+    ShapleyValueSampling,
+    Lime,
+    KernelShap
+)
+from captum.metrics import (
+    infidelity,
+    sensitivity_max
+)
+from captum.attr._utils.visualization import visualize_image_attr
+### Replace pixel-wise attributions with per-segment ranks.
+### Segments are ordered by their mean attribution: the segment with the
+### largest mean receives the highest rank (the number of segments) and the
+### segment with the smallest mean receives 1, the minimum rank.
+### attr - original attribution; only plane [0, 0] is ranked and overwritten
+### segm - image segmentations
+### Returns a clone of attr; the input tensor itself is not modified.
+def rankedAttributionsBySegm(attr, segm):
+    _, segmentMeans = averageSegmentsOut(attr[0, 0], segm)
+    # Ascending by mean attribution, so position + 1 is the segment's rank.
+    ascendingLabels = sorted(segmentMeans, key=segmentMeans.get)
+    rankedSegmImg = torch.clone(attr)
+    # View into the clone: writes through rankedPlane land in rankedSegmImg.
+    rankedPlane = rankedSegmImg[0, 0]
+    for rank, label in enumerate(ascendingLabels, start=1):
+        rankedPlane[segm == label] = rank
+    return rankedSegmImg
+### Average an attribution map inside every segment.
+### Returns a copy of attr in which each pixel holds the mean of its segment,
+### together with a dict mapping each segment label to that mean as a float.
+### This function can only handle one attribution image at a time.
+def averageSegmentsOut(attr, segments):
+    segmentMeans = {}
+    averagedInput = attr.clone()
+    for label in np.unique(segments):
+        mask = segments == label
+        meanValue = attr[mask].mean()
+        averagedInput[mask] = meanValue
+        segmentMeans[label] = float(meanValue.detach().cpu().numpy())
+    return averagedInput, segmentMeans
+### Compute and save quickshift segmentations for a single evaluation dataset.
+### One pickle file "<index>.pkl" is written per image, holding a dict with
+### 'segdata' (the segmentation output) and 'label' (the image's label).
+### opt - parsed options; reads eval_data, imgH, imgW, PAD and workers, and
+###       sets opt.eval = True
+### targetDataset - dataset folder name under opt.eval_data, e.g. one of
+###       'IIIT5k_3000', 'SVT', 'IC03_867', 'IC13_1015', 'IC15_2077', 'SVTP', 'CUTE80'
+### segmRootDir - output directory; defaults to the original hard-coded
+###       location for targetDataset
+def outputSegmOnly(opt, targetDataset="CUTE80", segmRootDir=None):
+    if segmRootDir is None:
+        segmRootDir = "/home/uclpc1/Documents/STR/datasets/segmentations/224X224/{}/".format(targetDataset)
+    # exist_ok avoids the check-then-create race of exists() + makedirs().
+    os.makedirs(segmRootDir, exist_ok=True)
+    opt.eval = True
+    # Only one dataset is processed per call.
+    eval_data_list = [targetDataset]
+    ### Taken from LIME
+    segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
+                                            max_dist=200, ratio=0.2,
+                                            random_seed=random.randint(0, 1000))
+    for eval_name in eval_data_list:
+        eval_data_path = os.path.join(opt.eval_data, eval_name)
+        AlignCollate_evaluation = AlignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio_with_pad=opt.PAD, opt=opt)
+        # The second return value (a log) is not needed here.
+        eval_dataset, _ = hierarchical_dataset(root=eval_data_path, opt=opt)
+        evaluation_loader = torch.utils.data.DataLoader(
+            eval_dataset, batch_size=1,
+            shuffle=False,
+            num_workers=int(opt.workers),
+            collate_fn=AlignCollate_evaluation, pin_memory=True)
+        for i, (image_tensors, labels) in enumerate(evaluation_loader):
+            img_numpy = image_tensors.cpu().detach().numpy()[0] ### Need to set batch size to 1 only
+            if img_numpy.shape[0] == 1:
+                # Single-channel image: expand to RGB before segmenting.
+                img_numpy = gray2rgb(img_numpy[0])
+            imgDataDict = {
+                'segdata': segmentation_fn(img_numpy),
+                'label': labels[0],
+            }
+            outputPickleFile = os.path.join(segmRootDir, "{}.pkl".format(i))
+            with open(outputPickleFile, 'wb') as f:
+                pickle.dump(imgDataDict, f)
+### Selectivity test: hide segments one by one, from the highest to the lowest
+### mean attribution, and record the prediction after each removal.
+### origImg - input image tensor; a clone is modified, the original is kept
+### attributions - attribution tensor; only plane [0, 0] is used
+### segmentations - per-pixel segment labels
+### model, converter, scoring - forwarded unchanged to getPredAndConf
+### labels - ground-truth text of the image
+### Returns (n_correct, confidenceList): entry i describes the image with
+### i + 1 segments hidden; n_correct holds 1/0 for a right/wrong prediction
+### and confidenceList holds confScore[0][0] * 100.
+### NOTE(review): reads the module-level name `opt`, which is not a parameter.
+def acquireSelectivityHit(origImg, attributions, segmentations, model, converter, labels, scoring):
+    _, segmentMeans = averageSegmentsOut(attributions[0, 0], segmentations)
+    # Segment labels ordered from largest to smallest mean attribution.
+    sortedKeys = sorted(segmentMeans, key=segmentMeans.get)[::-1]
+    n_correct = []
+    confidenceList = [] # First index is one feature removed, second index two features removed, and so on...
+    clonedImg = torch.clone(origImg)
+    # Keep the untouched label for the model call and a separate normalized
+    # copy for the comparison. Previously gt was overwritten inside the loop,
+    # so from the second iteration on getPredAndConf received the lower-cased,
+    # filtered label instead of the original one.
+    gtRaw = str(labels)
+    gtCompare = gtRaw
+    # To evaluate 'case sensitive model' with alphanumeric and case insensitve setting.
+    filterChars = opt.sensitive and opt.data_filtering_off
+    if filterChars:
+        alphanumeric_case_insensitve = '0123456789abcdefghijklmnopqrstuvwxyz'
+        # Compiled once instead of being rebuilt on every iteration.
+        out_of_alphanumeric_case_insensitve = re.compile(f"[^{alphanumeric_case_insensitve}]")
+        gtCompare = out_of_alphanumeric_case_insensitve.sub('', gtRaw.lower())
+    for currentSegmentToHide in sortedKeys:
+        # Removals are cumulative: earlier segments stay hidden.
+        clonedImg[0,0][segmentations == currentSegmentToHide] = 0.0
+        pred, confScore = getPredAndConf(opt, model, scoring, clonedImg, converter, np.array([gtRaw]))
+        if filterChars:
+            pred = out_of_alphanumeric_case_insensitve.sub('', pred.lower())
+        n_correct.append(1 if pred == gtCompare else 0)
+        confidenceList.append(confScore[0][0]*100)
+    return n_correct, confidenceList
+### Once you have the selectivity_eval_results.pkl file, scan it and print the
+### index of every result dict in which some "*_acc" entry never contains a 1.
+### An index is printed once per such entry.
+### opt - unused here; kept so existing callers keep working
+### pkl_filename - path to a pickled sequence of result dicts; defaults to the
+###                original hard-coded CUTE80 results path
+### Returns None; the indices are written to stdout.
+def acquire_selectivity_auc(opt, pkl_filename=None):
+    if pkl_filename is None:
+        pkl_filename = "/home/goo/str/str_vit_dataexplain_lambda/metrics_sensitivity_eval_results_CUTE80.pkl" # VITSTR
+    # NOTE(review): pickle.load can execute arbitrary code; only open result
+    # files produced by this project.
+    with open(pkl_filename, 'rb') as f:
+        selectivity_data = pickle.load(f)
+    for resDictIdx, resDict in enumerate(selectivity_data):
+        keylistAcc = [keyStr for keyStr in resDict if "_acc" in keyStr]
+        # Need to check if network correctly predicted the image
+        for metrics_accStr in keylistAcc:
+            if 1 not in resDict[metrics_accStr]:
+                # Print the index itself; the original printed the literal
+                # text "resDictIdx", which did not identify the image.
+                print(resDictIdx)
+# Single directory STRExp explanations output demo
+def sampleDemo(opt, modelName):
+    targetDataset = "SVTP"
+    demoImgDir = "demo_image/"
+    outputDir = "demo_image_output/"
+    if not os.path.exists(outputDir):
+        os.makedirs(outputDir)
+    segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
+                                            max_dist=200, ratio=0.2,
+                                            random_seed=random.randint(0, 1000))
+    if modelName=="vitstr":
+        if opt.Transformer:
+            converter = TokenLabelConverter(opt)
+        elif 'CTC' in opt.Prediction:
+            converter = CTCLabelConverter(opt.character)
+        else:
+            converter = AttnLabelConverter(opt.character)
+        opt.num_class = len(converter.character)
+        if opt.rgb:
+            opt.input_channel = 3
+        model_obj = Model(opt)
+        model = torch.nn.DataParallel(model_obj).to(device)
+        modelCopy = copy.deepcopy(model)
+        """ evaluation """
+        scoring_singlechar = STRScore(opt=opt, converter=converter, device=device, enableSingleCharAttrAve=True)
+        super_pixel_model_singlechar = torch.nn.Sequential(
+            # super_pixler,
+            # numpy2torch_converter,
+            modelCopy,
+            scoring_singlechar
+        ).to(device)
+        modelCopy.eval()
+        scoring_singlechar.eval()
+        super_pixel_model_singlechar.eval()
+        # Single Char Attribution Averaging
+        # enableSingleCharAttrAve - set to True
+        scoring = STRScore(opt=opt, converter=converter, device=device)
+        super_pixel_model = torch.nn.Sequential(
+            # super_pixler,
+            # numpy2torch_converter,
+            model,
+            scoring
+        ).to(device)
+        model.eval()
+        scoring.eval()
+        super_pixel_model.eval()
+    elif modelName=="parseq":
+        model = torch.hub.load('baudm/parseq', 'parseq', pretrained=True)
+        # checkpoint = torch.hub.load_state_dict_from_url('https://github.com/baudm/parseq/releases/download/v1.0.0/parseq-bb5792a6.pt', map_location="cpu")
+        # # state_dict = {key.replace("module.", ""): value for key, value in checkpoint["state_dict"].items()}
+        # model.load_state_dict(checkpoint)
+        model = model.to(device)
+        model_obj = model
+        converter = TokenLabelConverter(opt)
+        modelCopy = copy.deepcopy(model)
+        """ evaluation """
+        scoring_singlechar = STRScore(opt=opt, converter=converter, device=device, enableSingleCharAttrAve=True, model=modelCopy)
+        super_pixel_model_singlechar = torch.nn.Sequential(
+            # super_pixler,
+            # numpy2torch_converter,
+            modelCopy,
+            scoring_singlechar
+        ).to(device)
+        modelCopy.eval()
+        scoring_singlechar.eval()
+        super_pixel_model_singlechar.eval()
+        # Single Char Attribution Averaging
+        # enableSingleCharAttrAve - set to True
+        scoring = STRScore(opt=opt, converter=converter, device=device, model=model)
+        super_pixel_model = torch.nn.Sequential(
+            # super_pixler,
+            # numpy2torch_converter,
+            model,
+            scoring
+        ).to(device)
+        model.eval()
+        scoring.eval()
+        super_pixel_model.eval()
+    if opt.blackbg:
+        shapImgLs = np.zeros(shape=(1, 1, 224, 224)).astype(np.float32)
+        trainList = np.array(shapImgLs)
+        background = torch.from_numpy(trainList).to(device)
+    opt.eval = True
+    for path, subdirs, files in os.walk(demoImgDir):
+        for name in files:
+            nameNoExt = name.split('.')[0]
+            labels = nameNoExt.split("_")[-1]
+            fullfilename = os.path.join(demoImgDir, name) # Value
+            pilImg = Image.open(fullfilename)
+            pilImg = pilImg.resize((opt.imgW, opt.imgH))
+            # fullfilename: /data/goo/strattr/attributionData/trba/CUTE80/66_featablt.pkl
+            ### Single char averaging
+            if modelName == 'vitstr':
+                orig_img_tensors = transforms.ToTensor()(pilImg)
+                orig_img_tensors = torch.mean(orig_img_tensors, dim=0).unsqueeze(0).unsqueeze(0)
+                image_tensors = ((torch.clone(orig_img_tensors) + 1.0) / 2.0) * 255.0
+                imgDataDict = {}
+                img_numpy = image_tensors.cpu().detach().numpy()[0] ### Need to set batch size to 1 only
+                if img_numpy.shape[0] == 1:
+                    img_numpy = gray2rgb(img_numpy[0])
+                # print("img_numpy shape: ", img_numpy.shape) # (32,100,3)
+                segmOutput = segmentation_fn(img_numpy)
+                # print("orig_img_tensors shape: ", orig_img_tensors.shape) # (3, 224, 224)
+                # print("orig_img_tensors max: ", orig_img_tensors.max()) # 0.6824 (1)
+                # print("orig_img_tensors min: ", orig_img_tensors.min()) # 0.0235 (0)
+                # sys.exit()
+                results_dict = {}
+                aveAttr = []
+                aveAttr_charContrib = []
+                # segmData, labels = segAndLabels[0]
+                target = converter.encode([labels])
+                # labels: RONALDO
+                segmDataNP = segmOutput
+                segmTensor = torch.from_numpy(segmDataNP).unsqueeze(0).unsqueeze(0)
+                # print("segmTensor min: ", segmTensor.min()) # 0 starting segmentation
+                segmTensor = segmTensor.to(device)
+                # print("segmTensor shape: ", segmTensor.shape)
+                # img1 = np.asarray(imgPIL.convert('L'))
+                # sys.exit()
+                # img1 = img1 / 255.0
+                # img1 = torch.from_numpy(img1).unsqueeze(0).unsqueeze(0).type(torch.FloatTensor).to(device)
+                img1 = orig_img_tensors.to(device)
+                img1.requires_grad = True
+                bgImg = torch.zeros(img1.shape).to(device)
+                input = img1
+                origImgNP = torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0] # (1, 1, 224, 224)
+                origImgNP = gray2rgb(origImgNP)
+                charOffset = 1
+                # preds = model(img1, seqlen=converter.batch_max_length)
+                ### Local explanations only
+                collectedAttributions = []
+                for charIdx in range(0, len(labels)):
+                    scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
+                    gtClassNum = target[0][charIdx + charOffset]
+                    ### Shapley Value Sampling
+                    svs = ShapleyValueSampling(super_pixel_model_singlechar)
+                    # attr = svs.attribute(input, target=0, n_samples=200) ### Individual pixels, too long to calculate
+                    attributions = svs.attribute(input, target=gtClassNum, feature_mask=segmTensor)
+                    collectedAttributions.append(attributions)
+                aveAttributions = torch.mean(torch.cat(collectedAttributions,dim=0), dim=0).unsqueeze(0)
+                if not torch.isnan(aveAttributions).any():
+                    rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley_l.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                ### Shapley Value Sampling
+                svs = ShapleyValueSampling(super_pixel_model)
+                # attr = svs.attribute(input, target=0, n_samples=200) ### Individual pixels, too long to calculate
+                attributions = svs.attribute(input, target=0, feature_mask=segmTensor)
+                if not torch.isnan(attributions).any():
+                    collectedAttributions.append(attributions)
+                    rankedAttr = rankedAttributionsBySegm(attributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                ### Global + Local context
+                aveAttributions = torch.mean(torch.cat(collectedAttributions,dim=0), dim=0).unsqueeze(0)
+                if not torch.isnan(aveAttributions).any():
+                    rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley_gl.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                return
+            elif modelName == 'parseq':
+                orig_img_tensors = transforms.ToTensor()(pilImg).unsqueeze(0)
+                img1 = orig_img_tensors.to(device)
+                # image_tensors = ((torch.clone(orig_img_tensors) + 1.0) / 2.0) * 255.0
+                image_tensors = torch.mean(orig_img_tensors, dim=1).unsqueeze(0).unsqueeze(0)
+                imgDataDict = {}
+                img_numpy = image_tensors.cpu().detach().numpy()[0] ### Need to set batch size to 1 only
+                if img_numpy.shape[0] == 1:
+                    img_numpy = gray2rgb(img_numpy[0])
+                # print("img_numpy shape: ", img_numpy.shape) # (1, 32, 128, 3)
+                segmOutput = segmentation_fn(img_numpy[0])
+                results_dict = {}
+                aveAttr = []
+                aveAttr_charContrib = []
+                target = converter.encode([labels])
+                # labels: RONALDO
+                segmDataNP = segmOutput
+                img1.requires_grad = True
+                bgImg = torch.zeros(img1.shape).to(device)
+                # preds = model(img1, seqlen=converter.batch_max_length)
+                input = img1
+                origImgNP = torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0] # (1, 1, 224, 224)
+                origImgNP = gray2rgb(origImgNP)
+                charOffset = 0
+                img1 = transforms.Normalize(0.5, 0.5)(img1) # Between -1 to 1
+                target = converter.encode([labels])
+                ### Local explanations only
+                collectedAttributions = []
+                for charIdx in range(0, len(labels)):
+                    scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
+                    gtClassNum = target[0][charIdx + charOffset]
+                    gs = GradientShap(super_pixel_model_singlechar)
+                    baseline_dist = torch.zeros((1, 3, opt.imgH, opt.imgW))
+                    baseline_dist = baseline_dist.to(device)
+                    attributions = gs.attribute(input, baselines=baseline_dist, target=0)
+                    collectedAttributions.append(attributions)
+                aveAttributions = torch.mean(torch.cat(collectedAttributions,dim=0), dim=0).unsqueeze(0)
+                if not torch.isnan(aveAttributions).any():
+                    rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley_l.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                ### Local Sampling
+                gs = GradientShap(super_pixel_model)
+                baseline_dist = torch.zeros((1, 3, opt.imgH, opt.imgW))
+                baseline_dist = baseline_dist.to(device)
+                attributions = gs.attribute(input, baselines=baseline_dist, target=0)
+                if not torch.isnan(attributions).any():
+                    collectedAttributions.append(attributions)
+                    rankedAttr = rankedAttributionsBySegm(attributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                ### Global + Local context
+                aveAttributions = torch.mean(torch.cat(collectedAttributions,dim=0), dim=0).unsqueeze(0)
+                if not torch.isnan(aveAttributions).any():
+                    rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
+                    rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
+                    rankedAttr = gray2rgb(rankedAttr)
+                    mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP, method='blended_heat_map', cmap='RdYlGn')
+                    mplotfig.savefig(outputDir + '{}_shapley_gl.png'.format(nameNoExt))
+                    mplotfig.clear()
+                    plt.close(mplotfig)
+                continue
+if __name__ == '__main__':
+    # deleteInf()
+    opt = get_args(is_train=False)
+    """ vocab / character number configuration """
+    if opt.sensitive:
+        opt.character = string.printable[:-6]  # same with ASTER setting (use 94 char).
+    cudnn.benchmark = True
+    cudnn.deterministic = True
+    # opt.num_gpu = torch.cuda.device_count()
+    # combineBestDataXAI(opt)
+    # acquire_average_auc(opt)
+    # acquireSingleCharAttrAve(opt)
+    modelName = "parseq"
+    opt.modelName = modelName
+    opt.eval_data = "datasets/data_lmdb_release/evaluation"
+    if modelName=="vitstr":
+        opt.benchmark_all_eval = True
+        opt.Transformation = "None"
+        opt.FeatureExtraction = "None"
+        opt.SequenceModeling = "None"
+        opt.Prediction = "None"
+        opt.Transformer = True
+        opt.sensitive = True
+        opt.imgH = 224
+        opt.imgW = 224
+        opt.data_filtering_off = True
+        opt.TransformerModel= "vitstr_base_patch16_224"
+        opt.saved_model = "pretrained/vitstr_base_patch16_224_aug.pth"
+        opt.batch_size = 1
+        opt.workers = 0
+        opt.scorer = "mean"
+        opt.blackbg = True
+    elif modelName=="parseq":
+        opt.benchmark_all_eval = True
+        opt.Transformation = "None"
+        opt.FeatureExtraction = "None"
+        opt.SequenceModeling = "None"
+        opt.Prediction = "None"
+        opt.Transformer = True
+        opt.sensitive = True
+        opt.imgH = 32
+        opt.imgW = 128
+        opt.data_filtering_off = True
+        opt.batch_size = 1
+        opt.workers = 0
+        opt.scorer = "mean"
+        opt.blackbg = True
+    sampleDemo(opt, modelName)

utils.py CHANGED Viewed

@@ -296,11 +296,11 @@ def get_device(verbose=True):
     return device
-def get_args(is_train=True):
     parser = argparse.ArgumentParser(description='STR')
     # for test
-    parser.add_argument('--eval_data', required=not is_train, help='path to evaluation dataset')
     parser.add_argument('--benchmark_all_eval', action='store_true', help='evaluate 10 benchmark evaluation datasets')
     parser.add_argument('--calculate_infer_time', action='store_true', help='calculate inference timing')
     parser.add_argument('--flops', action='store_true', help='calculates approx flops (may not work)')
@@ -362,11 +362,10 @@ def get_args(is_train=True):
     choices = ["vitstr_tiny_patch16_224", "vitstr_small_patch16_224", "vitstr_base_patch16_224", "vitstr_tiny_distilled_patch16_224", "vitstr_small_distilled_patch16_224"]
     parser.add_argument('--TransformerModel', default=choices[0], help='Which vit/deit transformer model', choices=choices)
-    parser.add_argument('--Transformation', type=str, required=True, help='Transformation stage. None|TPS')
-    parser.add_argument('--FeatureExtraction', type=str, required=True,
-                        help='FeatureExtraction stage. VGG|RCNN|ResNet')
-    parser.add_argument('--SequenceModeling', type=str, required=True, help='SequenceModeling stage. None|BiLSTM')
-    parser.add_argument('--Prediction', type=str, required=True, help='Prediction stage. None|CTC|Attn')
     parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
     parser.add_argument('--input_channel', type=int, default=1,
                         help='the number of input channel of Feature extractor')

     return device
+def get_args(is_train=True, model=None):
     parser = argparse.ArgumentParser(description='STR')
     # for test
+    parser.add_argument('--eval_data', help='path to evaluation dataset')
     parser.add_argument('--benchmark_all_eval', action='store_true', help='evaluate 10 benchmark evaluation datasets')
     parser.add_argument('--calculate_infer_time', action='store_true', help='calculate inference timing')
     parser.add_argument('--flops', action='store_true', help='calculates approx flops (may not work)')
     choices = ["vitstr_tiny_patch16_224", "vitstr_small_patch16_224", "vitstr_base_patch16_224", "vitstr_tiny_distilled_patch16_224", "vitstr_small_distilled_patch16_224"]
     parser.add_argument('--TransformerModel', default=choices[0], help='Which vit/deit transformer model', choices=choices)
+    parser.add_argument('--Transformation', type=str, help='Transformation stage. None|TPS')
+    parser.add_argument('--FeatureExtraction', type=str, help='FeatureExtraction stage. VGG|RCNN|ResNet')
+    parser.add_argument('--SequenceModeling', type=str, help='SequenceModeling stage. None|BiLSTM')
+    parser.add_argument('--Prediction', type=str, help='Prediction stage. None|CTC|Attn')
     parser.add_argument('--num_fiducial', type=int, default=20, help='number of fiducial points of TPS-STN')
     parser.add_argument('--input_channel', type=int, default=1,
                         help='the number of input channel of Feature extractor')