# Streamlit demo: explaining scene-text recognition (STR) predictions with
# Captum attributions (GradientShap) on the PARSeq / ViTSTR models.
import streamlit as st
from PIL import Image
import settings
import captum
import numpy as np
import torch
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from utils import get_args
from utils import CTCLabelConverter, AttnLabelConverter, Averager, TokenLabelConverter
import string
import time
import sys
from dataset import hierarchical_dataset, AlignCollate
import validators
from model import Model, STRScore
from lime.wrappers.scikit_image import SegmentationAlgorithm
from captum._utils.models.linear_model import SkLearnLinearModel, SkLearnRidge
import random
import os
from skimage.color import gray2rgb
import pickle
from train_shap_corr import getPredAndConf
import re
from captum_test import acquire_average_auc, saveAttrData
import copy
from matplotlib import pyplot as plt
from torchvision import transforms
from captum.attr import (
    GradientShap,
    DeepLift,
    DeepLiftShap,
    IntegratedGradients,
    LayerConductance,
    NeuronConductance,
    NoiseTunnel,
    Saliency,
    InputXGradient,
    GuidedBackprop,
    Deconvolution,
    GuidedGradCam,
    FeatureAblation,
    ShapleyValueSampling,
    Lime,
    KernelShap,
)
from captum.metrics import (
    infidelity,
    sensitivity_max,
)
from captum.attr._utils.visualization import visualize_image_attr

device = torch.device('cpu')
opt = get_args(is_train=False)

if opt.sensitive:
    opt.character = string.printable[:-6]  # same as the ASTER setting (94 characters)

cudnn.benchmark = True
cudnn.deterministic = True

modelName = "parseq"
opt.modelName = modelName

if modelName == "vitstr":
    opt.benchmark_all_eval = True
    opt.Transformation = "None"
    opt.FeatureExtraction = "None"
    opt.SequenceModeling = "None"
    opt.Prediction = "None"
    opt.Transformer = True
    opt.sensitive = True
    opt.imgH = 224
    opt.imgW = 224
    opt.data_filtering_off = True
    opt.TransformerModel = "vitstr_base_patch16_224"
    opt.saved_model = "pretrained/vitstr_base_patch16_224_aug.pth"
    opt.batch_size = 1
    opt.workers = 0
    opt.scorer = "mean"
    opt.blackbg = True
elif modelName == "parseq":
    opt.benchmark_all_eval = True
    opt.Transformation = "None"
    opt.FeatureExtraction = "None"
    opt.SequenceModeling = "None"
    opt.Prediction = "None"
    opt.Transformer = True
    opt.sensitive = True
    opt.imgH = 32
    opt.imgW = 128
    opt.data_filtering_off = True
    opt.batch_size = 1
    opt.workers = 0
    opt.scorer = "mean"
    opt.blackbg = True

# Quickshift superpixel segmenter used to group pixel attributions into segments
segmentation_fn = SegmentationAlgorithm('quickshift', kernel_size=4,
                                        max_dist=200, ratio=0.2,
                                        random_seed=random.randint(0, 1000))

if modelName == "vitstr":
    if opt.Transformer:
        converter = TokenLabelConverter(opt)
    elif 'CTC' in opt.Prediction:
        converter = CTCLabelConverter(opt.character)
    else:
        converter = AttnLabelConverter(opt.character)
    opt.num_class = len(converter.character)
    if opt.rgb:
        opt.input_channel = 3
    model_obj = Model(opt)
    model = torch.nn.DataParallel(model_obj).to(device)

    # Single-character attribution averaging (enableSingleCharAttrAve=True):
    # a copy of the model wrapped with a scorer that exposes one character at a time
    modelCopy = copy.deepcopy(model)
    scoring_singlechar = STRScore(opt=opt, converter=converter, device=device,
                                  enableSingleCharAttrAve=True)
    super_pixel_model_singlechar = torch.nn.Sequential(
        modelCopy,
        scoring_singlechar
    ).to(device)
    modelCopy.eval()
    scoring_singlechar.eval()
    super_pixel_model_singlechar.eval()

    # Whole-word scoring pipeline
    scoring = STRScore(opt=opt, converter=converter, device=device)
    super_pixel_model = torch.nn.Sequential(
        model,
        scoring
    ).to(device)
    model.eval()
    scoring.eval()
    super_pixel_model.eval()
elif modelName == "parseq":
    model = torch.hub.load('baudm/parseq', 'parseq', pretrained=True)
    model = model.to(device)
    model_obj = model
    converter = TokenLabelConverter(opt)

    # Single-character attribution averaging (enableSingleCharAttrAve=True)
    modelCopy = copy.deepcopy(model)
    scoring_singlechar = STRScore(opt=opt, converter=converter, device=device,
                                  enableSingleCharAttrAve=True, model=modelCopy)
    super_pixel_model_singlechar = torch.nn.Sequential(
        modelCopy,
        scoring_singlechar
    ).to(device)
    modelCopy.eval()
    scoring_singlechar.eval()
    super_pixel_model_singlechar.eval()

    # Whole-word scoring pipeline
    scoring = STRScore(opt=opt, converter=converter, device=device, model=model)
    super_pixel_model = torch.nn.Sequential(
        model,
        scoring
    ).to(device)
    model.eval()
    scoring.eval()
    super_pixel_model.eval()

if opt.blackbg:
    shapImgLs = np.zeros(shape=(1, 1, 224, 224)).astype(np.float32)
    trainList = np.array(shapImgLs)
    background = torch.from_numpy(trainList).to(device)


### Returns the mean attribution for each segment, with the same shape as the input.
### This function can only process one attribution image at a time.
def averageSegmentsOut(attr, segments):
    averagedInput = torch.clone(attr)
    sortedDict = {}
    for x in np.unique(segments):
        segmentMean = torch.mean(attr[segments == x])
        sortedDict[x] = float(segmentMean.detach().cpu().numpy())
        averagedInput[segments == x] = segmentMean
    return averagedInput, sortedDict


### Acquire pixelwise attributions and replace them with ranks averaged across
### segments: the segment with the largest contribution receives the largest
### rank, and the smallest is set to 1, the minimum rank.
### attr - original attribution
### segm - image segmentation mask
def rankedAttributionsBySegm(attr, segm):
    aveSegmentations, sortedDict = averageSegmentsOut(attr[0, 0], segm)
    totalSegm = len(sortedDict.keys())  # total number of segments
    sortedKeys = [k for k, v in sorted(sortedDict.items(), key=lambda item: item[1])]
    sortedKeys = sortedKeys[::-1]  # keys ordered from largest to smallest score
    currentRank = totalSegm
    rankedSegmImg = torch.clone(attr)
    for totalSegToHide in range(0, len(sortedKeys)):
        currentSegmentToHide = sortedKeys[totalSegToHide]
        rankedSegmImg[0, 0][segm == currentSegmentToHide] = currentRank
        currentRank -= 1
    return rankedSegmImg
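# A minimal sanity-check sketch for the two helpers above. It is hypothetical
# (not part of the demo flow, and never called at import time): a 1x1x2x2
# attribution with two segments, where the segment with the larger mean
# should receive the larger rank.
def _demoSegmentRanking():
    attr = torch.tensor([[[[0.1, 0.1],
                           [0.9, 0.9]]]])  # attribution of shape (1, 1, 2, 2)
    segm = np.array([[0, 0],
                     [1, 1]])              # two segments: top row 0, bottom row 1
    averaged, segmentMeans = averageSegmentsOut(attr[0, 0], segm)
    print(segmentMeans)                    # approximately {0: 0.1, 1: 0.9}
    ranked = rankedAttributionsBySegm(attr, segm)
    print(ranked[0, 0])                    # [[1., 1.], [2., 2.]]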
labels = st.text_input('Drag one of the images from the right into the box below '
                       '(or choose your own image). You need to put the text of the '
                       'image in the textbox below first (e.g. GAS) before dragging '
                       'the image.')

# Demo images shown in the right-hand column
image = Image.open('demo_image/demo_gas.jpg')
image2 = Image.open('demo_image/demo_shakeshack.jpg')
image3 = Image.open('demo_image/demo_ronaldo.jpg')
image4 = Image.open('demo_image/demo_car.jpg')

# Create two columns with different widths
col1, col2 = st.columns([0.8, 0.2])
with col1:
    # Display the header text
    st.markdown('STRExp (Explaining PARSeq STR Model)', unsafe_allow_html=True)
with col2:
    # Display the demo images to drag into the uploader
    st.image(image, width=150)
    st.image(image2, width=150)
    st.image(image3, width=150)
    st.image(image4, width=150)
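# A self-contained sketch (hypothetical helper, never called by the demo) of
# the shape flow in the uploader branch below: average the RGB channels to
# grayscale, lift back to RGB with gray2rgb, then segment with the quickshift
# wrapper defined above.
def _demoSegmentationShapes():
    toy = torch.rand(1, 3, opt.imgH, opt.imgW)               # one RGB image tensor
    gray = torch.mean(toy, dim=1).unsqueeze(0).unsqueeze(0)  # (1, 1, 1, H, W)
    arr = gray.cpu().numpy()[0]                              # (1, 1, H, W)
    rgb = gray2rgb(arr[0])                                   # (1, H, W, 3)
    segments = segmentation_fn(rgb[0])                       # (H, W) integer mask
    print(np.unique(segments).size, "segments")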
uploaded_file = st.file_uploader("Choose a file", type=["png", "jpg"])
if uploaded_file is not None:
    # Read the uploaded file and resize it to the model input size
    bytes_data = uploaded_file.getvalue()
    pilImg = Image.open(uploaded_file)
    pilImg = pilImg.resize((opt.imgW, opt.imgH))
    orig_img_tensors = transforms.ToTensor()(pilImg).unsqueeze(0)
    img1 = orig_img_tensors.to(device)
    # Average the RGB channels into a grayscale image for segmentation
    image_tensors = torch.mean(orig_img_tensors, dim=1).unsqueeze(0).unsqueeze(0)
    imgDataDict = {}
    img_numpy = image_tensors.cpu().detach().numpy()[0]  # batch size must be 1
    if img_numpy.shape[0] == 1:
        img_numpy = gray2rgb(img_numpy[0])  # (1, 32, 128, 3)
    segmOutput = segmentation_fn(img_numpy[0])

    results_dict = {}
    aveAttr = []
    aveAttr_charContrib = []
    segmDataNP = segmOutput
    img1.requires_grad = True
    bgImg = torch.zeros(img1.shape).to(device)
    input_img = img1
    # First channel as (H, W), lifted to RGB for visualization
    origImgNP = torch.clone(orig_img_tensors).detach().cpu().numpy()[0][0]
    origImgNP = gray2rgb(origImgNP)
    charOffset = 0
    img1 = transforms.Normalize(0.5, 0.5)(img1)  # scale to [-1, 1]
    labels = labels.lower()  # e.g. "ronaldo"
    target = converter.encode([labels])

    ### Local explanations only: attribute each character prediction separately,
    ### then average the per-character attributions
    collectedAttributions = []
    for charIdx in range(0, len(labels)):
        scoring_singlechar.setSingleCharOutput(charIdx + charOffset)
        gtClassNum = target[0][charIdx + charOffset]
        gs = GradientShap(super_pixel_model_singlechar)
        baseline_dist = torch.zeros((1, 3, opt.imgH, opt.imgW))
        baseline_dist = baseline_dist.to(device)
        attributions = gs.attribute(input_img, baselines=baseline_dist, target=0)
        collectedAttributions.append(attributions)
    aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
    # (Optional) rank and visualize this local-only attribution with
    # rankedAttributionsBySegm and visualize_image_attr, as done below.

    ### Global attribution: attribute the whole-word score in one pass
    gs = GradientShap(super_pixel_model)
    baseline_dist = torch.zeros((1, 3, opt.imgH, opt.imgW))
    baseline_dist = baseline_dist.to(device)
    attributions = gs.attribute(input_img, baselines=baseline_dist, target=0)
    # Include the global attribution so the average below combines global + local
    collectedAttributions.append(attributions)

    ### Global + local context: average the global attribution together with the
    ### per-character ones, then rank segments and render a blended heat map
    aveAttributions = torch.mean(torch.cat(collectedAttributions, dim=0), dim=0).unsqueeze(0)
    if not torch.isnan(aveAttributions).any():
        rankedAttr = rankedAttributionsBySegm(aveAttributions, segmDataNP)
        rankedAttr = rankedAttr.detach().cpu().numpy()[0][0]
        rankedAttr = gray2rgb(rankedAttr)
        mplotfig, _ = visualize_image_attr(rankedAttr, origImgNP,
                                           method='blended_heat_map', cmap='RdYlGn')
        st.pyplot(mplotfig)
        # (Optional) save the figure instead of rendering it:
        # mplotfig.savefig(outputDir + '{}_shapley_gl.png'.format(nameNoExt))
        # mplotfig.clear()
        # plt.close(mplotfig)
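# A minimal, self-contained GradientShap sketch on a hypothetical toy model
# (not the STR pipeline above), illustrating the attribute() call pattern used
# in the uploader branch: an all-zeros baseline distribution and target=0 to
# select output index 0. Attributions always share the input's shape.
def _demoGradientShap():
    toy = torch.nn.Sequential(torch.nn.Flatten(),
                              torch.nn.Linear(3 * 32 * 128, 2))
    gs_toy = GradientShap(toy)
    x = torch.rand(1, 3, 32, 128)
    baselines = torch.zeros(1, 3, 32, 128)
    attr = gs_toy.attribute(x, baselines=baselines, target=0)
    print(attr.shape)  # torch.Size([1, 3, 32, 128])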