import math
import json

import numpy as np
import tensorflow as tf
import tfimm
import efficientnet.tfkeras as efnv1
import keras_efficientnet_v2 as efnv2
import tensorflow_hub as hub

embedding_size = 1024
n_images = 51033 + 27956  # train + test images in the competition


class DotDict(dict):
    """dot.notation access to dictionary attributes

    Reference: https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary/23689767#23689767
    """
    __getattr__ = dict.get  # returns None for missing keys, don't use getattr() with default!
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


def get_cfg(json_file):
    "Load a training config from a JSON file into a DotDict."
    with open(str(json_file)) as f:
        config_dict = json.load(f)
    return DotDict(config_dict)


def get_embeddings(img, embed_model):
    "Return the embedding for a single image, shape [1, embedding_size]."
    inp = img[None, ...]  # add batch dimension
    embeddings = embed_model.predict(inp, verbose=1, batch_size=1, workers=4, use_multiprocessing=True)
    return embeddings


# Train embeddings have to be re-ordered: embeddings were concatenated (train, valid)
# in the training notebook, and the valid fold is different for each ensemble model.
FOLDS = 10
shards, n_total = [], 0
for fold in range(FOLDS):
    n_img = 5104 if fold <= 2 else 5103
    shards.append(list(range(n_total, n_total + n_img)))
    n_total += n_img
assert n_total == 51033


def get_train_idx(use_fold):
    "Return an embedding index that restores the order of images in the tfrec files."
    train_folds = [i for i in range(FOLDS) if i != use_fold]
    valid_folds = [i for i in range(FOLDS) if i == use_fold]
    folds = train_folds + valid_folds  # order of saved embeddings (train + valid)

    train_idx = [shards[fold] for fold in folds]
    train_idx = np.concatenate(train_idx)

    return np.argsort(train_idx)


def get_comp_embeddings(emb_files, use_folds):
    "Load embeddings for all competition images, shape [n_images, n_models * embedding_size]."
    comp_embeddings = []

    for npz_file, use_fold in zip(emb_files, use_folds):
        # Get embeddings for all competition images
        d = np.load(str(npz_file))
        comp_train_emb = d['train']
        comp_test_emb = d['test']

        # Restore original order of comp_train_emb, targets (use targets as fingerprint-check)
        comp_train_idx = get_train_idx(use_fold)
        comp_train_emb = comp_train_emb[comp_train_idx, :]
        comp_embs = np.concatenate([comp_train_emb, comp_test_emb], axis=0)
        assert comp_embs.shape == (n_images, embedding_size)

        # L2-normalize embeddings
        comp_embs_norms = np.linalg.norm(comp_embs, axis=1)
        print("comp_embs norm:", comp_embs_norms.min(), "...", comp_embs_norms.max())
        comp_embs /= comp_embs_norms[:, None]

        comp_embeddings.append(comp_embs)

    return np.concatenate(comp_embeddings, axis=1)


def get_test_embedding(image, embed_models, sizes):
    "Embed one test image with each model, shape [1, n_models * embedding_size]."
    test_embedding = []

    for embed_model, size in zip(embed_models, sizes):
        # Scale the image to the model's input size
        scaled_image = tf.image.resize(image, size)
        scaled_image = tf.cast(scaled_image, tf.float32) / 255.0

        # Get embedding for the test image
        test_emb = get_embeddings(scaled_image, embed_model)  # shape: [1, embedding_size]
        assert test_emb.shape == (1, embedding_size)

        # L2-normalize embedding
        test_emb_norm = np.linalg.norm(test_emb, axis=1)
        test_emb /= test_emb_norm[:, None]

        test_embedding.append(test_emb)

    return np.concatenate(test_embedding, axis=1)


def p2logit(x):
    "Map a probability/similarity in (0, 1) to logit space."
    return np.log(x / (1 - x))


def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def get_confidence(similarity, threshold):
    "Calculate confidence in the known/unknown prediction."
    if similarity <= 0:
        return 0
    logit_sim = p2logit(similarity)
    logit_threshold = p2logit(threshold)
    return sigmoid(abs(logit_sim - logit_threshold))
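
# Worked example (illustrative only, not used by the pipeline): at the decision
# threshold, get_confidence() returns sigmoid(0) = 0.5, and the confidence grows
# toward 1.0 as the similarity moves away from the threshold in either direction
# in logit space. The threshold and similarity values below are made up.
def _demo_confidence():
    threshold = 0.6
    for similarity in [0.55, 0.6, 0.65, 0.9]:
        print(f"sim={similarity:.2f} -> confidence={get_confidence(similarity, threshold):.3f}")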
class ArcMarginProductSubCenter(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    References:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
        https://github.com/haqishen/Google-Landmark-Recognition-2020-3rd-Place-Solution/

    Sub-center version: for k > 1, the embedding layer can learn k sub-centers per class.
    '''
    def __init__(self, n_classes, s=30, m=0.50, k=3, easy_margin=False, ls_eps=0.0, **kwargs):
        super(ArcMarginProductSubCenter, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.k = k
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'k': self.k,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        super(ArcMarginProductSubCenter, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes * self.k),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True)

    def call(self, inputs):
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine_all = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        if self.k > 1:
            # Keep only the best-matching sub-center per class
            cosine_all = tf.reshape(cosine_all, [-1, self.n_classes, self.k])
            cosine = tf.math.reduce_max(cosine_all, axis=2)
        else:
            cosine = cosine_all
        sine = tf.math.sqrt(1.0 - tf.math.pow(cosine, 2))
        phi = cosine * self.cos_m - sine * self.sin_m  # cos(theta + m)
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output
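
# Minimal shape check for the sub-center head (a sketch; the class count, batch
# size, and label values below are arbitrary, not competition settings):
def _demo_arc_margin():
    n_classes, batch_size = 5, 4
    layer = ArcMarginProductSubCenter(n_classes=n_classes, s=30, m=0.3, k=3)
    X = tf.random.normal([batch_size, embedding_size])  # unnormalized embeddings
    y = tf.constant([0, 1, 2, 3])                       # integer class labels
    logits = layer([X, y])  # scaled cosines, margin applied to the target class
    assert logits.shape == (batch_size, n_classes)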
TFHUB = {
    'hub_efnv2s': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2",
    'hub_efnv2m': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2",
    'hub_efnv2l': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2",
    'hub_efnv2xl': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2",
    'bit_m-r50x1': "https://tfhub.dev/google/bit/m-r50x1/1",
    'bit_m-r50x3': "https://tfhub.dev/google/bit/m-r50x3/1",
    'bit_m-r101x1': "https://tfhub.dev/google/bit/m-r101x1/1",
    'bit_m-r101x3': "https://tfhub.dev/google/bit/m-r101x3/1",
    'bit_m-r152x4': "https://tfhub.dev/google/bit/m-r152x4/1",
}


def get_model(cfg):
    "Build the training model and the (image -> embedding) inference model."
    aux_arcface = False  # Chris Deotte suggested this

    if cfg.head == 'arcface':
        head = ArcMarginProductSubCenter
    else:
        raise ValueError(f"Invalid head: {cfg.head}")

    if cfg.adaptive_margin:
        raise NotImplementedError

    if cfg.arch_name.startswith('efnv1'):
        EFN = {'efnv1b0': efnv1.EfficientNetB0, 'efnv1b1': efnv1.EfficientNetB1,
               'efnv1b2': efnv1.EfficientNetB2, 'efnv1b3': efnv1.EfficientNetB3,
               'efnv1b4': efnv1.EfficientNetB4, 'efnv1b5': efnv1.EfficientNetB5,
               'efnv1b6': efnv1.EfficientNetB6, 'efnv1b7': efnv1.EfficientNetB7}
    if cfg.arch_name.startswith('efnv2'):
        EFN = {'efnv2s': efnv2.EfficientNetV2S, 'efnv2m': efnv2.EfficientNetV2M,
               'efnv2l': efnv2.EfficientNetV2L, 'efnv2xl': efnv2.EfficientNetV2XL}

    with tf.distribute.get_strategy().scope():
        margin = head(
            n_classes=cfg.N_CLASSES,
            s=30,
            m=0.3,
            k=cfg.subcenters or 1,
            easy_margin=False,
            name=f'head/{cfg.head}',
            dtype='float32')

        inp = tf.keras.layers.Input(shape=[*cfg.IMAGE_SIZE, 3], name='inp1')
        label = tf.keras.layers.Input(shape=(), name='inp2')
        if aux_arcface:
            label2 = tf.keras.layers.Input(shape=(), name='inp3')

        if cfg.arch_name.startswith(('efnv1', 'efnv2')):
            if cfg.arch_name.startswith('efnv1'):
                x = EFN[cfg.arch_name](weights=cfg.pretrained, include_top=False)(inp)
            else:
                x = EFN[cfg.arch_name](input_shape=(None, None, 3), num_classes=0,
                                       pretrained=cfg.pretrained)(inp)
            # Pooling options, shared by both EfficientNet families
            if cfg.pool == 'flatten':
                embed = tf.keras.layers.Flatten()(x)
            elif cfg.pool == 'fc':
                embed = tf.keras.layers.Flatten()(x)
                embed = tf.keras.layers.Dropout(0.1)(embed)
                embed = tf.keras.layers.Dense(1024)(embed)
            elif cfg.pool == 'concat':
                # Average + max pooling concatenated (the original code pooled
                # twice with GlobalAveragePooling2D, duplicating the features)
                embed = tf.keras.layers.concatenate([
                    tf.keras.layers.GlobalAveragePooling2D()(x),
                    tf.keras.layers.GlobalMaxPooling2D()(x)])
            elif cfg.pool == 'max':
                embed = tf.keras.layers.GlobalMaxPooling2D()(x)
            else:
                embed = tf.keras.layers.GlobalAveragePooling2D()(x)
        elif cfg.arch_name in TFHUB:
            # tfhub models cannot be modified => pooling cannot be changed!
            url = TFHUB[cfg.arch_name]
            model = hub.KerasLayer(url, trainable=True)
            embed = model(inp)
            assert cfg.pool in [None, False, 'avg', ''], 'tfhub model, no custom pooling supported!'
        elif cfg.arch_name in tfimm.list_models(pretrained="timm"):
            embed = tfimm.create_model(cfg.arch_name, pretrained=None, nb_classes=0)(inp)

        if len(cfg.dropout_ps) > 0:
            # Chris Deotte posted model code without Dropout/FC1 after pooling
            embed = tf.keras.layers.Dropout(cfg.dropout_ps[0])(embed)
            embed = tf.keras.layers.Dense(1024)(embed)  # tunable embedding size
            embed = tf.keras.layers.BatchNormalization()(embed)  # missing in public notebooks

        x = margin([embed, label])
        output = tf.keras.layers.Softmax(dtype='float32', name='arc' if cfg.aux_loss else None)(x)

        if cfg.aux_loss:
            aux_features = tf.keras.layers.Dense(cfg.n_species)(embed)
            aux_output = tf.keras.layers.Softmax(dtype='float32', name='aux')(aux_features)

        inputs = [inp, label, label2] if (cfg.aux_loss and aux_arcface) else [inp, label]
        outputs = (output, aux_output) if cfg.aux_loss else [output]

        model = tf.keras.models.Model(inputs=inputs, outputs=outputs)
        embed_model = tf.keras.models.Model(inputs=inp, outputs=embed)

    if cfg.FREEZE_BATCH_NORM:
        raise NotImplementedError

    return model, embed_model
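
# End-to-end usage sketch (hypothetical file names, fold and threshold values;
# the inference notebook supplies the real ones):
def _demo_inference():
    cfg = get_cfg('config.json')                      # hypothetical config path
    model, embed_model = get_model(cfg)
    model.load_weights('model.h5')                    # hypothetical checkpoint
    comp_embs = get_comp_embeddings(['embeddings.npz'], use_folds=[0])
    image = tf.zeros([512, 512, 3], dtype=tf.uint8)   # stand-in for a decoded image
    test_emb = get_test_embedding(image, [embed_model], sizes=[cfg.IMAGE_SIZE])
    # Both sides are L2-normalized, so the matmul yields cosine similarities
    similarities = (test_emb @ comp_embs.T).squeeze()  # shape: (n_images,)
    nearest = int(np.argmax(similarities))
    confidence = get_confidence(similarities[nearest], threshold=0.6)  # made-up threshold
    print(nearest, confidence)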