import copy

from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForMaskedLM
import pandas as pd
from gensim.models import KeyedVectors
from utils_sesgo_en_palabras import (
    cosine_similarity,
    normalize,
    project_params,
    take_two_sides_extreme_sorted
)


DIRECTION_METHODS = ['single', 'sum', 'pca']
DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
FIRST_PC_THRESHOLD = 0.5
MAX_NON_SPECIFIC_EXAMPLES = 1000

__all__ = ['GenderBiasWE', 'BiasWordEmbedding']


class Loader():
    def __init__(self):
        self.path_to_data = ''

    def load_tokenizer(self, tokenizer_path):
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_path, do_lower_case=True, )
        return tokenizer

    def load_data_from_file(self, data):
        return data

    def load_corpus_from_file(self, data):
        return data

    def load_language_model(self, model_path):
        model = AutoModelForMaskedLM.from_pretrained(
            model_path, output_hidden_states=True)
        return model


class Corpus():
    def __init__(self, corpus) -> None:
        self.vocabulary = self.load_vocabulary_from_corpus()
        self.corpus = corpus

    def get_context_from_text(self, word):
        pass

    def get_frequency(self, word):
        pass

    def get_most_frequent_coocurrence(self, word):
        pass


class Embedding():
    def __init__(self, word_vectors_path) -> None:
        self.wv = self.load_we_as_keyed_vectors(word_vectors_path)

    def load_we_as_keyed_vectors(self, word_vectors_path):
        we = KeyedVectors.load_word2vec_format(word_vectors_path)
        we.init_sims(replace=True)
        return we

    def get_word_vector(self, word, context=None):
        return word


class BiasExplorer():
    def __init__(self, model, only_lower=False, verbose=False,
                 identify_direction=False, to_normalize=True):
        # pylint: disable=undefined-variable

        # TODO: this is bad Python, ask someone about it
        # probably should be a better design
        # identify_direction doesn't have any meaning
        # for the class BiasWordEmbedding
        # The goal is to force this interfeace of sub-classes.
        if self.__class__ == __class__ and identify_direction is not False:
            raise ValueError('identify_direction must be False'
                             ' for an instance of {}'
                             .format(__class__))

        self.model = model

        # TODO: write unitest for when it is False
        self.only_lower = only_lower

        self._verbose = verbose

        self.direction = None
        self.positive_end = None
        self.negative_end = None

        if to_normalize:
            self.model.init_sims(replace=True)

    def __copy__(self):
        bias_word_embedding = self.__class__(self.model,
                                             self.only_lower,
                                             self._verbose,
                                             identify_direction=False)
        bias_word_embedding.direction = copy.deepcopy(self.direction)
        bias_word_embedding.positive_end = copy.deepcopy(self.positive_end)
        bias_word_embedding.negative_end = copy.deepcopy(self.negative_end)
        return bias_word_embedding

    def __deepcopy__(self, memo):
        bias_word_embedding = copy.copy(self)
        bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
        return bias_word_embedding

    def __getitem__(self, key):
        return self.model[key]

    def __contains__(self, item):
        return item in self.model

    def _is_direction_identified(self):
        if self.direction is None:
            raise RuntimeError('The direction was not identified'
                               ' for this {} instance'
                               .format(self.__class__.__name__))

    def _identify_subspace_by_pca(self, definitional_pairs, n_components):
        matrix = []

        for word1, word2 in definitional_pairs:
            vector1 = normalize(self[word1])
            vector2 = normalize(self[word2])

            center = (vector1 + vector2) / 2

            matrix.append(vector1 - center)
            matrix.append(vector2 - center)

        pca = PCA(n_components=n_components)
        pca.fit(matrix)

        if self._verbose:
            table = enumerate(pca.explained_variance_ratio_, start=1)
            headers = ['Principal Component',
                       'Explained Variance Ratio']

        return pca

    def __errorChecking(self, word):
        out_msj = ""

        if not word:
            out_msj = "Error: Primero debe ingresar una palabra!"
        else:
            if not word in self.model:
                out_msj = f"Error: La palabra '<b>{word}</b>' no se encuentra en el vocabulario!"
            
        if out_msj:
            out_msj = "<center><h3>"+out_msj+"</h3></center>"
        
        return out_msj    

    # TODO: add the SVD method from section 6 step 1
    # It seems there is a mistake there, I think it is the same as PCA
    # just with replacing it with SVD
    def _identify_direction(self, positive_end, negative_end,
                            definitional, method='pca'):
        if method not in DIRECTION_METHODS:
            raise ValueError('method should be one of {}, {} was given'.format(
                DIRECTION_METHODS, method))

        if positive_end == negative_end:
            raise ValueError('positive_end and negative_end'
                             'should be different, and not the same "{}"'
                             .format(positive_end))
        if self._verbose:
            print('Identify direction using {} method...'.format(method))

        direction = None

        if method == 'single':
            if self._verbose:
                print('Positive definitional end:', definitional[0])
                print('Negative definitional end:', definitional[1])
            direction = normalize(normalize(self[definitional[0]])
                                  - normalize(self[definitional[1]]))

        elif method == 'sum':
            group1_sum_vector = np.sum([self[word]
                                        for word in definitional[0]], axis=0)
            group2_sum_vector = np.sum([self[word]
                                        for word in definitional[1]], axis=0)

            diff_vector = (normalize(group1_sum_vector)
                           - normalize(group2_sum_vector))

            direction = normalize(diff_vector)

        elif method == 'pca':
            pca = self._identify_subspace_by_pca(definitional, 10)
            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                raise RuntimeError('The Explained variance'
                                   'of the first principal component should be'
                                   'at least {}, but it is {}'
                                   .format(FIRST_PC_THRESHOLD,
                                           pca.explained_variance_ratio_[0]))
            direction = pca.components_[0]

            # if direction is opposite (e.g. we cannot control
            # what the PCA will return)
            ends_diff_projection = cosine_similarity((self[positive_end]
                                                      - self[negative_end]),
                                                     direction)
            if ends_diff_projection < 0:
                direction = -direction  # pylint: disable=invalid-unary-operand-type

        self.direction = direction
        self.positive_end = positive_end
        self.negative_end = negative_end

    def project_on_direction(self, word):
        """Project the normalized vector of the word on the direction.
        :param str word: The word tor project
        :return float: The projection scalar
        """

        self._is_direction_identified()

        vector = self[word]
        projection_score = self.model.cosine_similarities(self.direction,
                                                          [vector])[0]
        return projection_score

    def _calc_projection_scores(self, words):
        self._is_direction_identified()

        df = pd.DataFrame({'word': words})

        # TODO: maybe using cosine_similarities on all the vectors?
        # it might be faster
        df['projection'] = df['word'].apply(self.project_on_direction)
        df = df.sort_values('projection', ascending=False)

        return df

    def calc_projection_data(self, words):
        """
        Calculate projection, projected and rejected vectors of a words list.
        :param list words: List of words
        :return: :class:`pandas.DataFrame` of the projection,
                 projected and rejected vectors of the words list
        """
        projection_data = []
        for word in words:
            vector = self[word]
            projection = self.project_on_direction(word)
            normalized_vector = normalize(vector)

            (projection,
             projected_vector,
             rejected_vector) = project_params(normalized_vector,
                                               self.direction)

            projection_data.append({'word': word,
                                    'vector': vector,
                                    'projection': projection,
                                    'projected_vector': projected_vector,
                                    'rejected_vector': rejected_vector})

        return pd.DataFrame(projection_data)

    def plot_dist_projections_on_direction(self, word_groups, ax=None):
        """Plot the projection scalars distribution on the direction.
        :param dict word_groups word: The groups to projects
        :return float: The ax object of the plot
        """

        if ax is None:
            _, ax = plt.subplots(1)

        names = sorted(word_groups.keys())

        for name in names:
            words = word_groups[name]
            label = '{} (#{})'.format(name, len(words))
            vectors = [self[word] for word in words]
            projections = self.model.cosine_similarities(self.direction,
                                                         vectors)
            sns.distplot(projections, hist=False, label=label, ax=ax)

        plt.axvline(0, color='k', linestyle='--')

        plt.title('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))
        plt.xlabel('Direction Projection')
        plt.ylabel('Density')
        ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

        return ax

    def __errorChecking(self, word):
        out_msj = ""

        if not word:
            out_msj = "Error: Primero debe ingresar una palabra!"
        else:
            if not word in self.model:
                out_msj = f"Error: La palabra '<b>{word}</b>' no se encuentra en el vocabulario!"
            
        if out_msj:
            out_msj = "<center><h3>"+out_msj+"</h3></center>"
        
        return out_msj    

    def parse_words(self, string):
        words = string.strip()
        if words:
            words = [word.strip() for word in words.split(',') if word != ""]
        return words

    def check_oov(self, wordlists):
        for wordlist in wordlists:
            parsed_words = self.parse_words(wordlist)
            for word in parsed_words:
                msg = self.__errorChecking(word)
                if msg:
                    return msg 
        return None 

    def plot_projections_2d(self,
                            wordlist,
                            wordlist_1,
                            wordlist_2,
                            wordlist_3,
                            wordlist_4,
                            color_wordlist,
                            color_wordlist_1,
                            color_wordlist_2,
                            color_wordlist_3,
                            color_wordlist_4,
                            plot_neighbors,
                            n_alpha,
                            fontsize,
                            figsize=(15, 15),
                            method='pca'
                            ):
        # convertirlas a vector
        choices = [0, 1, 2, 3, 4]
        word_list = []
        wordlist_choice = [wordlist, wordlist_1, wordlist_2, wordlist_3, wordlist_4]
        err= self.check_oov(wordlist_choice)
        if err:
            return None, err
        words_colors = {}
        label_dict = {
            0: 'Diagnostico',
            1: 'Lista de palabras 1',
            2: 'Lista de palabras 2',
            3: 'Lista de palabras 3',
            4: 'Lista de palabras 4'
        }
        color_dict = {
            0: color_wordlist,
            1: color_wordlist_1,
            2: color_wordlist_2,
            3: color_wordlist_3,
            4: color_wordlist_4
        }
        word_bias_space = {}
        alpha = {}

        for raw_word_list, color in zip(wordlist_choice, choices):
            parsed_words = self.parse_words(raw_word_list)
            if parsed_words:
                for word in parsed_words:
                    word_bias_space[word] = color
                    words_colors[word] = color_dict[color]
                    alpha[word] = 1
                    if plot_neighbors:
                        neighbors = [w for w,s in self.model.most_similar(word,topn=5)]
                        for n in neighbors:
                            if n not in alpha:
                                word_bias_space[n] = color
                                words_colors[n] = color_dict[color]
                                alpha[n] = n_alpha
                        word_list += neighbors
            word_list += parsed_words
        if not word_list:
            return None, "<center><h3>" + "Ingresa al menos 2 palabras para continuar" + "<center><h3>"
        embeddings = [self.model[word] for word in word_list]
        words_embedded = PCA(
            n_components=2, random_state=1).fit_transform(embeddings)
        data = pd.DataFrame(words_embedded)
        data['word'] = word_list
        data['color'] = [words_colors[word] for word in word_list]
        data['alpha'] = [alpha[word] for word in word_list]
        data['word_bias_space'] = [word_bias_space[word] for word in word_list]
        fig, ax = plt.subplots(figsize=figsize)

        sns.scatterplot(
            data=data[data['alpha'] == 1],
            x=0,
            y=1,
            style='word_bias_space',
            hue='word_bias_space',
            ax=ax,
            palette=color_dict
            )
        if plot_neighbors:
            sns.scatterplot(
                data=data[data['alpha'] != 1],
                x=0,
                y=1,
                style='color',
                hue='word_bias_space',
                ax=ax,
                alpha=n_alpha,
                legend=False,
                palette=color_dict
            )
        for i, label in enumerate(word_list):
            x, y = words_embedded[i, :]
            ax.annotate(label, xy=(x, y), xytext=(5, 2),color=words_colors[label],
            textcoords='offset points',
            ha='right', va='bottom', size=fontsize, alpha=alpha[label])

        ax.set_xticks([])
        ax.set_yticks([])

        fig.tight_layout()
        fig.canvas.draw()

        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        w, h = fig.canvas.get_width_height()
        im = data.reshape((int(h), int(w), -1))
        return im, ''


class WEBiasExplorer2d(BiasExplorer):
    def __init__(self, word_embedding) -> None:
        super().__init__(word_embedding)

    def calculate_bias(
        self,
        palabras_extremo_1,
        palabras_extremo_2,
        palabras_para_situar
    ):

        wordlists = [
            palabras_extremo_1,
            palabras_extremo_2,
            palabras_para_situar
            ]
        err = self.check_oov(wordlists)
        for wordlist in wordlists:
            if not wordlist:
                err = "<center><h3>" + 'Debe ingresar al menos 1 palabra en las lista de palabras a diagnosticar, sesgo 1 y sesgo 2' +"<center><h3>"
        if err:
            return None, err


        err = self.check_oov([palabras_extremo_1,palabras_extremo_2,palabras_para_situar])
        if err:
            return None, err
        palabras_extremo_1 = self.parse_words(palabras_extremo_1)
        palabras_extremo_2 = self.parse_words(palabras_extremo_2)
        palabras_para_situar = self.parse_words(palabras_para_situar)
        im = self.get_bias_plot(
            palabras_para_situar,
            definitional=(
                palabras_extremo_1, palabras_extremo_2),
            method='sum',
            n_extreme=10
        )
        return im, ''

    def get_bias_plot(self,
                      palabras_para_situar,
                      definitional,
                      method='sum',
                      n_extreme=10,
                      figsize=(10, 10)
                      ):

        fig, ax = plt.subplots(1, figsize=figsize)
        self.method = method
        self.plot_projection_scores(
            definitional,
            palabras_para_situar, n_extreme, ax=ax,)

        fig.tight_layout()
        fig.canvas.draw()

        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        w, h = fig.canvas.get_width_height()
        im = data.reshape((int(h), int(w), -1))
        return im

    def plot_projection_scores(self, definitional,
                               words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.
        :param list words: The words tor project
        :param int or None n_extreme: The number of extreme words to show
        :return: The ax object of the plot
        """
        nombre_del_extremo_1 = ', '.join(definitional[0])
        nombre_del_extremo_2 = ', '.join(definitional[1])

        self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
                                 definitional=definitional,
                                 method='sum')

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection'] = projections_df['projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))

        most_extream_projection = np.round(
            projections_df['projection']
            .abs()
            .max(),
            decimals=1)

        sns.barplot(x='projection', y='word', data=projections_df,
                    palette=projections_df['color'])

        plt.xticks(np.arange(-most_extream_projection,
                             most_extream_projection + axis_projection_step,
                             axis_projection_step))
        xlabel = ('← {} {} {} →'.format(self.negative_end,
                                        ' ' * 20,
                                        self.positive_end))

        plt.xlabel(xlabel)
        plt.ylabel('Words')

        return ax


class WEBiasExplorer4d(BiasExplorer):
    def __init__(self, word_embedding) -> None:
        super().__init__(word_embedding)

    def calculate_bias(
        self,
        palabras_extremo_1,
        palabras_extremo_2,
        palabras_extremo_3,
        palabras_extremo_4,
        palabras_para_situar
    ):
        wordlists = [
            palabras_extremo_1,
            palabras_extremo_2,
            palabras_extremo_3, 
            palabras_extremo_4,
            palabras_para_situar
            ]
        err = self.check_oov(wordlists)
        for wordlist in wordlists:
            if not wordlist:
                err = "<center><h3>" + '¡Para graficar con 4 espacios, debe ingresar al menos 1 palabra en todas las listas!' + "<center><h3>"
        if err:
            return None, err

        palabras_extremo_1 = self.parse_words(palabras_extremo_1)
        palabras_extremo_2 = self.parse_words(palabras_extremo_2)
        palabras_extremo_3 = self.parse_words(palabras_extremo_3)
        palabras_extremo_4 = self.parse_words(palabras_extremo_4)

        palabras_para_situar = self.parse_words(palabras_para_situar)

        im = self.get_bias_plot(
            palabras_para_situar,
            definitional_1=(
                palabras_extremo_1, palabras_extremo_2),
            definitional_2=(
                palabras_extremo_3, palabras_extremo_4),
            method='sum',
            n_extreme=10
        )
        return im, ''

    def get_bias_plot(self,
                      palabras_para_situar,
                      definitional_1,
                      definitional_2,
                      method='sum',
                      n_extreme=10,
                      figsize=(10, 10)
                      ):

        fig, ax = plt.subplots(1, figsize=figsize)
        self.method = method
        self.plot_projection_scores(
            definitional_1,
            definitional_2,
            palabras_para_situar, n_extreme, ax=ax,)
        fig.canvas.draw()

        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
        w, h = fig.canvas.get_width_height()
        im = data.reshape((int(h), int(w), -1))
        return im

    def plot_projection_scores(self, definitional_1, definitional_2,
                               words, n_extreme=10,
                               ax=None, axis_projection_step=None):
        """Plot the projection scalar of words on the direction.
        :param list words: The words tor project
        :param int or None n_extreme: The number of extreme words to show
        :return: The ax object of the plot
        """

        nombre_del_extremo_1 = ', '.join(definitional_1[1])
        nombre_del_extremo_2 = ', '.join(definitional_1[0])

        self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
                                 definitional=definitional_1,
                                 method='sum')

        self._is_direction_identified()

        projections_df = self._calc_projection_scores(words)
        projections_df['projection_x'] = projections_df['projection'].round(2)

        nombre_del_extremo_3 = ', '.join(definitional_2[1])
        nombre_del_extremo_4 = ', '.join(definitional_2[0])
        self._identify_direction(nombre_del_extremo_3, nombre_del_extremo_4,
                                 definitional=definitional_2,
                                 method='sum')

        self._is_direction_identified()

        projections_df['projection_y'] = self._calc_projection_scores(words)[
            'projection'].round(2)

        if n_extreme is not None:
            projections_df = take_two_sides_extreme_sorted(projections_df,
                                                           n_extreme=n_extreme)

        if ax is None:
            _, ax = plt.subplots(1)

        if axis_projection_step is None:
            axis_projection_step = 0.1

        cmap = plt.get_cmap('RdBu')
        projections_df['color'] = ((projections_df['projection'] + 0.5)
                                   .apply(cmap))
        most_extream_projection = np.round(
            projections_df['projection']
            .abs()
            .max(),
            decimals=1)
        sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
                        palette=projections_df['color'])

        plt.xticks(np.arange(-most_extream_projection,
                             most_extream_projection + axis_projection_step,
                             axis_projection_step))
        for _, row in (projections_df.iterrows()):
            ax.annotate(
                row['word'], (row['projection_x'], row['projection_y']))
        x_label = '← {} {} {} →'.format(nombre_del_extremo_1,
                                        ' ' * 20,
                                        nombre_del_extremo_2)

        y_label = '← {} {} {} →'.format(nombre_del_extremo_3,
                                        ' ' * 20,
                                        nombre_del_extremo_4)

        plt.xlabel(x_label)
        ax.xaxis.set_label_position('bottom')
        ax.xaxis.set_label_coords(.5, 0)

        plt.ylabel(y_label)
        ax.yaxis.set_label_position('left')
        ax.yaxis.set_label_coords(0, .5)

        ax.spines['left'].set_position('center')
        ax.spines['bottom'].set_position('center')

        ax.set_xticks([])
        ax.set_yticks([])
        #plt.yticks([], [])
        # ax.spines['left'].set_position('zero')
        # ax.spines['bottom'].set_position('zero')

        return ax