Spaces:

vialibre
/

edia_we_en

Runtime error

App Files Files Community

LMartinezEXEX committed on Dec 13, 2022

Commit

8081e11

1 Parent(s): 6ff911e

Type hinted BiasExplorer classes.

Browse files

Moved utility functions to utils.py
Backtracked on using child classes.

Files changed (3) hide show

modules/module_BiasExplorer.py +135 -314
modules/module_connection.py +13 -10
modules/utils.py +58 -0

modules/module_BiasExplorer.py CHANGED Viewed

@@ -1,67 +1,15 @@
-# ToDo: Pendiente eliminar clases/métodos que no son utilizados. Luego, unificar sintaxix e incluir typing.
 import copy
 import numpy as np
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
-def take_two_sides_extreme_sorted(
-    df,
-    n_extreme,
-    part_column=None,
-    head_value='',
-    tail_value=''
-):
-    head_df = df.head(n_extreme)[:]
-    tail_df = df.tail(n_extreme)[:]
-    if part_column is not None:
-        head_df[part_column] = head_value
-        tail_df[part_column] = tail_value
-    return (pd.concat([head_df, tail_df])
-            .drop_duplicates()
-            .reset_index(drop=True))
-def normalize(v):
-    """Normalize a 1-D vector."""
-    if v.ndim != 1:
-        raise ValueError('v should be 1-D, {}-D was given'.format(
-            v.ndim))
-    norm = np.linalg.norm(v)
-    if norm == 0:
-        return v
-    return v / norm
-def project_params(u, v):
-    """Projecting and rejecting the vector v onto direction u with scalar."""
-    normalize_u = normalize(u)
-    projection = (v @ normalize_u)
-    projected_vector = projection * normalize_u
-    rejected_vector = v - projected_vector
-    return projection, projected_vector, rejected_vector
-def cosine_similarity(v, u):
-    """Calculate the cosine similarity between two vectors."""
-    v_norm = np.linalg.norm(v)
-    u_norm = np.linalg.norm(u)
-    similarity = v @ u / (v_norm * u_norm)
-    return similarity
-DIRECTION_METHODS = ['single', 'sum', 'pca']
-DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
-FIRST_PC_THRESHOLD = 0.5
-MAX_NON_SPECIFIC_EXAMPLES = 1000
-__all__ = ['GenderBiasWE', 'BiasWordEmbedding']
-class WordBiasExplorer():
     def __init__(
         self,
         embedding  # Class Embedding instance
@@ -71,10 +19,11 @@ class WordBiasExplorer():
         self.direction = None
         self.positive_end = None
         self.negative_end = None
     def __copy__(
         self
-    ):
         bias_word_embedding = self.__class__(self.embedding)
         bias_word_embedding.direction = copy.deepcopy(self.direction)
@@ -84,8 +33,8 @@ class WordBiasExplorer():
     def __deepcopy__(
         self,
-        memo
-    ):
         bias_word_embedding = copy.copy(self)
         bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
@@ -115,9 +64,9 @@ class WordBiasExplorer():
     def _identify_subspace_by_pca(
         self,
-        definitional_pairs,
-        n_components
-    ):
         matrix = []
@@ -137,15 +86,16 @@ class WordBiasExplorer():
     def _identify_direction(
         self,
-        positive_end,
-        negative_end,
-        definitional,
-        method='pca'
-    ):
-        if method not in DIRECTION_METHODS:
             raise ValueError('method should be one of {}, {} was given'.format(
-                DIRECTION_METHODS, method))
         if positive_end == negative_end:
             raise ValueError('positive_end and negative_end'
@@ -170,11 +120,11 @@ class WordBiasExplorer():
         elif method == 'pca':
             pca = self._identify_subspace_by_pca(definitional, 10)
-            if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
                 raise RuntimeError('The Explained variance'
                                    'of the first principal component should be'
                                    'at least {}, but it is {}'
-                                   .format(FIRST_PC_THRESHOLD,
                                            pca.explained_variance_ratio_[0]))
             direction = pca.components_[0]
@@ -193,7 +143,7 @@ class WordBiasExplorer():
     def project_on_direction(
         self,
         word: str
-    ):
         """Project the normalized vector of the word on the direction.
         :param str word: The word tor project
@@ -209,8 +159,8 @@ class WordBiasExplorer():
     def _calc_projection_scores(
         self,
-        words
-    ):
         self._is_direction_identified()
@@ -225,8 +175,8 @@ class WordBiasExplorer():
     def calc_projection_data(
         self,
-        words
-    ):
         """
         Calculate projection, projected and rejected vectors of a words list.
@@ -254,9 +204,9 @@ class WordBiasExplorer():
     def plot_dist_projections_on_direction(
         self,
-        word_groups,
-        ax=None
-    ):
         """Plot the projection scalars distribution on the direction.
         :param dict word_groups word: The groups to projects
@@ -289,8 +239,8 @@ class WordBiasExplorer():
     def __errorChecking(
         self,
-        word
-    ):
         out_msj = ""
@@ -304,8 +254,8 @@ class WordBiasExplorer():
     def check_oov(
         self,
-        wordlists
-    ):
         for wordlist in wordlists:
             for word in wordlist:
@@ -314,201 +264,72 @@ class WordBiasExplorer():
                     return msg
         return None
-    def plot_biased_words(
-        self,
-        words_to_diagnose,
-        wordlist_right,
-        wordlist_left,
-        wordlist_top=[],
-        wordlist_bottom=[]
-    ):
-        bias_2D = wordlist_top == [] and wordlist_bottom == []
-        if bias_2D and (not wordlist_right or not wordlist_left):
-            raise Exception('For bar plot, wordlist right and left can NOT be empty')
-        elif not bias_2D and (not wordlist_right or not wordlist_left or not wordlist_top or not wordlist_bottom):
-            raise Exception('For plane plot, wordlist right, left, top and down can NOT be empty')
-        err = self.check_oov([words_to_diagnose + wordlist_right + wordlist_left + wordlist_top + wordlist_bottom])
         if err:
             raise Exception(err)
         return self.get_bias_plot(
-            bias_2D,
-            words_to_diagnose,
-            definitional_1=(wordlist_right, wordlist_left),
-            definitional_2=(wordlist_top, wordlist_bottom)
-        )
     def get_bias_plot(
         self,
-        plot_2D,
-        words_to_diagnose,
-        definitional_1,
-        definitional_2=([], []),
-        method='sum',
-        n_extreme=10,
-        figsize=(15, 10)
-    ):
-        fig, ax = plt.subplots(1, figsize=figsize)
-        self.method = method
-        self.plot_projection_scores(plot_2D, words_to_diagnose, definitional_1, definitional_2, n_extreme, ax)
-        if plot_2D:
-            fig.tight_layout()
-        fig.canvas.draw()
-        return fig
-    def plot_projection_scores(
-        self,
-        plot_2D,
-        words,
-        definitional_1,
-        definitional_2=([], []),
-        n_extreme=10,
-        ax=None,
-        axis_projection_step=0.1
-    ):
-        name_left  = ', '.join(definitional_1[1])
-        name_right = ', '.join(definitional_1[0])
-        self._identify_direction(name_left, name_right, definitional=definitional_1, method='sum')
-        self._is_direction_identified()
-        projections_df = self._calc_projection_scores(words)
-        projections_df['projection_x'] = projections_df['projection'].round(2)
-        if not plot_2D:
-            name_top    = ', '.join(definitional_2[1])
-            name_bottom = ', '.join(definitional_2[0])
-            self._identify_direction(name_top, name_bottom, definitional=definitional_2, method='sum')
-            self._is_direction_identified()
-            projections_df['projection_y'] = self._calc_projection_scores(words)['projection'].round(2)
-        if n_extreme is not None:
-            projections_df = take_two_sides_extreme_sorted(projections_df, n_extreme=n_extreme)
-        if ax is None:
-            _, ax = plt.subplots(1)
-        cmap = plt.get_cmap('RdBu')
-        projections_df['color'] = ((projections_df['projection'] + 0.5).apply(cmap))
-        most_extream_projection = np.round(
-            projections_df['projection']
-            .abs()
-            .max(),
-            decimals=1)
-        if plot_2D:
-            sns.barplot(x='projection', y='word', data=projections_df,
-                    palette=projections_df['color'])
-        else:
-            # ToDo: revisar este warning:
-            # Ignoring `palette` because no `hue` variable has been assigned. sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
-            sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
-                        palette=projections_df['color'])
-        plt.xticks(np.arange(-most_extream_projection,
-                             most_extream_projection + axis_projection_step,
-                             axis_projection_step))
-        x_label = '← {} {} {} →'.format(name_left,
-                                        ' ' * 20,
-                                        name_right)
-        if not plot_2D:
-            y_label = '← {} {} {} →'.format(name_top,
-                                        ' ' * 20,
-                                        name_bottom)
-            for _, row in (projections_df.iterrows()):
-                ax.annotate(row['word'], (row['projection_x'], row['projection_y']))
-        plt.xlabel(x_label)
-        plt.ylabel('Words')
-        if not plot_2D:
-            ax.xaxis.set_label_position('bottom')
-            ax.xaxis.set_label_coords(.5, 0)
-            plt.ylabel(y_label)
-            ax.yaxis.set_label_position('left')
-            ax.yaxis.set_label_coords(0, .5)
-            ax.spines['left'].set_position('center')
-            ax.spines['bottom'].set_position('center')
-            ax.set_xticks([])
-            ax.set_yticks([])
-        return ax
-# TODO: Would be erased if decided to keep all info in BiasWordExplorer
-class WEBiasExplorer2d(WordBiasExplorer):
-    def __init__(self, word_embedding) -> None:
-        super().__init__(word_embedding)
-    def calculate_bias( self,
-                        palabras_extremo_1,
-                        palabras_extremo_2,
-                        palabras_para_situar
-                        ):
-        wordlists = [palabras_extremo_1, palabras_extremo_2, palabras_para_situar]
-        err = self.check_oov(wordlists)
-        for wordlist in wordlists:
-            if not wordlist:
-                err = "<center><h3>" + 'Debe ingresar al menos 1 palabra en las lista de palabras a diagnosticar, sesgo 1 y sesgo 2' + "<center><h3>"
-        if err:
-            return None, err
-        im = self.get_bias_plot(
-            palabras_para_situar,
-            definitional=(
-                palabras_extremo_1, palabras_extremo_2),
-            method='sum',
-            n_extreme=10
-        )
-        return im, ''
-    def get_bias_plot(self,
-                      palabras_para_situar,
-                      definitional,
-                      method='sum',
-                      n_extreme=10,
-                      figsize=(10, 10)
-                      ):
         fig, ax = plt.subplots(1, figsize=figsize)
         self.method = method
         self.plot_projection_scores(
             definitional,
-            palabras_para_situar, n_extreme, ax=ax,)
         fig.tight_layout()
         fig.canvas.draw()
-        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
-        w, h = fig.canvas.get_width_height()
-        im = data.reshape((int(h), int(w), -1))
-        return im
-    def plot_projection_scores(self, definitional,
-                               words, n_extreme=10,
-                               ax=None, axis_projection_step=None):
         """Plot the projection scalar of words on the direction.
         :param list words: The words tor project
         :param int or None n_extreme: The number of extreme words to show
         :return: The ax object of the plot
         """
-        nombre_del_extremo_1 = ', '.join(definitional[0])
-        nombre_del_extremo_2 = ', '.join(definitional[1])
-        self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
                                  definitional=definitional,
                                  method='sum')
@@ -553,80 +374,83 @@ class WEBiasExplorer2d(WordBiasExplorer):
         return ax
-class WEBiasExplorer4d(WordBiasExplorer):
-    def __init__(self, word_embedding) -> None:
-        super().__init__(word_embedding)
-    def calculate_bias( self,
-                        palabras_extremo_1,
-                        palabras_extremo_2,
-                        palabras_extremo_3,
-                        palabras_extremo_4,
-                        palabras_para_situar
-                        ):
         wordlists = [
-            palabras_extremo_1,
-            palabras_extremo_2,
-            palabras_extremo_3,
-            palabras_extremo_4,
-            palabras_para_situar
         ]
         for wordlist in wordlists:
             if not wordlist:
-                err = "<center><h3>" + \
-                    '¡Para graficar con 4 espacios, debe ingresar al menos 1 palabra en todas las listas!' + "<center><h3>"
         err = self.check_oov(wordlist)
         if err:
-            return None, err
-        im = self.get_bias_plot(
-            palabras_para_situar,
-            definitional_1=(
-                palabras_extremo_1, palabras_extremo_2),
-            definitional_2=(
-                palabras_extremo_3, palabras_extremo_4),
-            method='sum',
-            n_extreme=10
-        )
-        return im, ''
-    def get_bias_plot(self,
-                      palabras_para_situar,
-                      definitional_1,
-                      definitional_2,
-                      method='sum',
-                      n_extreme=10,
-                      figsize=(10, 10)
-                      ):
         fig, ax = plt.subplots(1, figsize=figsize)
         self.method = method
         self.plot_projection_scores(
             definitional_1,
             definitional_2,
-            palabras_para_situar, n_extreme, ax=ax,)
         fig.canvas.draw()
-        data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
-        w, h = fig.canvas.get_width_height()
-        im = data.reshape((int(h), int(w), -1))
-        return im
-    def plot_projection_scores(self, definitional_1, definitional_2,
-                               words, n_extreme=10,
-                               ax=None, axis_projection_step=None):
         """Plot the projection scalar of words on the direction.
         :param list words: The words tor project
         :param int or None n_extreme: The number of extreme words to show
         :return: The ax object of the plot
         """
-        nombre_del_extremo_1 = ', '.join(definitional_1[1])
-        nombre_del_extremo_2 = ', '.join(definitional_1[0])
-        self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
                                  definitional=definitional_1,
                                  method='sum')
@@ -635,9 +459,9 @@ class WEBiasExplorer4d(WordBiasExplorer):
         projections_df = self._calc_projection_scores(words)
         projections_df['projection_x'] = projections_df['projection'].round(2)
-        nombre_del_extremo_3 = ', '.join(definitional_2[1])
-        nombre_del_extremo_4 = ', '.join(definitional_2[0])
-        self._identify_direction(nombre_del_extremo_3, nombre_del_extremo_4,
                                  definitional=definitional_2,
                                  method='sum')
@@ -673,13 +497,13 @@ class WEBiasExplorer4d(WordBiasExplorer):
         for _, row in (projections_df.iterrows()):
             ax.annotate(
                 row['word'], (row['projection_x'], row['projection_y']))
-        x_label = '← {} {} {} →'.format(nombre_del_extremo_1,
                                         ' ' * 20,
-                                        nombre_del_extremo_2)
-        y_label = '← {} {} {} →'.format(nombre_del_extremo_3,
                                         ' ' * 20,
-                                        nombre_del_extremo_4)
         plt.xlabel(x_label)
         ax.xaxis.set_label_position('bottom')
@@ -694,8 +518,5 @@ class WEBiasExplorer4d(WordBiasExplorer):
         ax.set_xticks([])
         ax.set_yticks([])
-        #plt.yticks([], [])
-        # ax.spines['left'].set_position('zero')
-        # ax.spines['bottom'].set_position('zero')
-        return ax

 import copy
 import numpy as np
 import pandas as pd
 import seaborn as sns
 import matplotlib.pyplot as plt
 from sklearn.decomposition import PCA
+from typing import List, Dict, Tuple, Optional, Any
+from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted
+__all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
+class WordBiasExplorer:
     def __init__(
         self,
         embedding  # Class Embedding instance
         self.direction = None
         self.positive_end = None
         self.negative_end = None
+        self.DIRECTION_METHODS = ['single', 'sum', 'pca']
     def __copy__(
         self
+    ) -> 'WordBiasExplorer':
         bias_word_embedding = self.__class__(self.embedding)
         bias_word_embedding.direction = copy.deepcopy(self.direction)
     def __deepcopy__(
         self,
+        memo: Optional[Dict[int, Any]]
+    )-> 'WordBiasExplorer':
         bias_word_embedding = copy.copy(self)
         bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
     def _identify_subspace_by_pca(
         self,
+        definitional_pairs: List[Tuple[str, str]],
+        n_components: int
+    ) -> PCA:
         matrix = []
     def _identify_direction(
         self,
+        positive_end: str,
+        negative_end: str,
+        definitional: Tuple[str, str],
+        method: str='pca',
+        first_pca_threshold: float=0.5
+    ) -> None:
+        if method not in self.DIRECTION_METHODS:
             raise ValueError('method should be one of {}, {} was given'.format(
+                self.DIRECTION_METHODS, method))
         if positive_end == negative_end:
             raise ValueError('positive_end and negative_end'
         elif method == 'pca':
             pca = self._identify_subspace_by_pca(definitional, 10)
+            if pca.explained_variance_ratio_[0] < first_pca_threshold:
                 raise RuntimeError('The Explained variance'
                                    'of the first principal component should be'
                                    'at least {}, but it is {}'
+                                   .format(first_pca_threshold,
                                            pca.explained_variance_ratio_[0]))
             direction = pca.components_[0]
     def project_on_direction(
         self,
         word: str
+    ) -> float:
         """Project the normalized vector of the word on the direction.
         :param str word: The word tor project
     def _calc_projection_scores(
         self,
+        words: List[str]
+    ) -> pd.DataFrame:
         self._is_direction_identified()
     def calc_projection_data(
         self,
+        words: List[str]
+    ) -> pd.DataFrame:
         """
         Calculate projection, projected and rejected vectors of a words list.
     def plot_dist_projections_on_direction(
         self,
+        word_groups: Dict[str, List[str]],
+        ax: plt.Axes=None
+    ) -> plt.Axes:
         """Plot the projection scalars distribution on the direction.
         :param dict word_groups word: The groups to projects
     def __errorChecking(
         self,
+        word: str
+    ) -> str:
         out_msj = ""
     def check_oov(
         self,
+        wordlists: List[str]
+    ) -> str:
         for wordlist in wordlists:
             for word in wordlist:
                     return msg
         return None
+class WEBiasExplorer2Spaces(WordBiasExplorer):
+    def __init__(self, embedding) -> None:
+        super().__init__(embedding)
+    def calculate_bias(
+        self,
+        wordlist_to_diagnose: List[str],
+        wordlist_right: List[str],
+        wordlist_left: List[str]
+    ) -> plt.Figure:
+        wordlists = [wordlist_to_diagnose, wordlist_right, wordlist_left]
+        for wordlist in wordlists:
+            if not wordlist:
+                raise Exception('At least one word should be in the to diagnose list, bias 1 list and bias 2 list')
+        err = self.check_oov(wordlists)
         if err:
             raise Exception(err)
         return self.get_bias_plot(
+                wordlist_to_diagnose,
+                definitional=(wordlist_left, wordlist_right),
+                method='sum',
+                n_extreme=10
+            )
     def get_bias_plot(
         self,
+        wordlist_to_diagnose: List[str],
+        definitional: Tuple[List[str], List[str]],
+        method: str='sum',
+        n_extreme: int=10,
+        figsize: Tuple[int, int]=(10, 10)
+    ) -> plt.Figure:
         fig, ax = plt.subplots(1, figsize=figsize)
         self.method = method
         self.plot_projection_scores(
             definitional,
+            wordlist_to_diagnose, n_extreme, ax=ax,)
         fig.tight_layout()
         fig.canvas.draw()
+        return fig
+    def plot_projection_scores(
+        self,
+        definitional: Tuple[List[str], List[str]],
+        words: List[str],
+        n_extreme: int=10,
+        ax: plt.Axes=None,
+        axis_projection_step: float=None
+    ) -> plt.Axes:
         """Plot the projection scalar of words on the direction.
         :param list words: The words tor project
         :param int or None n_extreme: The number of extreme words to show
         :return: The ax object of the plot
         """
+        name_left = ', '.join(definitional[0])
+        name_right = ', '.join(definitional[1])
+        self._identify_direction(name_left, name_right,
                                  definitional=definitional,
                                  method='sum')
         return ax
+class WEBiasExplorer4Spaces(WordBiasExplorer):
+    def __init__(self, embedding) -> None:
+        super().__init__(embedding)
+    def calculate_bias(
+        self,
+        wordlist_to_diagnose: List[str],
+        wordlist_right: List[str],
+        wordlist_left: List[str],
+        wordlist_top: List[str],
+        wordlist_bottom: List[str],
+    ) -> plt.Figure:
         wordlists = [
+            wordlist_to_diagnose,
+            wordlist_left,
+            wordlist_right,
+            wordlist_top,
+            wordlist_bottom
         ]
         for wordlist in wordlists:
             if not wordlist:
+                raise Exception('To plot with 4 spaces, you must enter at least one word in all lists')
         err = self.check_oov(wordlist)
         if err:
+            raise Exception(err)
+        return self.get_bias_plot(
+                wordlist_to_diagnose,
+                definitional_1=(wordlist_right, wordlist_left),
+                definitional_2=(wordlist_top, wordlist_bottom),
+                method='sum',
+                n_extreme=10
+            )
+    def get_bias_plot(
+        self,
+        wordlist_to_diagnose: List[str],
+        definitional_1: Tuple[List[str], List[str]],
+        definitional_2: Tuple[List[str], List[str]],
+        method: str='sum',
+        n_extreme: int=10,
+        figsize: Tuple[int, int]=(10, 10)
+    ) -> plt.Figure:
         fig, ax = plt.subplots(1, figsize=figsize)
         self.method = method
         self.plot_projection_scores(
             definitional_1,
             definitional_2,
+            wordlist_to_diagnose, n_extreme, ax=ax,)
         fig.canvas.draw()
+        return fig
+    def plot_projection_scores(
+        self,
+        definitional_1: Tuple[List[str], List[str]],
+        definitional_2: Tuple[List[str], List[str]],
+        words: List[str],
+        n_extreme: int=10,
+        ax: plt.Axes=None,
+        axis_projection_step: float=None
+    ) -> plt.Axes:
         """Plot the projection scalar of words on the direction.
         :param list words: The words tor project
         :param int or None n_extreme: The number of extreme words to show
         :return: The ax object of the plot
         """
+        name_left = ', '.join(definitional_1[1])
+        name_right = ', '.join(definitional_1[0])
+        self._identify_direction(name_left, name_right,
                                  definitional=definitional_1,
                                  method='sum')
         projections_df = self._calc_projection_scores(words)
         projections_df['projection_x'] = projections_df['projection'].round(2)
+        name_top = ', '.join(definitional_2[1])
+        name_bottom = ', '.join(definitional_2[0])
+        self._identify_direction(name_top, name_bottom,
                                  definitional=definitional_2,
                                  method='sum')
         for _, row in (projections_df.iterrows()):
             ax.annotate(
                 row['word'], (row['projection_x'], row['projection_y']))
+        x_label = '← {} {} {} →'.format(name_left,
                                         ' ' * 20,
+                                        name_right)
+        y_label = '← {} {} {} →'.format(name_top,
                                         ' ' * 20,
+                                        name_bottom)
         plt.xlabel(x_label)
         ax.xaxis.set_label_position('bottom')
         ax.set_xticks([])
         ax.set_yticks([])
+        return ax

modules/module_connection.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from abc import ABC
 from modules.module_WordExplorer import WordExplorer
-from modules.module_BiasExplorer import WordBiasExplorer
 from typing import List, Tuple
@@ -120,7 +120,10 @@ class BiasWordExplorerConnector(Connector):
         else:
             raise KeyError
-        self.bias_word_explorer = WordBiasExplorer(
             embedding=embedding
         )
@@ -143,14 +146,14 @@ class BiasWordExplorerConnector(Connector):
         if err:
             return None, self.process_error(err)
-        err = self.bias_word_explorer.check_oov(word_lists)
         if err:
             return None, self.process_error(err)
-        fig = self.bias_word_explorer.plot_biased_words(
             to_diagnose_list,
-            wordlist_2,
-            wordlist_1
         )
         return fig, self.process_error(err)
@@ -174,20 +177,20 @@ class BiasWordExplorerConnector(Connector):
         wordlists = [wordlist_1, wordlist_2, wordlist_3, wordlist_4, to_diagnose_list]
         for _list in wordlists:
             if not _list:
-                err = "To plot with 4 spaces, you must enter at least one word in all lists."
         if err:
             return None, self.process_error(err)
-        err = self.bias_word_explorer.check_oov(wordlists)
         if err:
             return None, self.process_error(err)
-        fig = self.bias_word_explorer.plot_biased_words(
             to_diagnose_list,
             wordlist_1,
             wordlist_2,
             wordlist_3,
             wordlist_4
         )
         return fig, self.process_error(err)

 from abc import ABC
 from modules.module_WordExplorer import WordExplorer
+from modules.module_BiasExplorer import WEBiasExplorer2Spaces, WEBiasExplorer4Spaces
 from typing import List, Tuple
         else:
             raise KeyError
+        self.bias_word_explorer_2_spaces = WEBiasExplorer2Spaces(
+            embedding=embedding
+        )
+        self.bias_word_explorer_4_spaces = WEBiasExplorer4Spaces(
             embedding=embedding
         )
         if err:
             return None, self.process_error(err)
+        err = self.bias_word_explorer_2_spaces.check_oov(word_lists)
         if err:
             return None, self.process_error(err)
+        fig = self.bias_word_explorer_2_spaces.calculate_bias(
             to_diagnose_list,
+            wordlist_1,
+            wordlist_2
         )
         return fig, self.process_error(err)
         wordlists = [wordlist_1, wordlist_2, wordlist_3, wordlist_4, to_diagnose_list]
         for _list in wordlists:
             if not _list:
+                err = "To plot with 4 spaces, you must enter at least one word in all lists"
         if err:
             return None, self.process_error(err)
+        err = self.bias_word_explorer_4_spaces.check_oov(wordlists)
         if err:
             return None, self.process_error(err)
+        fig = self.bias_word_explorer_4_spaces.calculate_bias(
             to_diagnose_list,
             wordlist_1,
             wordlist_2,
             wordlist_3,
             wordlist_4
         )
         return fig, self.process_error(err)

modules/utils.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import numpy as np
+import pandas as pd
def take_two_sides_extreme_sorted(
    df: pd.DataFrame,
    n_extreme: int,
    part_column: str=None,
    head_value: str='',
    tail_value: str=''
) -> pd.DataFrame:
    """Return the first and last ``n_extreme`` rows of ``df`` as one frame.

    This function does not sort; the rows are taken in the order ``df``
    already has. The input frame is never modified.

    :param pd.DataFrame df: The frame to take the rows from
    :param int n_extreme: The number of rows taken from each end
    :param str or None part_column: If given, a column with this name is set
        to ``head_value`` on the leading rows and ``tail_value`` on the
        trailing rows
    :param str head_value: The label written to ``part_column`` for leading rows
    :param str tail_value: The label written to ``part_column`` for trailing rows
    :return: The leading rows followed by the trailing rows, with fully
        identical rows dropped and a fresh 0..n-1 index
    """
    # Explicit copies: head()/tail() are slices of the caller's frame, and
    # assigning a column to such a slice is what pandas' chained-assignment
    # warning is about. Copying makes the label assignment below unambiguous.
    head_df = df.head(n_extreme).copy()
    tail_df = df.tail(n_extreme).copy()

    if part_column is not None:
        head_df[part_column] = head_value
        tail_df[part_column] = tail_value

    # When the two ends overlap (2 * n_extreme > len(df)) the shared rows
    # appear twice; drop_duplicates removes rows that are identical in
    # every column.
    return (pd.concat([head_df, tail_df])
            .drop_duplicates()
            .reset_index(drop=True))
def normalize(
    v: np.ndarray
) -> np.ndarray:
    """Normalize a 1-D vector to unit Euclidean length.

    :param v: The vector to normalize; any 1-D array-like is accepted
    :return: ``v`` divided by its norm. A zero vector is returned unchanged,
        since it has no direction to preserve.
    :raises ValueError: If ``v`` is not 1-D
    """
    # Accept lists/tuples as well as arrays; an ndarray passes through
    # np.asarray without being copied.
    v = np.asarray(v)
    if v.ndim != 1:
        raise ValueError('v should be 1-D, {}-D was given'.format(
            v.ndim))
    norm = np.linalg.norm(v)
    if norm == 0:
        # Avoid dividing by zero.
        return v
    return v / norm
def project_params(
    u: np.ndarray,
    v: np.ndarray
) -> tuple:
    """Project the vector ``v`` onto the direction ``u`` and reject it from it.

    :param u: The direction; it is normalized with ``normalize`` before use
    :param v: The vector to project
    :return: A 3-tuple ``(projection, projected_vector, rejected_vector)``:
        the scalar ``v @ normalized_u``, that scalar times the normalized
        direction, and ``v`` minus the projected vector
    """
    normalize_u = normalize(u)
    projection = (v @ normalize_u)
    projected_vector = projection * normalize_u
    # What is left of v once its component along u has been removed.
    rejected_vector = v - projected_vector
    return projection, projected_vector, rejected_vector
def cosine_similarity(
    v: np.ndarray,
    u: np.ndarray
) -> float:
    """Calculate the cosine similarity between two vectors.

    :param v: The first vector
    :param u: The second vector
    :return: ``v @ u`` divided by the product of the two norms. If either
        vector has zero norm the similarity is undefined and ``0.0`` is
        returned instead of dividing by zero.
    """
    denominator = np.linalg.norm(v) * np.linalg.norm(u)
    if denominator == 0:
        # A zero vector has no direction; dividing would yield NaN and a
        # RuntimeWarning.
        return 0.0
    return v @ u / denominator