LMartinezEXEX committed on
Commit 8081e11 · 1 Parent(s): 6ff911e

Type hinted BiasExplorer classes.

Moved utility functions to utils.py
Backtracked on using child classes.
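
Intended usage after this refactor (a minimal sketch based on the new signatures in the diff below; the `embedding` object and the example word lists are placeholders, not part of this commit):

```python
# Hypothetical usage sketch; `embedding` stands for a loaded Embedding instance.
from modules.module_BiasExplorer import WEBiasExplorer2Spaces, WEBiasExplorer4Spaces

explorer_2d = WEBiasExplorer2Spaces(embedding)
fig = explorer_2d.calculate_bias(
    ['nurse', 'doctor', 'engineer'],   # wordlist_to_diagnose
    ['he', 'man'],                     # wordlist_right
    ['she', 'woman'],                  # wordlist_left
)                                      # returns a matplotlib Figure

explorer_4d = WEBiasExplorer4Spaces(embedding)
fig_4d = explorer_4d.calculate_bias(
    ['nurse', 'doctor', 'engineer'],   # wordlist_to_diagnose
    ['he', 'man'], ['she', 'woman'],   # first definitional pair (right / left)
    ['rich'], ['poor'],                # second definitional pair (top / bottom)
)
```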

modules/module_BiasExplorer.py CHANGED
@@ -1,67 +1,15 @@
- # ToDo: Pendiente eliminar clases/métodos que no son utilizados. Luego, unificar sintaxix e incluir typing.
-
  import copy
  import numpy as np
  import pandas as pd
  import seaborn as sns
  import matplotlib.pyplot as plt
  from sklearn.decomposition import PCA
 
- def take_two_sides_extreme_sorted(
- df,
- n_extreme,
- part_column=None,
- head_value='',
- tail_value=''
- ):
-
- head_df = df.head(n_extreme)[:]
- tail_df = df.tail(n_extreme)[:]
-
- if part_column is not None:
- head_df[part_column] = head_value
- tail_df[part_column] = tail_value
-
- return (pd.concat([head_df, tail_df])
- .drop_duplicates()
- .reset_index(drop=True))
-
- def normalize(v):
- """Normalize a 1-D vector."""
- if v.ndim != 1:
- raise ValueError('v should be 1-D, {}-D was given'.format(
- v.ndim))
- norm = np.linalg.norm(v)
- if norm == 0:
- return v
- return v / norm
-
- def project_params(u, v):
- """Projecting and rejecting the vector v onto direction u with scalar."""
- normalize_u = normalize(u)
- projection = (v @ normalize_u)
- projected_vector = projection * normalize_u
- rejected_vector = v - projected_vector
- return projection, projected_vector, rejected_vector
-
-
- def cosine_similarity(v, u):
- """Calculate the cosine similarity between two vectors."""
- v_norm = np.linalg.norm(v)
- u_norm = np.linalg.norm(u)
- similarity = v @ u / (v_norm * u_norm)
- return similarity
-
-
- DIRECTION_METHODS = ['single', 'sum', 'pca']
- DEBIAS_METHODS = ['neutralize', 'hard', 'soft']
- FIRST_PC_THRESHOLD = 0.5
- MAX_NON_SPECIFIC_EXAMPLES = 1000
-
- __all__ = ['GenderBiasWE', 'BiasWordEmbedding']
-
-
- class WordBiasExplorer():
  def __init__(
  self,
  embedding # Class Embedding instance
@@ -71,10 +19,11 @@ class WordBiasExplorer():
  self.direction = None
  self.positive_end = None
  self.negative_end = None
 
  def __copy__(
  self
- ):
 
  bias_word_embedding = self.__class__(self.embedding)
  bias_word_embedding.direction = copy.deepcopy(self.direction)
@@ -84,8 +33,8 @@ class WordBiasExplorer():
 
  def __deepcopy__(
  self,
- memo
- ):
 
  bias_word_embedding = copy.copy(self)
  bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
@@ -115,9 +64,9 @@ class WordBiasExplorer():
 
  def _identify_subspace_by_pca(
  self,
- definitional_pairs,
- n_components
- ):
 
  matrix = []
 
@@ -137,15 +86,16 @@ class WordBiasExplorer():
 
  def _identify_direction(
  self,
- positive_end,
- negative_end,
- definitional,
- method='pca'
- ):
 
- if method not in DIRECTION_METHODS:
  raise ValueError('method should be one of {}, {} was given'.format(
- DIRECTION_METHODS, method))
 
  if positive_end == negative_end:
  raise ValueError('positive_end and negative_end'
@@ -170,11 +120,11 @@ class WordBiasExplorer():
 
  elif method == 'pca':
  pca = self._identify_subspace_by_pca(definitional, 10)
- if pca.explained_variance_ratio_[0] < FIRST_PC_THRESHOLD:
  raise RuntimeError('The Explained variance'
  'of the first principal component should be'
  'at least {}, but it is {}'
- .format(FIRST_PC_THRESHOLD,
  pca.explained_variance_ratio_[0]))
  direction = pca.components_[0]
 
@@ -193,7 +143,7 @@ class WordBiasExplorer():
  def project_on_direction(
  self,
  word: str
- ):
 
  """Project the normalized vector of the word on the direction.
  :param str word: The word tor project
@@ -209,8 +159,8 @@ class WordBiasExplorer():
 
  def _calc_projection_scores(
  self,
- words
- ):
 
  self._is_direction_identified()
 
@@ -225,8 +175,8 @@ class WordBiasExplorer():
 
  def calc_projection_data(
  self,
- words
- ):
 
  """
  Calculate projection, projected and rejected vectors of a words list.
@@ -254,9 +204,9 @@ class WordBiasExplorer():
 
  def plot_dist_projections_on_direction(
  self,
- word_groups,
- ax=None
- ):
 
  """Plot the projection scalars distribution on the direction.
  :param dict word_groups word: The groups to projects
@@ -289,8 +239,8 @@ class WordBiasExplorer():
 
  def __errorChecking(
  self,
- word
- ):
 
  out_msj = ""
 
@@ -304,8 +254,8 @@ class WordBiasExplorer():
 
  def check_oov(
  self,
- wordlists
- ):
 
  for wordlist in wordlists:
  for word in wordlist:
@@ -314,201 +264,72 @@ class WordBiasExplorer():
  return msg
  return None
 
- def plot_biased_words(
- self,
- words_to_diagnose,
- wordlist_right,
- wordlist_left,
- wordlist_top=[],
- wordlist_bottom=[]
- ):
 
- bias_2D = wordlist_top == [] and wordlist_bottom == []
 
- if bias_2D and (not wordlist_right or not wordlist_left):
- raise Exception('For bar plot, wordlist right and left can NOT be empty')
- elif not bias_2D and (not wordlist_right or not wordlist_left or not wordlist_top or not wordlist_bottom):
- raise Exception('For plane plot, wordlist right, left, top and down can NOT be empty')
 
- err = self.check_oov([words_to_diagnose + wordlist_right + wordlist_left + wordlist_top + wordlist_bottom])
  if err:
  raise Exception(err)
 
  return self.get_bias_plot(
- bias_2D,
- words_to_diagnose,
- definitional_1=(wordlist_right, wordlist_left),
- definitional_2=(wordlist_top, wordlist_bottom)
- )
 
  def get_bias_plot(
  self,
- plot_2D,
- words_to_diagnose,
- definitional_1,
- definitional_2=([], []),
- method='sum',
- n_extreme=10,
- figsize=(15, 10)
- ):
-
- fig, ax = plt.subplots(1, figsize=figsize)
- self.method = method
- self.plot_projection_scores(plot_2D, words_to_diagnose, definitional_1, definitional_2, n_extreme, ax)
-
- if plot_2D:
- fig.tight_layout()
- fig.canvas.draw()
-
- return fig
-
- def plot_projection_scores(
- self,
- plot_2D,
- words,
- definitional_1,
- definitional_2=([], []),
- n_extreme=10,
- ax=None,
- axis_projection_step=0.1
- ):
-
- name_left = ', '.join(definitional_1[1])
- name_right = ', '.join(definitional_1[0])
-
- self._identify_direction(name_left, name_right, definitional=definitional_1, method='sum')
- self._is_direction_identified()
-
- projections_df = self._calc_projection_scores(words)
- projections_df['projection_x'] = projections_df['projection'].round(2)
-
- if not plot_2D:
- name_top = ', '.join(definitional_2[1])
- name_bottom = ', '.join(definitional_2[0])
- self._identify_direction(name_top, name_bottom, definitional=definitional_2, method='sum')
- self._is_direction_identified()
-
- projections_df['projection_y'] = self._calc_projection_scores(words)['projection'].round(2)
-
- if n_extreme is not None:
- projections_df = take_two_sides_extreme_sorted(projections_df, n_extreme=n_extreme)
-
- if ax is None:
- _, ax = plt.subplots(1)
-
- cmap = plt.get_cmap('RdBu')
- projections_df['color'] = ((projections_df['projection'] + 0.5).apply(cmap))
- most_extream_projection = np.round(
- projections_df['projection']
- .abs()
- .max(),
- decimals=1)
-
- if plot_2D:
- sns.barplot(x='projection', y='word', data=projections_df,
- palette=projections_df['color'])
- else:
- # ToDo: revisar este warning:
- # Ignoring `palette` because no `hue` variable has been assigned. sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
-
- sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
- palette=projections_df['color'])
-
- plt.xticks(np.arange(-most_extream_projection,
- most_extream_projection + axis_projection_step,
- axis_projection_step))
-
- x_label = '← {} {} {} →'.format(name_left,
- ' ' * 20,
- name_right)
- if not plot_2D:
- y_label = '← {} {} {} →'.format(name_top,
- ' ' * 20,
- name_bottom)
- for _, row in (projections_df.iterrows()):
- ax.annotate(row['word'], (row['projection_x'], row['projection_y']))
-
- plt.xlabel(x_label)
- plt.ylabel('Words')
-
- if not plot_2D:
- ax.xaxis.set_label_position('bottom')
- ax.xaxis.set_label_coords(.5, 0)
-
- plt.ylabel(y_label)
- ax.yaxis.set_label_position('left')
- ax.yaxis.set_label_coords(0, .5)
-
- ax.spines['left'].set_position('center')
- ax.spines['bottom'].set_position('center')
-
- ax.set_xticks([])
- ax.set_yticks([])
-
- return ax
-
- # TODO: Would be erased if decided to keep all info in BiasWordExplorer
- class WEBiasExplorer2d(WordBiasExplorer):
- def __init__(self, word_embedding) -> None:
- super().__init__(word_embedding)
-
- def calculate_bias( self,
- palabras_extremo_1,
- palabras_extremo_2,
- palabras_para_situar
- ):
- wordlists = [palabras_extremo_1, palabras_extremo_2, palabras_para_situar]
-
- err = self.check_oov(wordlists)
- for wordlist in wordlists:
- if not wordlist:
- err = "<center><h3>" + 'Debe ingresar al menos 1 palabra en las lista de palabras a diagnosticar, sesgo 1 y sesgo 2' + "<center><h3>"
- if err:
- return None, err
-
- im = self.get_bias_plot(
- palabras_para_situar,
- definitional=(
- palabras_extremo_1, palabras_extremo_2),
- method='sum',
- n_extreme=10
- )
- return im, ''
-
- def get_bias_plot(self,
- palabras_para_situar,
- definitional,
- method='sum',
- n_extreme=10,
- figsize=(10, 10)
- ):
 
  fig, ax = plt.subplots(1, figsize=figsize)
  self.method = method
  self.plot_projection_scores(
  definitional,
- palabras_para_situar, n_extreme, ax=ax,)
 
  fig.tight_layout()
  fig.canvas.draw()
 
- data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
- w, h = fig.canvas.get_width_height()
- im = data.reshape((int(h), int(w), -1))
- return im
 
- def plot_projection_scores(self, definitional,
- words, n_extreme=10,
- ax=None, axis_projection_step=None):
  """Plot the projection scalar of words on the direction.
  :param list words: The words tor project
  :param int or None n_extreme: The number of extreme words to show
  :return: The ax object of the plot
  """
- nombre_del_extremo_1 = ', '.join(definitional[0])
- nombre_del_extremo_2 = ', '.join(definitional[1])
 
- self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
  definitional=definitional,
  method='sum')
 
@@ -553,80 +374,83 @@ class WEBiasExplorer2d(WordBiasExplorer):
  return ax
 
 
- class WEBiasExplorer4d(WordBiasExplorer):
- def __init__(self, word_embedding) -> None:
- super().__init__(word_embedding)
 
- def calculate_bias( self,
- palabras_extremo_1,
- palabras_extremo_2,
- palabras_extremo_3,
- palabras_extremo_4,
- palabras_para_situar
- ):
  wordlists = [
- palabras_extremo_1,
- palabras_extremo_2,
- palabras_extremo_3,
- palabras_extremo_4,
- palabras_para_situar
  ]
  for wordlist in wordlists:
  if not wordlist:
- err = "<center><h3>" + \
- '¡Para graficar con 4 espacios, debe ingresar al menos 1 palabra en todas las listas!' + "<center><h3>"
 
  err = self.check_oov(wordlist)
-
  if err:
- return None, err
-
- im = self.get_bias_plot(
- palabras_para_situar,
- definitional_1=(
- palabras_extremo_1, palabras_extremo_2),
- definitional_2=(
- palabras_extremo_3, palabras_extremo_4),
- method='sum',
- n_extreme=10
- )
- return im, ''
-
- def get_bias_plot(self,
- palabras_para_situar,
- definitional_1,
- definitional_2,
- method='sum',
- n_extreme=10,
- figsize=(10, 10)
- ):
 
  fig, ax = plt.subplots(1, figsize=figsize)
  self.method = method
  self.plot_projection_scores(
  definitional_1,
  definitional_2,
- palabras_para_situar, n_extreme, ax=ax,)
  fig.canvas.draw()
 
- data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
- w, h = fig.canvas.get_width_height()
- im = data.reshape((int(h), int(w), -1))
- return im
 
- def plot_projection_scores(self, definitional_1, definitional_2,
- words, n_extreme=10,
- ax=None, axis_projection_step=None):
  """Plot the projection scalar of words on the direction.
  :param list words: The words tor project
  :param int or None n_extreme: The number of extreme words to show
  :return: The ax object of the plot
  """
 
- nombre_del_extremo_1 = ', '.join(definitional_1[1])
- nombre_del_extremo_2 = ', '.join(definitional_1[0])
 
- self._identify_direction(nombre_del_extremo_1, nombre_del_extremo_2,
  definitional=definitional_1,
  method='sum')
 
@@ -635,9 +459,9 @@ class WEBiasExplorer4d(WordBiasExplorer):
  projections_df = self._calc_projection_scores(words)
  projections_df['projection_x'] = projections_df['projection'].round(2)
 
- nombre_del_extremo_3 = ', '.join(definitional_2[1])
- nombre_del_extremo_4 = ', '.join(definitional_2[0])
- self._identify_direction(nombre_del_extremo_3, nombre_del_extremo_4,
  definitional=definitional_2,
  method='sum')
 
@@ -673,13 +497,13 @@ class WEBiasExplorer4d(WordBiasExplorer):
  for _, row in (projections_df.iterrows()):
  ax.annotate(
  row['word'], (row['projection_x'], row['projection_y']))
- x_label = '← {} {} {} →'.format(nombre_del_extremo_1,
  ' ' * 20,
- nombre_del_extremo_2)
 
- y_label = '← {} {} {} →'.format(nombre_del_extremo_3,
  ' ' * 20,
- nombre_del_extremo_4)
 
  plt.xlabel(x_label)
  ax.xaxis.set_label_position('bottom')
@@ -694,8 +518,5 @@ class WEBiasExplorer4d(WordBiasExplorer):
 
  ax.set_xticks([])
  ax.set_yticks([])
- #plt.yticks([], [])
- # ax.spines['left'].set_position('zero')
- # ax.spines['bottom'].set_position('zero')
 
- return ax

  import copy
  import numpy as np
  import pandas as pd
  import seaborn as sns
  import matplotlib.pyplot as plt
  from sklearn.decomposition import PCA
+ from typing import List, Dict, Tuple, Optional, Any
+ from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted
 
+ __all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
+
+ class WordBiasExplorer:
  def __init__(
  self,
  embedding # Class Embedding instance
  self.direction = None
  self.positive_end = None
  self.negative_end = None
+ self.DIRECTION_METHODS = ['single', 'sum', 'pca']
 
  def __copy__(
  self
+ ) -> 'WordBiasExplorer':
 
  bias_word_embedding = self.__class__(self.embedding)
  bias_word_embedding.direction = copy.deepcopy(self.direction)
 
  def __deepcopy__(
  self,
+ memo: Optional[Dict[int, Any]]
+ )-> 'WordBiasExplorer':
 
  bias_word_embedding = copy.copy(self)
  bias_word_embedding.model = copy.deepcopy(bias_word_embedding.model)
 
  def _identify_subspace_by_pca(
  self,
+ definitional_pairs: List[Tuple[str, str]],
+ n_components: int
+ ) -> PCA:
 
  matrix = []
 
  def _identify_direction(
  self,
+ positive_end: str,
+ negative_end: str,
+ definitional: Tuple[str, str],
+ method: str='pca',
+ first_pca_threshold: float=0.5
+ ) -> None:
 
+ if method not in self.DIRECTION_METHODS:
  raise ValueError('method should be one of {}, {} was given'.format(
+ self.DIRECTION_METHODS, method))
 
  if positive_end == negative_end:
  raise ValueError('positive_end and negative_end'
 
  elif method == 'pca':
  pca = self._identify_subspace_by_pca(definitional, 10)
+ if pca.explained_variance_ratio_[0] < first_pca_threshold:
  raise RuntimeError('The Explained variance'
  'of the first principal component should be'
  'at least {}, but it is {}'
+ .format(first_pca_threshold,
  pca.explained_variance_ratio_[0]))
  direction = pca.components_[0]
 
  def project_on_direction(
  self,
  word: str
+ ) -> float:
 
  """Project the normalized vector of the word on the direction.
  :param str word: The word tor project
 
  def _calc_projection_scores(
  self,
+ words: List[str]
+ ) -> pd.DataFrame:
 
  self._is_direction_identified()
 
  def calc_projection_data(
  self,
+ words: List[str]
+ ) -> pd.DataFrame:
 
  """
  Calculate projection, projected and rejected vectors of a words list.
 
  def plot_dist_projections_on_direction(
  self,
+ word_groups: Dict[str, List[str]],
+ ax: plt.Axes=None
+ ) -> plt.Axes:
 
  """Plot the projection scalars distribution on the direction.
  :param dict word_groups word: The groups to projects
 
  def __errorChecking(
  self,
+ word: str
+ ) -> str:
 
  out_msj = ""
 
  def check_oov(
  self,
+ wordlists: List[str]
+ ) -> str:
 
  for wordlist in wordlists:
  for word in wordlist:
  return msg
  return None
 
+ class WEBiasExplorer2Spaces(WordBiasExplorer):
+ def __init__(self, embedding) -> None:
+ super().__init__(embedding)
 
+ def calculate_bias(
+ self,
+ wordlist_to_diagnose: List[str],
+ wordlist_right: List[str],
+ wordlist_left: List[str]
+ ) -> plt.Figure:
 
+ wordlists = [wordlist_to_diagnose, wordlist_right, wordlist_left]
 
+ for wordlist in wordlists:
+ if not wordlist:
+ raise Exception('At least one word should be in the to diagnose list, bias 1 list and bias 2 list')
+
+ err = self.check_oov(wordlists)
  if err:
  raise Exception(err)
 
  return self.get_bias_plot(
+ wordlist_to_diagnose,
+ definitional=(wordlist_left, wordlist_right),
+ method='sum',
+ n_extreme=10
+ )
 
  def get_bias_plot(
  self,
+ wordlist_to_diagnose: List[str],
+ definitional: Tuple[List[str], List[str]],
+ method: str='sum',
+ n_extreme: int=10,
+ figsize: Tuple[int, int]=(10, 10)
+ ) -> plt.Figure:
 
  fig, ax = plt.subplots(1, figsize=figsize)
  self.method = method
  self.plot_projection_scores(
  definitional,
+ wordlist_to_diagnose, n_extreme, ax=ax,)
 
  fig.tight_layout()
  fig.canvas.draw()
 
+ return fig
+
+ def plot_projection_scores(
+ self,
+ definitional: Tuple[List[str], List[str]],
+ words: List[str],
+ n_extreme: int=10,
+ ax: plt.Axes=None,
+ axis_projection_step: float=None
+ ) -> plt.Axes:
 
  """Plot the projection scalar of words on the direction.
  :param list words: The words tor project
  :param int or None n_extreme: The number of extreme words to show
  :return: The ax object of the plot
  """
+ name_left = ', '.join(definitional[0])
+ name_right = ', '.join(definitional[1])
 
+ self._identify_direction(name_left, name_right,
  definitional=definitional,
  method='sum')
 
  return ax
 
 
+ class WEBiasExplorer4Spaces(WordBiasExplorer):
+ def __init__(self, embedding) -> None:
+ super().__init__(embedding)
+
+ def calculate_bias(
+ self,
+ wordlist_to_diagnose: List[str],
+ wordlist_right: List[str],
+ wordlist_left: List[str],
+ wordlist_top: List[str],
+ wordlist_bottom: List[str],
+ ) -> plt.Figure:
 
  wordlists = [
+ wordlist_to_diagnose,
+ wordlist_left,
+ wordlist_right,
+ wordlist_top,
+ wordlist_bottom
  ]
+
  for wordlist in wordlists:
  if not wordlist:
+ raise Exception('To plot with 4 spaces, you must enter at least one word in all lists')
 
  err = self.check_oov(wordlist)
  if err:
+ raise Exception(err)
+
+ return self.get_bias_plot(
+ wordlist_to_diagnose,
+ definitional_1=(wordlist_right, wordlist_left),
+ definitional_2=(wordlist_top, wordlist_bottom),
+ method='sum',
+ n_extreme=10
+ )
+
+ def get_bias_plot(
+ self,
+ wordlist_to_diagnose: List[str],
+ definitional_1: Tuple[List[str], List[str]],
+ definitional_2: Tuple[List[str], List[str]],
+ method: str='sum',
+ n_extreme: int=10,
+ figsize: Tuple[int, int]=(10, 10)
+ ) -> plt.Figure:
 
  fig, ax = plt.subplots(1, figsize=figsize)
  self.method = method
  self.plot_projection_scores(
  definitional_1,
  definitional_2,
+ wordlist_to_diagnose, n_extreme, ax=ax,)
  fig.canvas.draw()
 
+ return fig
+
+ def plot_projection_scores(
+ self,
+ definitional_1: Tuple[List[str], List[str]],
+ definitional_2: Tuple[List[str], List[str]],
+ words: List[str],
+ n_extreme: int=10,
+ ax: plt.Axes=None,
+ axis_projection_step: float=None
+ ) -> plt.Axes:
 
  """Plot the projection scalar of words on the direction.
  :param list words: The words tor project
  :param int or None n_extreme: The number of extreme words to show
  :return: The ax object of the plot
  """
 
+ name_left = ', '.join(definitional_1[1])
+ name_right = ', '.join(definitional_1[0])
 
+ self._identify_direction(name_left, name_right,
  definitional=definitional_1,
  method='sum')
 
  projections_df = self._calc_projection_scores(words)
  projections_df['projection_x'] = projections_df['projection'].round(2)
 
+ name_top = ', '.join(definitional_2[1])
+ name_bottom = ', '.join(definitional_2[0])
+ self._identify_direction(name_top, name_bottom,
  definitional=definitional_2,
  method='sum')
 
  for _, row in (projections_df.iterrows()):
  ax.annotate(
  row['word'], (row['projection_x'], row['projection_y']))
+ x_label = '← {} {} {} →'.format(name_left,
  ' ' * 20,
+ name_right)
 
+ y_label = '← {} {} {} →'.format(name_top,
  ' ' * 20,
+ name_bottom)
 
  plt.xlabel(x_label)
  ax.xaxis.set_label_position('bottom')
 
  ax.set_xticks([])
  ax.set_yticks([])
 
+ return ax
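
For reference, the PCA path of `_identify_direction` (which now takes a `first_pca_threshold` argument instead of the old `FIRST_PC_THRESHOLD` constant) follows the usual definitional-pairs recipe. An illustrative sketch only, not the repository's exact implementation; the pair vectors are assumed to be looked up from the embedding beforehand:

```python
# Illustration of PCA-based bias direction identification over definitional pairs.
# Not the repository's code; `pair_vectors` is an assumed list of (vec_a, vec_b) tuples.
import numpy as np
from sklearn.decomposition import PCA

def pca_bias_direction(pair_vectors, n_components=10, first_pca_threshold=0.5):
    # Center each definitional pair around its mean and stack the centered vectors.
    matrix = []
    for v_a, v_b in pair_vectors:
        center = (v_a + v_b) / 2
        matrix.append(v_a - center)
        matrix.append(v_b - center)
    pca = PCA(n_components=n_components)
    pca.fit(np.asarray(matrix))
    # Mirror the check in _identify_direction: the first component must dominate.
    if pca.explained_variance_ratio_[0] < first_pca_threshold:
        raise RuntimeError('first principal component explains too little variance')
    return pca.components_[0]
```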
modules/module_connection.py CHANGED
@@ -1,7 +1,7 @@
  from abc import ABC
 
  from modules.module_WordExplorer import WordExplorer
- from modules.module_BiasExplorer import WordBiasExplorer
  from typing import List, Tuple
 
 
@@ -120,7 +120,10 @@ class BiasWordExplorerConnector(Connector):
  else:
  raise KeyError
 
- self.bias_word_explorer = WordBiasExplorer(
  embedding=embedding
  )
 
@@ -143,14 +146,14 @@ class BiasWordExplorerConnector(Connector):
  if err:
  return None, self.process_error(err)
 
- err = self.bias_word_explorer.check_oov(word_lists)
  if err:
  return None, self.process_error(err)
 
- fig = self.bias_word_explorer.plot_biased_words(
  to_diagnose_list,
- wordlist_2,
- wordlist_1
  )
 
  return fig, self.process_error(err)
@@ -174,20 +177,20 @@ class BiasWordExplorerConnector(Connector):
  wordlists = [wordlist_1, wordlist_2, wordlist_3, wordlist_4, to_diagnose_list]
  for _list in wordlists:
  if not _list:
- err = "To plot with 4 spaces, you must enter at least one word in all lists."
  if err:
  return None, self.process_error(err)
 
- err = self.bias_word_explorer.check_oov(wordlists)
  if err:
  return None, self.process_error(err)
 
- fig = self.bias_word_explorer.plot_biased_words(
  to_diagnose_list,
  wordlist_1,
  wordlist_2,
  wordlist_3,
  wordlist_4
  )
-
  return fig, self.process_error(err)
 
  from abc import ABC
 
  from modules.module_WordExplorer import WordExplorer
+ from modules.module_BiasExplorer import WEBiasExplorer2Spaces, WEBiasExplorer4Spaces
  from typing import List, Tuple
 
 
  else:
  raise KeyError
 
+ self.bias_word_explorer_2_spaces = WEBiasExplorer2Spaces(
+ embedding=embedding
+ )
+ self.bias_word_explorer_4_spaces = WEBiasExplorer4Spaces(
  embedding=embedding
  )
 
  if err:
  return None, self.process_error(err)
 
+ err = self.bias_word_explorer_2_spaces.check_oov(word_lists)
  if err:
  return None, self.process_error(err)
 
+ fig = self.bias_word_explorer_2_spaces.calculate_bias(
  to_diagnose_list,
+ wordlist_1,
+ wordlist_2
  )
 
  return fig, self.process_error(err)
 
  wordlists = [wordlist_1, wordlist_2, wordlist_3, wordlist_4, to_diagnose_list]
  for _list in wordlists:
  if not _list:
+ err = "To plot with 4 spaces, you must enter at least one word in all lists"
  if err:
  return None, self.process_error(err)
 
+ err = self.bias_word_explorer_4_spaces.check_oov(wordlists)
  if err:
  return None, self.process_error(err)
 
+ fig = self.bias_word_explorer_4_spaces.calculate_bias(
  to_diagnose_list,
  wordlist_1,
  wordlist_2,
  wordlist_3,
  wordlist_4
  )
+
  return fig, self.process_error(err)
modules/utils.py ADDED
@@ -0,0 +1,58 @@
+ import numpy as np
+ import pandas as pd
+
+ def take_two_sides_extreme_sorted(
+ df: pd.DataFrame,
+ n_extreme: int,
+ part_column: str=None,
+ head_value: str='',
+ tail_value: str=''
+ ) -> pd.DataFrame:
+
+ head_df = df.head(n_extreme)[:]
+ tail_df = df.tail(n_extreme)[:]
+
+ if part_column is not None:
+ head_df[part_column] = head_value
+ tail_df[part_column] = tail_value
+
+ return (pd.concat([head_df, tail_df])
+ .drop_duplicates()
+ .reset_index(drop=True))
+
+ def normalize(
+ v: np.ndarray
+ ) -> np.ndarray:
+
+ """Normalize a 1-D vector."""
+ if v.ndim != 1:
+ raise ValueError('v should be 1-D, {}-D was given'.format(
+ v.ndim))
+ norm = np.linalg.norm(v)
+ if norm == 0:
+ return v
+ return v / norm
+
+ def project_params(
+ u: np.ndarray,
+ v: np.ndarray
+ ) -> np.ndarray:
+
+ """Projecting and rejecting the vector v onto direction u with scalar."""
+ normalize_u = normalize(u)
+ projection = (v @ normalize_u)
+ projected_vector = projection * normalize_u
+ rejected_vector = v - projected_vector
+ return projection, projected_vector, rejected_vector
+
+
+ def cosine_similarity(
+ v: np.ndarray,
+ u: np.ndarray
+ ) -> np.ndarray:
+
+ """Calculate the cosine similarity between two vectors."""
+ v_norm = np.linalg.norm(v)
+ u_norm = np.linalg.norm(u)
+ similarity = v @ u / (v_norm * u_norm)
+ return similarity
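
A quick sanity check of the helpers moved into modules/utils.py (the example vectors are made up, not from the repository):

```python
import numpy as np
from modules.utils import normalize, project_params, cosine_similarity

u = np.array([1.0, 0.0, 0.0])
v = np.array([3.0, 4.0, 0.0])

print(normalize(v))                        # [0.6 0.8 0. ]
projection, projected, rejected = project_params(u, v)
print(projection, projected, rejected)     # 3.0 [3. 0. 0.] [0. 4. 0.]
print(cosine_similarity(v, u))             # 0.6
```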