nanom committed on
Commit
1804cab
·
1 Parent(s): 33738cd

Improvement in the display of the graph axes labels. Minor fixes.

Browse files
Files changed (2) hide show
  1. modules/module_BiasExplorer.py +45 -23
  2. modules/utils.py +87 -1
modules/module_BiasExplorer.py CHANGED
@@ -5,14 +5,15 @@ import seaborn as sns
5
  import matplotlib.pyplot as plt
6
  from sklearn.decomposition import PCA
7
  from typing import List, Dict, Tuple, Optional, Any
8
- from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted
9
 
10
  __all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
11
 
12
  class WordBiasExplorer:
13
  def __init__(
14
  self,
15
- embedding # Embedding Class instance
 
16
  ) -> None:
17
 
18
  self.embedding = embedding
@@ -20,6 +21,7 @@ class WordBiasExplorer:
20
  self.positive_end = None
21
  self.negative_end = None
22
  self.DIRECTION_METHODS = ['single', 'sum', 'pca']
 
23
 
24
  def __copy__(
25
  self
@@ -245,12 +247,12 @@ class WordBiasExplorer:
245
  out_msj = ""
246
 
247
  if not word:
248
- out_msj = "Error: Primero debe ingresar una palabra!"
249
  else:
250
  if word not in self.embedding:
251
- out_msj = f"Error: La palabra '<b>{word}</b>' no se encuentra en el vocabulario!"
252
 
253
- return out_msj
254
 
255
  def check_oov(
256
  self,
@@ -267,10 +269,11 @@ class WordBiasExplorer:
267
  class WEBiasExplorer2Spaces(WordBiasExplorer):
268
  def __init__(
269
  self,
270
- embedding # Embedding class instance
 
271
  ) -> None:
272
 
273
- super().__init__(embedding)
274
 
275
  def calculate_bias(
276
  self,
@@ -283,7 +286,7 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
283
 
284
  for wordlist in wordlists:
285
  if not wordlist:
286
- raise Exception('Debe ingresar al menos 1 palabra en las lista de palabras a diagnosticar, sesgo 1 y sesgo 2')
287
 
288
  err = self.check_oov(wordlists)
289
  if err:
@@ -368,9 +371,14 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
368
  plt.xticks(np.arange(-most_extream_projection,
369
  most_extream_projection + axis_projection_step,
370
  axis_projection_step))
371
- xlabel = ('← {} {} {} →'.format(self.negative_end,
372
- ' ' * 20,
373
- self.positive_end))
 
 
 
 
 
374
 
375
  plt.xlabel(xlabel)
376
  plt.ylabel('Words')
@@ -381,10 +389,11 @@ class WEBiasExplorer2Spaces(WordBiasExplorer):
381
  class WEBiasExplorer4Spaces(WordBiasExplorer):
382
  def __init__(
383
  self,
384
- embedding # Embedding Class instance
 
385
  ) -> None:
386
 
387
- super().__init__(embedding)
388
 
389
  def calculate_bias(
390
  self,
@@ -405,7 +414,7 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
405
 
406
  for wordlist in wordlists:
407
  if not wordlist:
408
- raise Exception('¡Para graficar con 4 espacios, debe ingresar al menos 1 palabra en todas las listas!')
409
 
410
  err = self.check_oov(wordlists)
411
  if err:
@@ -495,9 +504,15 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
495
  projections_df['projection']
496
  .abs()
497
  .max(),
498
- decimals=1)
499
- sns.scatterplot(x='projection_x', y='projection_y', data=projections_df,
500
- palette=projections_df['color'])
 
 
 
 
 
 
501
 
502
  plt.xticks(np.arange(-most_extream_projection,
503
  most_extream_projection + axis_projection_step,
@@ -505,13 +520,20 @@ class WEBiasExplorer4Spaces(WordBiasExplorer):
505
  for _, row in (projections_df.iterrows()):
506
  ax.annotate(
507
  row['word'], (row['projection_x'], row['projection_y']))
508
- x_label = '← {} {} {} →'.format(name_left,
509
- ' ' * 20,
510
- name_right)
511
 
512
- y_label = '← {} {} {} →'.format(name_top,
513
- ' ' * 20,
514
- name_bottom)
 
 
 
 
 
 
 
 
 
 
515
 
516
  plt.xlabel(x_label)
517
  ax.xaxis.set_label_position('bottom')
 
5
  import matplotlib.pyplot as plt
6
  from sklearn.decomposition import PCA
7
  from typing import List, Dict, Tuple, Optional, Any
8
+ from modules.utils import normalize, cosine_similarity, project_params, take_two_sides_extreme_sorted, axes_labels_format
9
 
10
  __all__ = ['WordBiasExplorer', 'WEBiasExplorer2Spaces', 'WEBiasExplorer4Spaces']
11
 
12
  class WordBiasExplorer:
13
  def __init__(
14
  self,
15
+ embedding, # Embedding class instance
16
+ errorManager # ErrorManager class instance
17
  ) -> None:
18
 
19
  self.embedding = embedding
 
21
  self.positive_end = None
22
  self.negative_end = None
23
  self.DIRECTION_METHODS = ['single', 'sum', 'pca']
24
+ self.errorManager = errorManager
25
 
26
  def __copy__(
27
  self
 
247
  out_msj = ""
248
 
249
  if not word:
250
+ out_msj = ['EMBEDDING_NO_WORD_PROVIDED']
251
  else:
252
  if word not in self.embedding:
253
+ out_msj = ['EMBEDDING_WORD_OOV', word]
254
 
255
+ return self.errorManager.process(out_msj)
256
 
257
  def check_oov(
258
  self,
 
269
  class WEBiasExplorer2Spaces(WordBiasExplorer):
270
  def __init__(
271
  self,
272
+ embedding, # Embedding class instance
273
+ errorManager # ErrorManager class instance
274
  ) -> None:
275
 
276
+ super().__init__(embedding, errorManager)
277
 
278
  def calculate_bias(
279
  self,
 
286
 
287
  for wordlist in wordlists:
288
  if not wordlist:
289
+ raise Exception('At least one word should be in the to diagnose list, bias 1 list and bias 2 list')
290
 
291
  err = self.check_oov(wordlists)
292
  if err:
 
371
  plt.xticks(np.arange(-most_extream_projection,
372
  most_extream_projection + axis_projection_step,
373
  axis_projection_step))
374
+
375
+
376
+ xlabel = axes_labels_format(
377
+ left=self.negative_end,
378
+ right=self.positive_end,
379
+ sep=' ' * 20,
380
+ word_wrap=3
381
+ )
382
 
383
  plt.xlabel(xlabel)
384
  plt.ylabel('Words')
 
389
  class WEBiasExplorer4Spaces(WordBiasExplorer):
390
  def __init__(
391
  self,
392
+ embedding, # Embedding Class instance
393
+ errorManager # ErrorManager class instance
394
  ) -> None:
395
 
396
+ super().__init__(embedding, errorManager)
397
 
398
  def calculate_bias(
399
  self,
 
414
 
415
  for wordlist in wordlists:
416
  if not wordlist:
417
+ raise Exception('To plot with 4 spaces, you must enter at least one word in all lists')
418
 
419
  err = self.check_oov(wordlists)
420
  if err:
 
504
  projections_df['projection']
505
  .abs()
506
  .max(),
507
+ decimals=1
508
+ )
509
+
510
+ sns.scatterplot(x='projection_x',
511
+ y='projection_y',
512
+ data=projections_df,
513
+ # color=list(projections_df['color'].to_list()), # No se distinguen los colores
514
+ color='blue'
515
+ )
516
 
517
  plt.xticks(np.arange(-most_extream_projection,
518
  most_extream_projection + axis_projection_step,
 
520
  for _, row in (projections_df.iterrows()):
521
  ax.annotate(
522
  row['word'], (row['projection_x'], row['projection_y']))
 
 
 
523
 
524
+
525
+ x_label = axes_labels_format(
526
+ left=name_left,
527
+ right=name_right,
528
+ sep=' ' * 20,
529
+ word_wrap=3
530
+ )
531
+ y_label = axes_labels_format(
532
+ left=name_top,
533
+ right=name_bottom,
534
+ sep=' ' * 20,
535
+ word_wrap=3
536
+ )
537
 
538
  plt.xlabel(x_label)
539
  ax.xaxis.set_label_position('bottom')
modules/utils.py CHANGED
@@ -1,5 +1,32 @@
1
  import numpy as np
2
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  def take_two_sides_extreme_sorted(
5
  df: pd.DataFrame,
@@ -55,4 +82,63 @@ def cosine_similarity(
55
  v_norm = np.linalg.norm(v)
56
  u_norm = np.linalg.norm(u)
57
  similarity = v @ u / (v_norm * u_norm)
58
- return similarity
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import pytz
4
+ from datetime import datetime
5
+ from typing import List
6
+
7
+
8
+
9
class DateLogs:
    """Produce formatted "current time" strings for one fixed time zone."""

    def __init__(
        self,
        zone: str = "America/Argentina/Cordoba"
    ) -> None:
        """Look up ``zone`` with ``pytz.timezone`` and keep the result.

        Args:
            zone: Time zone name handed to ``pytz.timezone``.
        """
        self.time_zone = pytz.timezone(zone)

    def _stamp(
        self,
        pattern: str
    ) -> str:
        """Format the current time in ``self.time_zone`` using ``pattern``."""
        return datetime.now(self.time_zone).strftime(pattern)

    def full(
        self
    ) -> str:
        """Return the current time and date as ``HH:MM:SS DD-MM-YYYY``."""
        return self._stamp("%H:%M:%S %d-%m-%Y")

    def day(
        self
    ) -> str:
        """Return the current date as ``DD-MM-YYYY``."""
        return self._stamp("%d-%m-%Y")
30
 
31
  def take_two_sides_extreme_sorted(
32
  df: pd.DataFrame,
 
82
  v_norm = np.linalg.norm(v)
83
  u_norm = np.linalg.norm(u)
84
  similarity = v @ u / (v_norm * u_norm)
85
+ return similarity
86
+
87
+
88
def axes_labels_format(
    left: str,
    right: str,
    sep: str,
    word_wrap: int = 4
) -> str:
    """Build a multi-line axis label from two comma-separated word lists.

    Each input string is split on commas, surrounding whitespace is
    stripped and empty entries are discarded. The words of each side are
    laid out in rows of ``word_wrap`` columns; every cell is left-justified
    to the length of the longest word found on either side so the columns
    line up. Corresponding rows of both sides are joined with ``sep``.
    The first row is wrapped in arrows (``← ... →``); the remaining rows
    are wrapped in single spaces. Every row ends with a newline.

    Args:
        left: Comma-separated words shown on the left of ``sep``.
        right: Comma-separated words shown on the right of ``sep``.
        sep: Text placed between the left and the right block on each row.
        word_wrap: Number of words per row on each side (must be >= 1).

    Returns:
        The formatted label, or an empty string when neither side
        contains any word.

    Raises:
        ValueError: If ``word_wrap`` is smaller than 1.
    """
    if word_wrap < 1:
        raise ValueError("word_wrap must be a positive integer")

    def split_words(
        text: str
    ) -> List[str]:
        # Drop blank entries such as the ones produced by "a,,b" or "a, ".
        stripped = (chunk.strip() for chunk in text.split(","))
        return [word for word in stripped if word]

    def gen_block(
        words: List[str],
        n_rows: int,
        n_cols: int
    ) -> List[List[str]]:
        # Pad with empty cells so every row has exactly n_cols entries.
        padded = words + [""] * (n_rows * n_cols - len(words))
        return [padded[r * n_cols:(r + 1) * n_cols] for r in range(n_rows)]

    l_list = split_words(left)
    r_list = split_words(right)

    # Column width is the longest word by LENGTH over both sides; the
    # default keeps this safe when one or both sides have no words.
    longest_word = max((len(word) for word in l_list + r_list), default=0)

    # Both sides get the same number of rows (ceiling division) so they
    # can be zipped row by row.
    longest_list = max(len(l_list), len(r_list))
    n_rows = (longest_list + word_wrap - 1) // word_wrap
    n_cols = word_wrap

    l_block = gen_block(l_list, n_rows, n_cols)
    r_block = gen_block(r_list, n_rows, n_cols)

    rows = []
    for i, (l_row, r_row) in enumerate(zip(l_block, r_block)):
        line = ' '.join(word.ljust(longest_word) for word in l_row) + sep + \
            ' '.join(word.ljust(longest_word) for word in r_row)
        # Only the first row carries the direction arrows.
        rows.append(f"← {line} →\n" if i == 0 else f" {line} \n")

    return "".join(rows)