Andrea Maldonado committed on
Commit
02e62ba
·
1 Parent(s): 1418a81

Ranks features

Browse files
gedi/plotter.py CHANGED
@@ -14,7 +14,7 @@ from matplotlib.lines import Line2D
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH, PIPELINE_STEP
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
- from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
@@ -320,6 +320,7 @@ class FeaturesPlotter:
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
  plot_type = f", plot_type='{params[PLOT_TYPE]}'" if params.get(PLOT_TYPE) else ""
322
  font_size = f", font_size='{params[FONT_SIZE]}'" if params.get(FONT_SIZE) else ""
 
323
  LEGEND = ", legend=True" if params.get(PIPELINE_STEP) else ""
324
 
325
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
@@ -327,9 +328,9 @@ class FeaturesPlotter:
327
  if REAL_EVENTLOG_PATH in params:
328
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
329
  real_eventlogs = pd.read_csv(real_eventlogs_path)
330
- fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type}{font_size}{LEGEND})")
331
  else:
332
- fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type}{font_size}{LEGEND})")
333
 
334
  if output_path != None:
335
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
@@ -337,14 +338,14 @@ class FeaturesPlotter:
337
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
338
 
339
 
340
- def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot"):
341
  columns = features.columns[1:]
342
  df1=features.select_dtypes(exclude=['object'])
343
 
344
- fig, axes = plt.subplots(len(df1.columns),1, figsize=(17,len(df1.columns)))
345
  for i, ax in enumerate(axes):
346
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
347
- fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=16, y=1)
348
  fig.tight_layout()
349
 
350
 
@@ -352,7 +353,8 @@ class FeaturesPlotter:
352
 
353
  return fig, output_path
354
 
355
- def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot", font_size=24, legend=False):
 
356
  LOG_NATURE = "Log Nature"
357
  GENERATED = "Generated"
358
  REAL = "Real"
@@ -375,7 +377,7 @@ class FeaturesPlotter:
375
  if plot_type == 'violinplot':
376
  inner_param = 'inner = None,'
377
 
378
- fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(16,len(dmf1.columns)*1.25), dpi=300)
379
  if isinstance(axes, Axes): # not isinstance(axes, list):
380
  axes = [axes]
381
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
 
14
  from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH, PIPELINE_STEP
16
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
+ from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
18
  from collections import defaultdict
19
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
 
320
  output_path = params[OUTPUT_PATH] if OUTPUT_PATH in params else None
321
  plot_type = f", plot_type='{params[PLOT_TYPE]}'" if params.get(PLOT_TYPE) else ""
322
  font_size = f", font_size='{params[FONT_SIZE]}'" if params.get(FONT_SIZE) else ""
323
+ boxplot_w = f", boxplot_w='{params[BOXPLOT_WIDTH]}'" if params.get(BOXPLOT_WIDTH) else ""
324
  LEGEND = ", legend=True" if params.get(PIPELINE_STEP) else ""
325
 
326
  source_name = os.path.split(params['input_path'])[-1].replace(".csv", "")+"_"
 
328
  if REAL_EVENTLOG_PATH in params:
329
  real_eventlogs_path=params[REAL_EVENTLOG_PATH]
330
  real_eventlogs = pd.read_csv(real_eventlogs_path)
331
+ fig, output_path = eval(f"self.plot_violinplot_multi(features, output_path, real_eventlogs, source='{source_name}' {plot_type}{font_size}{boxplot_w}{LEGEND})")
332
  else:
333
+ fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type}{font_size}{boxplot_w})")
334
 
335
  if output_path != None:
336
  os.makedirs(os.path.split(output_path)[0], exist_ok=True)
 
338
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
339
 
340
 
341
+ def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot", font_size=16, boxplot_w=16):
342
  columns = features.columns[1:]
343
  df1=features.select_dtypes(exclude=['object'])
344
 
345
+ fig, axes = plt.subplots(len(df1.columns),1, figsize=(int(boxplot_w),len(df1.columns)))
346
  for i, ax in enumerate(axes):
347
  eval(f"sns.{plot_type}(data=df1, x=df1[df1.columns[i]], ax=ax)")
348
+ fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs", fontsize=font_size, y=1)
349
  fig.tight_layout()
350
 
351
 
 
353
 
354
  return fig, output_path
355
 
356
+ def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot",
357
+ font_size=24, legend=False, boxplot_w=16):
358
  LOG_NATURE = "Log Nature"
359
  GENERATED = "Generated"
360
  REAL = "Real"
 
377
  if plot_type == 'violinplot':
378
  inner_param = 'inner = None,'
379
 
380
+ fig, axes = plt.subplots(len(dmf1.columns),1, figsize=(int(boxplot_w),len(dmf1.columns)*1.25), dpi=300)
381
  if isinstance(axes, Axes): # not isinstance(axes, list):
382
  axes = [axes]
383
  #nature_types = set(['Generated', 'Real'])#set(bdf['Log Nature'].unique())
notebooks/feature_selection.ipynb CHANGED
The diff for this file is too large to render. See raw diff