Spaces:
Running
Running
import matplotlib as mpl | |
import matplotlib.colors as mcolors | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import os | |
import pandas as pd | |
import seaborn as sns | |
import os | |
import glob | |
from collections import defaultdict | |
from gedi.generator import get_tasks | |
from gedi.utils.io_helpers import get_keys_abbreviation | |
from gedi.utils.io_helpers import read_csvs, select_instance | |
from gedi.utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP | |
from gedi.utils.param_keys import OUTPUT_PATH, PIPELINE_STEP | |
from gedi.utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE | |
from gedi.utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH | |
from matplotlib.axes import Axes | |
from matplotlib.figure import Figure | |
from matplotlib.lines import Line2D | |
from sklearn.preprocessing import Normalizer, StandardScaler | |
from sklearn.decomposition import PCA | |
def insert_newlines(string, every=140):
    """Break ``string`` into lines of at most ``every`` characters.

    Consecutive slices of length ``every`` are taken from left to right and
    joined with a newline character; the final slice may be shorter.

    :param string: text to split
    :param every: maximum number of characters per line (default: 140)
    :return: the re-joined text
    """
    pieces = []
    for start in range(0, len(string), every):
        pieces.append(string[start:start + every])
    return '\n'.join(pieces)
class MyPlotter:
    """Base plotter that stores a figure/axes pair together with display options."""

    def __init__(self, interactive: bool = True, title_prefix: str = '', for_paper: bool = False):
        """
        :param interactive: if True, the 'TkAgg' matplotlib backend is selected
        :param title_prefix: text used as the figure title
        :param for_paper: if True, a larger font size (18 instead of 10) is used and
            the figure title is omitted during post-processing
        """
        # Placeholders; the concrete plot methods replace them via plt.subplots().
        self.fig: Figure = Figure()
        self.axes: Axes = Axes(self.fig, [0, 0, 0, 0])
        self.interactive: bool = interactive
        self.title_prefix: str = title_prefix
        self.colors: dict = mcolors.TABLEAU_COLORS
        self.for_paper: bool = for_paper

        if self.interactive:
            mpl.use('TkAgg')

        self.fontsize = 18 if self.for_paper else 10

    def _set_figure_title(self):
        """Write the title prefix as the figure's super-title."""
        self.fig.suptitle(self.title_prefix)

    def _post_processing(self):
        """Add the figure title (unless plotting for a paper) and show the figure."""
        if self.for_paper:
            plt.show()
            return
        self._set_figure_title()
        plt.show()
class ModelResultPlotter(MyPlotter):
    """Plots 2D projections of model results, optionally with one curve per component below."""

    def plot_models(self, model_results, plot_type='', plot_tics=False, components=None):
        """
        Plots the model results in a 2d-coordinate system next to each other.
        Alternatively, the tics of the components can be plotted under the figures when `plot_tics` is True.

        :param model_results: list of dictionary
            each dict is read with the keys PROJECTION and (for the axis labels) EXPLAINED_VAR
        :param plot_type: param_key.plot_type (currently not used by this method)
        :param plot_tics: bool (default: False)
            Plots the component tics under the base figures if True
        :param components: int
            Number of components used for the reduction; required when `plot_tics` is True
        :raises ValueError: if `plot_tics` is True but `components` is None
        """
        n_models = len(model_results)
        if plot_tics:
            if components is None:
                raise ValueError("`components` must be given when `plot_tics` is True.")
            self.fig, self.axes = plt.subplots(components + 1, n_models,
                                               constrained_layout=True, figsize=(10, 8))  # subplots(rows, columns)
            component_numbers = range(1, components + 1)
            if n_models == 1:
                # With a single column plt.subplots returns a 1-d array that is indexed by row only.
                projection = model_results[0][PROJECTION]
                for component_nr in component_numbers:
                    self._plot_time_tics(self.axes[component_nr], projection, component=component_nr)
            else:
                for i, result in enumerate(model_results):
                    main_ax = self.axes[0][i]  # axes[row][column]
                    df_pca = pd.DataFrame(result[PROJECTION], columns=["PC1", "PC2"])
                    sns.scatterplot(ax=main_ax, data=df_pca, x="PC1", y="PC2", palette="bright",
                                    hue=[''] * len(df_pca), alpha=0.9, s=100)
                    try:
                        main_ax.set_xlabel(f"PC1 ({np.round(result[EXPLAINED_VAR][0]*100, 2)}% explained variance)")
                        main_ax.set_ylabel(f"PC2 ({np.round(result[EXPLAINED_VAR][1]*100, 2)}% explained variance)")
                    except TypeError:
                        # EXPLAINED_VAR is not subscriptable/numeric here; fall back to the t-SNE labels.
                        main_ax.set_xlabel("TSNE_1")
                        main_ax.set_ylabel("TSNE_2")
                    for component_nr in component_numbers:
                        self._plot_time_tics(self.axes[component_nr][i], result[PROJECTION],
                                             component=component_nr)
        else:
            # NOTE(review): this branch only creates the empty axes; nothing is drawn on them here.
            self.fig, self.axes = plt.subplots(1, n_models, constrained_layout=True)
        plt.show()

    @staticmethod
    def _plot_time_tics(ax, projection, component):
        """
        Plot the values of one component over the sample index on a specific axis.

        :param ax: axis to draw on (it is cleared first)
        :param projection: 2-d array; column ``component - 1`` is plotted
        :param component: 1-based component number, also used in the y-label
        :return: None
        """
        ax.cla()
        ax.set_xlabel('Time step')
        ax.set_ylabel('Component {}'.format(component))
        ax.label_outer()
        ax.plot(projection[:, component - 1])
class ArrayPlotter(MyPlotter):
    """Plotter for matrices and 1-d series with configurable labels, legend, y-range and grid."""

    def __init__(self, interactive=False, title_prefix='', x_label='', y_label='', bottom_text=None, y_range=None,
                 show_grid=False, xtick_start=0, for_paper=False):
        """
        :param interactive: forwarded to MyPlotter
        :param title_prefix: forwarded to MyPlotter
        :param x_label: label of the x-axis
        :param y_label: label of the y-axis
        :param bottom_text: optional text placed in the lower-left corner of the figure
        :param y_range: optional limits passed to ``Axes.set_ylim``
        :param show_grid: if True, major and minor grid lines are drawn
        :param xtick_start: first x-tick label used by ``matrix_plot``
        :param for_paper: forwarded to MyPlotter
        """
        super().__init__(interactive, title_prefix, for_paper)
        self.x_label = x_label
        self.y_label = y_label
        self.bottom_text = bottom_text
        self.range_tuple = y_range
        self._activate_legend = False
        self.show_grid = show_grid
        self.xtick_start = xtick_start

    @staticmethod
    def _function_name(function):
        """Return a printable name for ``function`` (its ``__name__`` when it has one)."""
        return getattr(function, '__name__', str(function))

    def _post_processing(self, legend_outside=False):
        """
        Apply labels, optional bottom text, legend, y-limits and grid, then show the figure.

        :param legend_outside: if True, the legend is placed below the axes
        """
        self.axes.set_xlabel(self.x_label, fontsize=self.fontsize)
        self.axes.set_ylabel(self.y_label, fontsize=self.fontsize)

        if self.bottom_text is not None:
            self.fig.text(0.01, 0.01, self.bottom_text, fontsize=self.fontsize)
            self.fig.tight_layout()
            # Reserve room at the bottom: 0.1 of the figure height per line of text.
            self.fig.subplots_adjust(bottom=(self.bottom_text.count('\n') + 1) * 0.1)
        else:
            self.fig.tight_layout()

        if legend_outside:
            self.axes.legend(bbox_to_anchor=(0.5, -0.05), loc='upper center', fontsize=8)
            plt.subplots_adjust(bottom=0.25)
        elif self._activate_legend:
            self.axes.legend(fontsize=self.fontsize)

        if self.range_tuple is not None:
            self.axes.set_ylim(self.range_tuple)

        if self.show_grid:
            plt.grid(True, which='both')
            plt.minorticks_on()

        super()._post_processing()

    def matrix_plot(self, matrix, as_surface='2d', show_values=False):
        """
        Plots the values of a matrix on a 2d or a 3d axes

        :param matrix: ndarray (2-ndim)
            matrix, which should be plotted
        :param as_surface: str
            Plot as a 3d-surface if value PLOT_3D_MAP else 2d-axes
        :param show_values: If true, then show the values in the matrix (2d plot only)
        """
        c_map = plt.cm.viridis
        if as_surface == PLOT_3D_MAP:
            # plot_surface needs coordinate grids of the matrix's own shape: with the default
            # 'xy' indexing, meshgrid(columns, rows) has shape (rows, columns) and places
            # matrix[i, j] at (x=j, y=i), which also works for non-square matrices.
            x_coordinates, y_coordinates = np.meshgrid(np.arange(matrix.shape[1]),
                                                       np.arange(matrix.shape[0]))
            self.fig = plt.figure()
            # Figure.gca() does not accept keyword arguments in current Matplotlib releases.
            self.axes = self.fig.add_subplot(projection='3d')
            self.axes.set_zlabel('Covariance Values', fontsize=self.fontsize)
            im = self.axes.plot_surface(x_coordinates, y_coordinates, matrix, cmap=c_map)
        else:
            self.fig, self.axes = plt.subplots(1, 1, dpi=80)
            im = self.axes.matshow(matrix, cmap=c_map)
            if show_values:
                for (i, j), value in np.ndenumerate(matrix):
                    self.axes.text(j, i, '{:0.2f}'.format(value), ha='center', va='center', fontsize=8)
        if not self.for_paper:
            self.fig.colorbar(im, ax=self.axes)
            plt.xticks(np.arange(matrix.shape[1]), np.arange(self.xtick_start, matrix.shape[1] + self.xtick_start))
        self._post_processing()

    def plot_gauss2d(self,
                     x_index: np.ndarray,
                     ydata: np.ndarray,
                     new_ydata: np.ndarray,
                     gauss_fitted: np.ndarray,
                     fit_method: str,
                     statistical_function: callable = np.median):
        """
        Plot the original data (ydata), the new data (new_ydata) where the x-axis-indices is given by (x_index),
        the (fitted) gauss curve and a line (mean, median)

        :param x_index: ndarray (1-ndim)
            range of plotting
        :param ydata: ndarray (1-ndim)
            original data
        :param new_ydata: ndarray (1-ndim)
            the changed new data (drawn with an invisible format string)
        :param gauss_fitted: ndarray (1-ndim)
            the fitted curve on the new data
        :param fit_method: str
            the name of the fitting method
        :param statistical_function: callable
            Some statistical numpy function
        :return:
        """
        self.fig, self.axes = plt.subplots(1, 1, dpi=80)
        self.axes.plot(x_index, gauss_fitted, '-', label=f'fit {fit_method}')
        self.axes.plot(x_index, ydata, '.', label='original data')
        statistical_value = np.full(x_index.shape, statistical_function(ydata))
        if self.for_paper:
            function_label = 'threshold'
        else:
            function_label = self._function_name(statistical_function)
        self._activate_legend = True
        self.axes.plot(x_index, statistical_value, '-', label=function_label)
        self.axes.plot(x_index, new_ydata, ' ')
        self._post_processing()

    def plot_2d(self, ndarray_data, statistical_func=None):
        """
        Plot a series as a line and, optionally, a horizontal line at a statistic of it.

        :param ndarray_data: ndarray to plot
        :param statistical_func: optional callable applied to ``ndarray_data``
        """
        self.fig, self.axes = plt.subplots(1, 1)
        self.axes.plot(ndarray_data, '-')
        if statistical_func is not None:
            statistical_value = statistical_func(ndarray_data)
            statistical_value_line = np.full(ndarray_data.shape, statistical_value)
            self.axes.plot(statistical_value_line, '-',
                           label=f'{self._function_name(statistical_func)}: {statistical_value:.4f}')
        # NOTE(review): the legend stays disabled, so the label above is not displayed - confirm intent.
        self._activate_legend = False
        self._post_processing()

    def plot_merged_2ds(self, ndarray_dict: dict, statistical_func=None):
        """
        Plot several series into one axes, each optionally with a dashed line at its statistic.

        :param ndarray_dict: mapping of label -> series (ndarray, or list when a statistic is requested)
        :param statistical_func: optional callable applied to every series
        """
        self.fig, self.axes = plt.subplots(1, 1, dpi=80)
        self.title_prefix += f'with {self._function_name(statistical_func)}' if statistical_func is not None else ''
        # Walk the default color cycle explicitly so that a series and its statistic line share
        # one color, without touching the private Axes._get_lines cycler.
        palette = plt.rcParams['axes.prop_cycle'].by_key()['color']
        for position, (key, ndarray_data) in enumerate(ndarray_dict.items()):
            color = palette[position % len(palette)]
            if statistical_func is None:
                self.axes.plot(ndarray_data, '-', color=color, label=f'{key.strip()[:35]}')
                continue
            if isinstance(ndarray_data, list):
                ndarray_data = np.asarray(ndarray_data)
            self.axes.plot(ndarray_data, '-', color=color)
            statistical_value = statistical_func(ndarray_data)
            statistical_value_line = np.full(ndarray_data.shape, statistical_value)
            self.axes.plot(statistical_value_line, '--',
                           label=f'{key.strip()}: {statistical_value:.4f}', color=color)
        self._activate_legend = True
        self._post_processing()
class BenchmarkPlotter:
    """Creates correlation heatmaps for benchmark results as soon as it is instantiated."""

    # Column-name prefixes that identify miner result columns.
    _MINER_PREFIXES = ('inductive', 'heu', 'ilp')
    # Column-name suffixes that identify the evaluation metrics.
    _METRIC_SUFFIXES = ('fitness', 'precision', 'generalization', 'simplicity')

    def __init__(self, benchmark_results, output_path=None):
        """
        :param benchmark_results: DataFrame with one row per event-log; the column 'log' is ignored
            and miner results are recognized by the prefixes 'inductive', 'heu' and 'ilp'
        :param output_path: directory for the resulting .jpg files; nothing is saved if None
        """
        self.plot_miners_correlation(benchmark_results, output_path=output_path)
        self.plot_miner_feat_correlation(benchmark_results, output_path=output_path)
        self.plot_miner_feat_correlation(benchmark_results, mean='methods', output_path=output_path)

    @staticmethod
    def _numeric_corr(df):
        """Correlate the numeric/bool columns only, so that other columns cannot make corr() fail."""
        return df.select_dtypes(include=['number', 'bool']).corr()

    def plot_miner_feat_correlation(self, benchmark, mean='metrics', output_path=None):
        """
        Plot, per miner or per metric, the averaged correlation between features and benchmark results.

        :param benchmark: DataFrame with the benchmark results
        :param mean: 'metrics' averages over the columns ending with a metric name,
            'methods' averages over the columns starting with a miner prefix
        :param output_path: directory the figure is saved to; nothing is saved if None
        :raises ValueError: if no averaged column could be built (e.g. unknown `mean`)
        """
        df = benchmark.loc[:, benchmark.columns != 'log']
        corr = self._numeric_corr(df)

        if mean == 'methods':
            for method in self._MINER_PREFIXES:
                method_cols = [col for col in corr.columns if col.startswith(method)]
                corr[method + '_avg'] = corr.loc[:, corr.columns.isin(method_cols)].mean(axis=1)
        elif mean == 'metrics':
            for metric in self._METRIC_SUFFIXES:
                metric_cols = [col for col in corr.columns if col.endswith(metric)]
                corr[metric + '_avg'] = corr.loc[:, corr.columns.isin(metric_cols)].mean(axis=1)

        avg_cols = [col for col in corr.columns if col.endswith('_avg')]
        if not avg_cols:
            raise ValueError(f"No averaged columns to plot for mean='{mean}'.")

        # Keep only the feature rows; the miner result rows would correlate with themselves.
        benchmark_result_cols = [col for col in corr.columns if col.startswith(self._MINER_PREFIXES)]
        corr = corr.loc[~corr.index.isin(benchmark_result_cols)]

        # squeeze=False keeps a 2-d axes array even when there is a single panel.
        fig, axes = plt.subplots(1, len(avg_cols), figsize=(15, 10), squeeze=False)
        last_panel = len(avg_cols) - 1
        for i, ax in enumerate(axes[0]):
            corr = corr.sort_values(avg_cols[i], axis=0, ascending=False)
            sns.heatmap(corr[[avg_cols[i]]],
                        ax=ax,
                        xticklabels=[avg_cols[i]],
                        yticklabels=corr.index,
                        cbar=(i == last_panel))  # one colorbar, next to the last panel
        fig.subplots_adjust(wspace=1, top=0.9, left=0.15)
        fig.suptitle(f"Feature and performance correlation per {mean.split('s')[0]} for {len(benchmark)} event-logs")
        if output_path is not None:
            output_path = os.path.join(output_path, f"minperf_corr_{mean.split('s')[0]}_el{len(benchmark)}.jpg")
            fig.savefig(output_path)
            print(f"SUCCESS: Saved correlation plot at {output_path}")

    def plot_miners_correlation(self, benchmark, output_path=None):
        """
        Plot the correlation between all miner result columns as one heatmap.

        :param benchmark: DataFrame with the benchmark results
        :param output_path: directory the figure is saved to; nothing is saved if None
        """
        benchmark_result_cols = [col for col in benchmark.columns if col.startswith(self._MINER_PREFIXES)]
        df = benchmark.loc[:, benchmark.columns != 'log']
        df = df.loc[:, df.columns.isin(benchmark_result_cols)]
        corr = self._numeric_corr(df)

        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr,
                    ax=ax,
                    xticklabels=corr.columns.values,
                    yticklabels=corr.columns.values)
        ax.set_title(f"Miners and performance correlation for {len(benchmark)} event-logs", loc='center')
        if output_path is not None:
            output_path = os.path.join(output_path, f"minperf_corr_el{len(benchmark)}.jpg")
            fig.savefig(output_path)
            print(f"SUCCESS: Saved correlation plot at {output_path}")
class FeaturesPlotter:
    """Plots the distribution of every numeric feature, optionally against real event-logs."""

    def __init__(self, features, params=None):
        """
        Create the distribution plot and save it if an output path is configured.

        :param features: DataFrame with the features of the generated event-logs
        :param params: dict read with the keys OUTPUT_PATH, PLOT_TYPE, FONT_SIZE, BOXPLOT_WIDTH,
            PIPELINE_STEP, REAL_EVENTLOG_PATH and 'input_path'
        """
        params = {} if params is None else params
        output_path = params.get(OUTPUT_PATH)

        # Only forward the options that are actually configured, so that the differing
        # defaults of the two plot methods stay in effect otherwise.
        plot_kwargs = {}
        if params.get(PLOT_TYPE):
            plot_kwargs['plot_type'] = params[PLOT_TYPE]
        if params.get(FONT_SIZE):
            plot_kwargs['font_size'] = params[FONT_SIZE]
        if params.get(BOXPLOT_WIDTH):
            plot_kwargs['boxplot_w'] = params[BOXPLOT_WIDTH]
        plot_type = plot_kwargs.get('plot_type', 'violinplot')

        source_name = os.path.split(params.get('input_path', ''))[-1].replace(".csv", "") + "_"

        if REAL_EVENTLOG_PATH in params:
            real_eventlogs = pd.read_csv(params[REAL_EVENTLOG_PATH])
            fig, output_path = self.plot_violinplot_multi(features, output_path, real_eventlogs,
                                                          source=source_name,
                                                          legend=bool(params.get(PIPELINE_STEP)),
                                                          **plot_kwargs)
        else:
            fig, output_path = self.plot_violinplot_single(features, output_path, source=source_name,
                                                           **plot_kwargs)

        if output_path is not None:
            directory = os.path.dirname(output_path)
            if directory:
                os.makedirs(directory, exist_ok=True)
            fig.savefig(output_path)
            print(f"SUCCESS: Saved {plot_type} plot in {output_path}")

    def plot_violinplot_single(self, features, output_path=None, source="_", plot_type="violinplot", font_size=16, boxplot_w=16):
        """
        Plot one distribution panel per non-object column of `features`.

        :param features: DataFrame with the features of the generated event-logs
        :param output_path: directory of the resulting file; if None, no file path is built
        :param source: name part inserted into the file name
        :param plot_type: name of the seaborn plotting function to use (e.g. 'violinplot', 'boxplot')
        :param font_size: font size of the figure title
        :param boxplot_w: figure width; converted with int()
        :return: tuple (figure, path of the file to write or None)
        """
        columns = features.columns[1:]
        numeric = features.select_dtypes(exclude=['object'])
        draw = getattr(sns, plot_type)

        # squeeze=False keeps an array of axes even for a single numeric column.
        fig, axes = plt.subplots(len(numeric.columns), 1, figsize=(int(boxplot_w), len(numeric.columns)),
                                 squeeze=False)
        for column, ax in zip(numeric.columns, axes[:, 0]):
            draw(data=numeric, x=numeric[column], ax=ax)
        fig.suptitle(f"{len(columns)} features distribution for {len(features)} generated event-logs",
                     fontsize=font_size, y=1)
        fig.tight_layout()

        if output_path is not None:
            output_path = output_path + f"/{plot_type}s_{source}{len(columns)}fts_{len(numeric)}gEL.jpg"
        return fig, output_path

    def plot_violinplot_multi(self, features, output_path, real_eventlogs, source="_", plot_type="violinplot",
                              font_size=24, legend=False, boxplot_w=16):
        """
        Plot one panel per non-object column, overlaying generated and real event-logs.

        The input frames are not modified; the 'Log Nature' marker is added to copies.

        :param features: DataFrame with the features of the generated event-logs
        :param output_path: directory of the resulting file; if None, no file path is built
        :param real_eventlogs: DataFrame with the features of the real event-logs
        :param source: name part inserted into the file name
        :param plot_type: name of the seaborn plotting function to use
        :param font_size: font size for ticks, labels and legend
        :param legend: if True, a figure legend with one entry per log nature is added
        :param boxplot_w: figure width; converted with int()
        :return: tuple (figure, path of the file to write or None)
        """
        LOG_NATURE = "Log Nature"
        GENERATED = "Generated"
        REAL = "Real"
        alpha = 0.7
        color = sns.color_palette("bright")
        markers = ['o', 'X']

        generated = features.assign(**{LOG_NATURE: GENERATED})
        real = real_eventlogs.assign(**{LOG_NATURE: REAL})

        bdf = pd.concat([generated, real])
        bdf = bdf[generated.columns]
        bdf = bdf.dropna(axis='rows')
        columns = bdf.columns[3:]
        dmf1 = bdf.select_dtypes(exclude=['object'])

        draw = getattr(sns, plot_type)
        # `inner` is a violinplot option; it must not be handed to stripplot.
        dist_kwargs = {'inner': None} if plot_type == 'violinplot' else {}

        fig, axes = plt.subplots(len(dmf1.columns), 1, figsize=(int(boxplot_w), len(dmf1.columns) * 1.25),
                                 dpi=300, squeeze=False)
        axes = axes[:, 0]

        nature_types = list(reversed(bdf[LOG_NATURE].unique()[:2]))
        for column, ax in zip(dmf1.columns, axes):
            for j, nature in enumerate(nature_types):
                subset = bdf[bdf[LOG_NATURE] == nature]
                draw(data=subset, x=column, palette=[color[j]], ax=ax, **dist_kwargs)
                sns.stripplot(data=subset, x=column, palette=[color[j]], marker=markers[j], ax=ax)
            for collection in ax.collections:
                collection.set_alpha(alpha)
            for patch in ax.patches:
                r, g, b, _ = patch.get_facecolor()
                patch.set_facecolor((r, g, b, alpha))
            ax.tick_params(axis='both', which='major', labelsize=font_size)
            ax.tick_params(axis='both', which='minor', labelsize=font_size)
            ax.set_xlabel(column, fontsize=font_size)

        if legend:
            # One handle per plotted nature, in the same color order as used above.
            custom_lines = [Line2D([0], [0], color=color[j], lw=4, alpha=alpha)
                            for j in range(len(nature_types))]
            fig.legend(custom_lines, nature_types, loc='upper right', ncol=len(nature_types),
                       prop={'size': font_size})
            plt.legend(fontsize=font_size)

        plt.yticks(fontsize=font_size)
        plt.xticks(fontsize=font_size)
        fig.tight_layout()

        if output_path is not None:
            n_plotted = len(bdf[bdf[LOG_NATURE].isin(nature_types)])
            output_path = output_path + f"/{plot_type}s_{source}{len(columns)}fts_{len(features)}gEL_of{n_plotted}.jpg"
        return fig, output_path
class AugmentationPlotter(object):
    """Plotter for the augmented features.

    If exactly 2 features are examined, the plotter outputs scatterplots with the two features
    defining the dimensions.
    If more than 2 features are examined, a PCA is performed first before the first two principal
    components are plotted.

    Parameters
    ----------
    features : object
        Object exposing ``all`` (DataFrame whose trailing rows are the synthesized samples)
        and ``new_samples`` (only its number of rows is used).
    params : dict
        Read with the keys OUTPUT_PATH (optional) and ``'augmentation_params' -> 'method'``.
    """

    def __init__(self, features, params=None) -> None:
        params = {} if params is None else params
        output_path = params.get(OUTPUT_PATH)
        self.sampler = params['augmentation_params']['method']
        self.plot_augmented_features(features, output_path)

    def plot_augmented_features(self, features, output_path=None) -> None:
        """Plotting for augmented features. When more than 2 features are selected, the
        plot will show the result after applying a PCA; otherwise the 2 features are
        plotted according to the values.

        Parameters
        ----------
        features : object
            Object exposing the augmented features as ``all`` and ``new_samples``
        output_path : str, optional
            Directory the figure is saved to, by default None

        Raises
        ------
        AssertionError
            If fewer than 2 features are available.
        """
        n_columns = len(features.all.columns)
        if n_columns < 2:
            raise AssertionError("AugmentationPlotter - At least 2 (augmented) features are expected for plotting.")
        if n_columns > 2:
            self._plot_pca(features, output_path)
        else:
            self._plot_2d(features, output_path)

    @staticmethod
    def _split_real_augmented(features):
        """Return (real rows, all rows) of ``features.all`` as numpy arrays.

        The number of real rows is computed explicitly, because a negative slice
        (``iloc[:-n]``) would return an empty frame for ``n == 0``.
        """
        n_real = max(features.all.shape[0] - features.new_samples.shape[0], 0)
        return features.all.iloc[:n_real].to_numpy(), features.all.to_numpy()

    def _scatter_panel(self, ax, data, X, X_aug, colnames) -> None:
        """Draw one real-vs-synth scatter panel of the two columns of ``data`` on ``ax``."""
        labelled = self._add_real_synth_encoding(data, X, X_aug)
        frame = self._convert_to_df(labelled, [colnames[0], colnames[1], 'type'])
        sns.scatterplot(ax=ax, data=frame, x=colnames[0], y=colnames[1], palette="bright",
                        hue='type', alpha=0.5, s=100)
        ax.get_legend().set_title("")

    def _plot_2d(self, features, output_path=None) -> None:
        """Fnc for plotting 2D features without any dimension reduction technique being applied.

        Parameters
        ----------
        features : object
            Object exposing the augmented features as ``all`` and ``new_samples``
        output_path : str, optional
            Directory the figure is saved to, by default None
        """
        col1_name, col2_name = features.all.columns

        # INIT - settings
        X, X_aug = self._split_real_augmented(features)

        sns.set_theme()
        fig, axes = plt.subplots(1, 3, figsize=(24, 8))
        fig.suptitle(f'Log Descriptors - real: {X.shape[0]}, synth.: {X_aug.shape[0]-X.shape[0]}', fontsize=16)

        # Normalizer: applied to each observation -> row values have unit norm
        normed_data = Normalizer(norm="l2").fit(X).transform(X_aug)
        # StandardScaler: fitted on the real data, applied per feature (column)
        scaled_data = StandardScaler().fit(X).transform(X_aug)

        panels = ((X_aug, "Raw data"), (normed_data, "Normalized data"), (scaled_data, "Scaled data"))
        for ax, (data, title) in zip(axes, panels):
            self._scatter_panel(ax, data, X, X_aug, (col1_name, col2_name))
            ax.set_title(title)

        plt.tight_layout()

        # OUTPUT
        if output_path is not None:
            output_path = os.path.join(output_path,
                                       f"augmentation_2d_plot_{col1_name}-{col2_name}_{self.sampler}.jpg")
            fig.savefig(output_path)
            print(f"SUCCESS: Saved augmentation 2d plot at {output_path}")

    def _add_real_synth_encoding(self, arr, X, X_aug) -> np.ndarray:
        """Helper function for adding one additional column to the array in the last column.
        The last column indicates whether it is a real data (=0) or synthesized (=1).

        Parameters
        ----------
        arr : np.ndarray
            data array
        X : np.ndarray
            data of real datasets
        X_aug : np.ndarray
            data of real datasets and synthesized datasets

        Returns
        -------
        np.ndarray
            array containing the data with an additional last column indicating whether the
            data comes from a real dataset or synthesized one
        """
        n_real = X.shape[0]
        n_synth = X_aug.shape[0] - n_real
        real_synth_enc = np.array([0] * n_real + [1] * n_synth).reshape(-1, 1)
        return np.hstack([arr, real_synth_enc])

    def _convert_to_df(self, arr, colnames, enc=('real', 'synth')) -> pd.DataFrame:
        """Converts the attached array to a dataframe. The column names are
        defined by the respective parameters, where the last column is encoded
        by the labels of the enc parameter.

        Parameters
        ----------
        arr : np.ndarray
            data array whose last column holds 0 (real) or 1 (synthesized)
        colnames : list
            column names of returned dataframe
        enc : sequence, optional
            labels for real vs. generated data, by default ('real', 'synth')

        Returns
        -------
        pd.DataFrame
            dataframe containing the attached data array with encoded values in the last column
        """
        df = pd.DataFrame(arr, columns=colnames)
        flags = df.iloc[:, -1]
        # Build the label column as object dtype instead of writing strings into a float column.
        labels = flags.astype(object).mask(flags == 0, enc[0]).mask(flags == 1, enc[1])
        df[colnames[-1]] = labels
        return df

    def _plot_pca(self, features, output_path=None) -> None:
        """Fnc for plotting features with PCA as dimension reduction technique being applied.

        Parameters
        ----------
        features : object
            Object exposing the augmented features as ``all`` and ``new_samples``
        output_path : str, optional
            Directory the figure is saved to, by default None
        """
        # INIT - settings
        n_features = features.all.shape[1]
        X, X_aug = self._split_real_augmented(features)

        sns.set_theme()
        fig, axes = plt.subplots(1, 3, figsize=(24, 8))
        fig.suptitle(f'Log Descriptors - real: {X.shape[0]}, synth.: {X_aug.shape[0]-X.shape[0]}', fontsize=16)

        pca_components = 2

        # Normalizer: applied to each observation -> row values have unit norm
        normalizer = Normalizer(norm="l2").fit(X)
        # StandardScaler: fitted on the real data, applied per feature (column)
        scaler = StandardScaler().fit(X)

        # Each panel: PCA is fitted on the real data only and then applied to all data.
        panels = (
            (X, X_aug),                                                # raw input
            (normalizer.transform(X), normalizer.transform(X_aug)),   # normed data
            (scaler.transform(X), scaler.transform(X_aug)),           # scaled data
        )
        for ax, (fit_data, plot_data) in zip(axes, panels):
            pca = PCA(n_components=pca_components).fit(fit_data)
            projected = pca.transform(plot_data)[:, :pca_components]
            self._scatter_panel(ax, projected, X, X_aug, ('PC_1', 'PC_2'))
            ax.set_xlabel(f"PC1 ({np.round(pca.explained_variance_ratio_[0]*100, 2)}% explained variance)")
            ax.set_ylabel(f"PC2 ({np.round(pca.explained_variance_ratio_[1]*100, 2)}% explained variance)")

        plt.tight_layout()

        # OUTPUT
        if output_path is not None:
            output_path = os.path.join(output_path, f"augmentation_pca_{n_features}_{self.sampler}.jpg")
            fig.savefig(output_path)
            print(f"SUCCESS: Saved augmentation pca plot at {output_path}")
class GenerationPlotter(object): | |
def __init__(self, gen_cfg, model_params, output_path, input_path=None): | |
print(f"Running plotter for {len(gen_cfg)} genEL, params {model_params}, output path: {output_path}") | |
self.output_path = output_path | |
self.input_path = input_path | |
self.model_params = model_params | |
if gen_cfg.empty: # Deactivated for tests | |
return | |
if "metafeatures" in gen_cfg.columns: | |
self.gen = gen_cfg.metafeatures | |
self.gen=pd.concat([pd.DataFrame.from_dict(entry, orient="Index").T for entry in self.gen]).reset_index(drop=True) | |
else: | |
self.gen = gen_cfg.reset_index(drop=True) | |
if GENERATOR_PARAMS in model_params: | |
self.tasks, _ = get_tasks(model_params[GENERATOR_PARAMS][EXPERIMENT]) | |
feature_list = list(self.tasks.select_dtypes(exclude=['object']).keys()) | |
ref_feat = None | |
if PLOT_REFERENCE_FEATURE in model_params[GENERATOR_PARAMS]and model_params[GENERATOR_PARAMS][PLOT_REFERENCE_FEATURE] != "": | |
ref_feat = model_params[GENERATOR_PARAMS][PLOT_REFERENCE_FEATURE] | |
reference_feature_list = feature_list if ref_feat is None else [ref_feat] | |
self.plot_settings() | |
if input_path is not None: | |
# plot single reference feature compared to values stored in .csvs | |
if isinstance(input_path, str) and input_path.endswith(".csv"): | |
f_d = pd.read_csv(input_path) | |
f_d = {model_params['reference_feature']: f_d} | |
elif isinstance(input_path, list): | |
self.plot_dist_mx(model_params) | |
else: | |
f_d = read_csvs(input_path, model_params['reference_feature']) | |
tasks, _ = get_tasks(model_params['targets'], reference_feature=model_params['reference_feature']) | |
self.plot_reference_feature_plot(tasks, f_d, model_params['reference_feature']) | |
else: | |
# start all plotting procedures at once | |
self.plot_feat_comparison(feature_list, reference_feature_list) | |
def plot_reference_feature_plot(self, orig_targets, f_dict, reference_feature, resolution=10):
    """
    Compare generated feature values with their targets for a reference feature.

    Two figures are written to `self.output_path`: a scatter plot connecting every generated
    entry with its target, and a mesh plot of the mean distance per grid cell.
    Rows whose target is NaN in one of the two compared features are left out; the frames
    in `f_dict` are not modified.

    :param orig_targets: DataFrame with the targets, or a defaultdict mapping feature -> DataFrame;
        the frames need the columns 'log', `reference_feature` and the compared feature
    :param f_dict: dict mapping the compared feature name -> DataFrame of generated features
        (with the columns 'log', `reference_feature` and the compared feature)
    :param reference_feature: name of the feature drawn on the x-axis
    :param resolution: number of grid steps per axis of the mesh plot
    :raises TypeError: if `orig_targets` is neither a DataFrame nor a defaultdict
    """
    def _grid_index(value, low, high):
        # Map value from [low, high] onto 0..resolution; a degenerate range maps to cell 0.
        if not high > low:
            return 0
        position = int(np.round((value - low) / (high - low) * resolution))
        return min(max(position, 0), resolution)

    fig1, axes = plt.subplots(1, len(f_dict), figsize=(20, 4))
    if isinstance(axes, Axes):
        axes = [axes]
    fig2, axes_mesh = plt.subplots(1, len(f_dict), figsize=(20, 4), layout='compressed')
    if isinstance(axes_mesh, Axes):
        axes_mesh = [axes_mesh]

    last_mesh = None
    for idx_ax, (k, v) in enumerate(f_dict.items()):
        if isinstance(orig_targets, pd.DataFrame):
            targets = orig_targets.copy()
        elif isinstance(orig_targets, defaultdict):
            if k not in orig_targets:
                print(f"[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
                continue
            targets = orig_targets[k].copy()
        else:
            raise TypeError(f"Unknown file format for targets {type(orig_targets)}.")

        # Identify NaN values of the reference and of the competitor feature (boolean masks,
        # so that the selection does not depend on the index labels of `targets`).
        nan_rows = targets[reference_feature].isna() | targets[k].isna()
        nan_logs = list(targets.loc[nan_rows, 'log'])
        targets = targets.loc[~nan_rows]
        # Leave the same logs out of the generated data, without touching the caller's frame.
        v = v.loc[~v['log'].isin(nan_logs)]

        if targets.empty or v.empty:
            print(f"[WARNING] No comparable rows left for '{k}' after dropping NaN values. Will continue with next feature.")
            continue

        # Plot generated DataFrame + target DataFrame
        v.plot.scatter(x=reference_feature, y=k, ax=axes[idx_ax], c="red", alpha=0.3)
        targets.plot.scatter(x=reference_feature, y=k, ax=axes[idx_ax], c='blue', alpha=0.3)

        Z = np.full((resolution + 1, resolution + 1), np.nan)
        cnt_Z = np.zeros((resolution + 1, resolution + 1))

        min_Z_X = np.min(targets[reference_feature])
        min_Z_Y = np.min(targets[k])
        max_Z_X = np.max(targets[reference_feature])
        max_Z_Y = np.max(targets[k])

        cum_sum = 0
        for idx in v.index:
            c_log = v.loc[idx, 'log']
            target_entry = targets[targets['log'] == c_log]
            if target_entry.empty:
                print(f"INFO: no value for {c_log} in generated files.")
                target_entry = targets

            generated_point = np.array([v.loc[idx, reference_feature], v.loc[idx, k]])
            target_point = np.array([target_entry[reference_feature].values[0], target_entry[k].values[0]])
            distance = np.linalg.norm(generated_point - target_point)

            # Plot connection line
            axes[idx_ax].plot([generated_point[0], target_point[0]],
                              [generated_point[1], target_point[1]],
                              c="green", alpha=0.25)
            # Plot textual annotation
            axes[idx_ax].annotate(target_entry['log'].values[0],
                                  (target_point[0], target_point[1]),
                                  fontsize=5)

            # Accumulate the distance in the grid cell of the target
            Z_idx = _grid_index(target_point[0], min_Z_X, max_Z_X)
            Z_idy = _grid_index(target_point[1], min_Z_Y, max_Z_Y)
            if np.isnan(Z[Z_idx][Z_idy]):
                Z[Z_idx][Z_idy] = 0.0
            Z[Z_idx][Z_idy] += distance
            cnt_Z[Z_idx][Z_idy] += 1
            cum_sum += distance

        print(f"INFO: Cumulated distances objectives <-> generated features for '{reference_feature}' vs. '{k}': {cum_sum:.4f}")

        X, Y = np.meshgrid(np.linspace(min_Z_X, max_Z_X, resolution + 1),
                           np.linspace(min_Z_Y, max_Z_Y, resolution + 1))
        cmap = plt.colormaps['viridis_r']
        # Cells without any entry get sqrt(2).
        # NOTE(review): presumably the largest possible distance for features scaled to [0, 1] - confirm.
        Z[np.isnan(Z)] = np.sqrt(2)
        cnt_Z[cnt_Z == 0] = 1
        Z /= cnt_Z  # mean distance per cell

        last_mesh = axes_mesh[idx_ax].pcolormesh(X, Y, Z.T, shading='nearest', cmap=cmap)
        axes_mesh[idx_ax].set_xlabel(reference_feature)
        axes_mesh[idx_ax].set_ylabel(k)
        axes[idx_ax].set_title(f"Cumulated distances {cum_sum:.4f}")

    # One shared colorbar, based on the last mesh that was actually drawn.
    if last_mesh is not None:
        cbar = fig2.colorbar(last_mesh, ax=axes_mesh, orientation='vertical', pad=0.01)
        cbar.ax.set_ylabel('Feature dist. of generated EDs and objectives', fontsize=8, rotation=90, labelpad=-50)

    tasks_keys = sorted(f_dict.keys())
    abbreviations = get_keys_abbreviation(tasks_keys)
    ref_short_name = get_keys_abbreviation([reference_feature])

    fig1.suptitle(f'Feature Comparison - {reference_feature}', fontsize=6)
    fig1.tight_layout()
    distance_plot_path = os.path.join(self.output_path,
                                      f"plot_genEL{len(self.gen)}_tasks{len(tasks_keys)}_{ref_short_name}_vs_{abbreviations}.png")
    fig1.savefig(distance_plot_path)
    print(f"Saved objectives vs. genEL features plot in {distance_plot_path}")

    fig2.suptitle(f'Meshgrid Comparison - {reference_feature}', fontsize=6)
    meshgrid_plot_path = os.path.join(self.output_path,
                                      f"plot_meshgrid_genEL{len(self.gen)}_tasks{len(tasks_keys)}_{ref_short_name}_vs_{abbreviations}.png")
    fig2.savefig(meshgrid_plot_path)
    print(f"Saved meshgrid plot in {meshgrid_plot_path}")
def plot_single_comparison(self, tasks, objective1, objective2, ax, ax_cmesh, fig2, axes_meshes, flag_plt_clbar):
    """
    Draws one task-vs-generated comparison for a pair of objectives.

    On ``ax`` the task points and the points of ``self.gen`` are scattered, and
    every task point is connected to (and annotated against) the generated entry
    it is compared with. On ``ax_cmesh`` a colour mesh of the mean Euclidean
    distance per (objective1, objective2) task value combination is drawn.

    :param tasks: DataFrame holding the task values. NOTE: if it has no
        object-dtype column, a ``'task'`` column is added to it in place.
    :param objective1: column name used for the x axis
    :param objective2: column name used for the y axis
    :param ax: axes receiving the scatter/connection plot
    :param ax_cmesh: axes receiving the colour mesh
    :param fig2: figure the colour bar is attached to
    :param axes_meshes: axes (or collection of axes) the colour bar is laid out against
    :param flag_plt_clbar: when truthy, a colour bar is added to ``fig2``
    :return: the object returned by ``ax_cmesh.pcolormesh``
    """
    # An object-dtype column is needed to label the points; synthesise one if absent.
    if len(tasks.select_dtypes(include=['object']).columns) == 0:
        tasks['task'] = [f"task_{str(x+1)}" for x in tasks.index.values.tolist()]
    # First object-dtype column that contains no missing values.
    id_col = tasks.select_dtypes(include=['object']).dropna(axis=1).columns[0]

    tasks.plot.scatter(x=objective1, y=objective2, ax=ax, alpha=0.3)
    self.gen.plot.scatter(x=objective1, y=objective2, c="red", ax=ax, alpha=0.3)

    # Loop invariants: the task columns and their distinct values define the mesh grid.
    col_x = tasks[objective1]
    col_y = tasks[objective2]
    unique_x = col_x.unique()
    unique_y = col_y.unique()
    has_log = isinstance(tasks, pd.DataFrame) and 'log' in tasks.columns

    # inf marks grid cells no task has been accumulated into yet.
    Z = np.full([unique_x.size, unique_y.size], np.inf)
    cnt_Z = np.zeros([unique_x.size, unique_y.size])
    cum_sum = 0

    for idx in tasks.index:
        # Default: compare against the first row of all generated entries.
        gen_entry = self.gen
        if has_log:
            c_log = tasks.loc[idx, 'log']
            if c_log in self.gen['log'].values:
                gen_entry = self.gen[self.gen['log'] == c_log]
            else:
                print(f"INFO: no value for {c_log} in generated files.")

        task_x, task_y = col_x[idx], col_y[idx]
        gen_x = gen_entry[objective1].values[0]
        gen_y = gen_entry[objective2].values[0]

        ax.plot([task_x, gen_x], [task_y, gen_y], c="green", alpha=0.25)
        ax.annotate(tasks[id_col][idx], (task_x, task_y), fontsize=5)

        # Euclidean distance between the task point and its generated counterpart.
        distance = np.linalg.norm(np.array([task_x, task_y]) - np.array([gen_x, gen_y]))

        Z_idx = np.where(unique_x == task_x)[0][0]
        Z_idy = np.where(unique_y == task_y)[0][0]
        if np.isinf(Z[Z_idx][Z_idy]):
            Z[Z_idx][Z_idy] = 0.0
        Z[Z_idx][Z_idy] += distance
        cnt_Z[Z_idx][Z_idy] += 1
        cum_sum += distance

    print(f"INFO: Cumulated distances objectives <-> generated features for '{objective1}' vs. '{objective2}':", cum_sum)
    ax.set_title(f"Cumulated distances {cum_sum:.4f}")

    X, Y = np.meshgrid(unique_x, unique_y)
    cmap = plt.colormaps['viridis_r']
    # Cells without any task get the constant sqrt(2); the count is forced to 1
    # there so the division below leaves that constant untouched.
    Z[np.isinf(Z)] = np.sqrt(2)
    cnt_Z[cnt_Z == 0] = 1
    Z /= cnt_Z  # accumulated distance -> mean distance per cell

    colormesh = ax_cmesh.pcolormesh(X, Y, Z.T, shading='nearest', cmap=cmap)
    ax_cmesh.set_xlabel(objective1)
    ax_cmesh.set_ylabel(objective2)
    if flag_plt_clbar:
        fig2.colorbar(colormesh, ax=axes_meshes, orientation='vertical')
    return colormesh
def plot_settings(self):
    """
    Sets the global matplotlib rc font sizes (axes title, axes labels and
    base font) to 8.
    """
    small_size = 8
    # Axes title size and x/y label size live in the same 'axes' rc group.
    mpl.rc('axes', titlesize=small_size, labelsize=small_size)
    mpl.rc('font', size=small_size)
def plot_feat_comparison(self, feature_list, reference_list):
    """
    Plots a grid of pairwise comparisons (one row per reference feature, one
    column per feature) and saves two figures into ``self.output_path``:
    the scatter/distance plots and the corresponding colour meshes.

    Each cell is drawn by ``self.plot_single_comparison`` on ``self.tasks``;
    the colour bar is requested only for the last cell of the grid.

    :param feature_list: feature names, one grid column each
    :param reference_list: reference feature names, one grid row each
    """
    len_features = len(feature_list)
    len_ref_feats = len(reference_list)

    # squeeze=False always yields 2-D axes arrays, so every grid shape (1x1, 1xN,
    # Nx1, NxM) is indexed the same way. The default squeezing returned a 1-D
    # array for Nx1 grids, which broke the former axes[idx1][idx2] lookup.
    fig1, axes = plt.subplots(len_ref_feats, len_features, squeeze=False)
    fig2, axes_meshes = plt.subplots(len_ref_feats, len_features, layout='compressed', squeeze=False)

    for idx1, entry1 in enumerate(reference_list):
        for idx2, entry2 in enumerate(feature_list):
            # Only the bottom-right cell triggers the shared colour bar.
            flag_plt_clbar = (idx1 == len_ref_feats - 1) and (idx2 == len_features - 1)
            self.plot_single_comparison(self.tasks, entry1, entry2,
                                        axes[idx1, idx2], axes_meshes[idx1, idx2],
                                        fig2, axes_meshes, flag_plt_clbar)

    # Non-object columns of the tasks are treated as the objectives for naming.
    objectives_keys = self.tasks.select_dtypes(exclude=['object']).columns
    objectives_keys = list(sorted(objectives_keys))
    abbreviations = get_keys_abbreviation(objectives_keys)

    fig1_title = f'Feature Comparison with {self.model_params[GENERATOR_PARAMS]}'
    fig1.suptitle(insert_newlines(fig1_title), fontsize=6)
    fig1.tight_layout()

    os.makedirs(self.output_path, exist_ok=True)

    distance_plot_path = os.path.join(self.output_path,
                                      f"eval_genEL{len(self.gen)}_objectives{len(objectives_keys)}_trials{self.model_params['generator_params']['n_trials']}_{abbreviations}.png")
    fig1.savefig(distance_plot_path)
    print(f"Saved objectives vs. genEL features plot in {distance_plot_path}")

    meshgrid_plot_path = os.path.join(self.output_path,
                                      f"meshgrid_genEL{len(self.gen)}_objectives{len(objectives_keys)}_trials{self.model_params['generator_params']['n_trials']}_{abbreviations}.png")
    fig2.savefig(meshgrid_plot_path)
    print(f"Saved meshgrid plot in {meshgrid_plot_path}")
def plot_dist_mx (self, model_params):
    """
    Builds a symmetric matrix of mean Euclidean distances between generated
    feature values and their targets, saves it as a heatmap, and saves a
    histogram of all individual distances.

    Generated CSVs are globbed from every prefix in ``self.input_path``; target
    CSVs from every prefix in ``model_params["targets"]``. Rows are matched
    through their ``'log'`` column (a ``'task'`` column in a target file is
    renamed to ``'log'``). Generated rows without a matching target are
    reported and skipped.

    For two-objective files, rows closer than 0.1 to their target additionally
    trigger ``select_instance``.

    :param model_params: mapping that provides the ``"targets"`` prefixes
    """
    threshold = 0.1

    gen_dict = defaultdict(lambda: defaultdict(dict))
    targets_dict = defaultdict(lambda: defaultdict(dict))
    # Remembers which input prefix each generated frame came from, so the
    # instance selection below does not depend on a leaked loop variable.
    source_prefixes = {}
    feature_names = set()

    for in_file in self.input_path:
        for file in glob.glob(f'{in_file}*.csv'):
            read_in = pd.read_csv(file)
            # Generated files: feature column(s) first. Two columns in total
            # means a single objective, which is then paired with itself.
            feat1 = read_in.columns[0]
            feat2 = feat1 if len(read_in.columns) == 2 else read_in.columns[1]
            read_in['fn'] = file
            gen_dict[feat1][feat2] = read_in
            source_prefixes[(feat1, feat2)] = in_file
            feature_names.update((feat1, feat2))

    for target_file in model_params["targets"]:
        for file in glob.glob(f'{target_file}*.csv'):
            read_in = pd.read_csv(file)
            if 'task' in read_in.columns:
                read_in.rename(columns={"task": "log"}, inplace=True)
            # Target files: feature column(s) start at position 1.
            feat1 = read_in.columns[1]
            feat2 = feat1 if len(read_in.columns) == 2 else read_in.columns[2]
            read_in['fn'] = file
            targets_dict[feat1][feat2] = read_in
            feature_names.update((feat1, feat2))

    keys = sorted(feature_names)
    result_df = pd.DataFrame(index=keys, columns=keys)
    dist_list = []
    last_selection = None  # (dest, len_is) of the most recent select_instance call

    for gen_obj1_key, gen_obj1_vals in gen_dict.items():
        if gen_obj1_key not in targets_dict:
            continue
        for gen_obj1_value in gen_obj1_vals:
            if gen_obj1_value not in targets_dict[gen_obj1_key]:
                continue
            gen_df = gen_obj1_vals[gen_obj1_value]
            target_df = targets_dict[gen_obj1_key][gen_obj1_value]
            target_logs = target_df['log'].values
            # 3 columns (feature, log, fn) correspond to a single objective.
            has_two_objectives = len(gen_df.columns) > 3

            cnt = 0
            cum_sum = 0
            for i in gen_df.index:
                current_log_name = gen_df.loc[i, 'log']
                if current_log_name not in target_logs:
                    # Skip instead of silently reusing the previous row's target.
                    print (f"[INFO] no value found for {current_log_name} in target file")
                    continue
                target_entry = target_df[target_df['log'] == current_log_name]

                vec1 = np.array([gen_df[gen_obj1_key][i], gen_df[gen_obj1_value][i]])
                vec2 = np.array([target_entry[gen_obj1_key].values[0], target_entry[gen_obj1_value].values[0]])
                distance = np.linalg.norm(vec1 - vec2)
                cum_sum += distance
                cnt += 1

                if distance < threshold and has_two_objectives:
                    # File stem is split on '_' to rebuild the event-log glob pattern.
                    file_stem = os.path.splitext(os.path.basename(gen_df.loc[i, 'fn']))[0]
                    data_splits = file_stem.split("_")
                    log_path= f'grid_2objectives_{data_splits[1]}_{data_splits[2]}/2_{data_splits[1]}_{data_splits[2]}/genEL{current_log_name}_*.xes'
                    source_prefix = source_prefixes[(gen_obj1_key, gen_obj1_value)]
                    dest, len_is = select_instance(source_prefix.replace("features/", ""), log_path)
                    last_selection = (dest, len_is)
                dist_list.append(distance)

            if cnt == 0:
                # No generated row had a target: leave the cell empty rather
                # than dividing by zero.
                continue
            mean_distance = cum_sum / cnt
            result_df.loc[gen_obj1_key, gen_obj1_value] = mean_distance
            result_df.loc[gen_obj1_value, gen_obj1_key] = mean_distance

    if last_selection is not None:
        dest, len_is = last_selection
        print(f"INFO: Instance selection saved {len_is} ED selection in {dest}")
    else:
        print("INFO: No instance selection was performed.")

    # Hard-coded diagonal entries for three features.
    # NOTE(review): the origin of these constants is not visible here - confirm.
    diagonal_values = {
        'ratio_most_common_variant': 2.021278 / 11.0,
        'ratio_top_10_variants': 0.07378 / 11.0,
        'ratio_variants_per_number_of_traces': 0.016658 / 11.0,
    }
    for feature, value in diagonal_values.items():
        # .loc writes into the frame itself; chained df[a][b] = v is not
        # guaranteed to (and does nothing under pandas copy-on-write).
        if feature in result_df.index:
            result_df.loc[feature, feature] = value

    abbrvs_key = get_keys_abbreviation(keys)
    # presumably one '_'-separated abbreviation per key - verify against get_keys_abbreviation
    result_df.columns = abbrvs_key.split("_")
    result_df.index = abbrvs_key.split("_")

    os.makedirs(self.output_path, exist_ok=True)

    sns.heatmap(result_df.astype(np.float16),annot=True, cmap="viridis_r", vmin=0.0, vmax=1.0)
    plt.yticks(rotation=0)
    plt.tight_layout()
    plt.savefig(os.path.join(self.output_path, f"dist_mx_{abbrvs_key}"))
    plt.show()

    fig = plt.figure()
    sns.histplot(data=pd.DataFrame(dist_list), x=0, bins=30)
    fig.savefig(os.path.join(self.output_path, f"dist_histogram"))