import numpy as np
import warnings
from sklearn.decomposition import FastICA, PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import Normalizer, StandardScaler
from gedi.features import EventLogFeatures
from gedi.plotter import ModelResultPlotter
from gedi.utils.matrix_tools import insert_missing_data
# TODO: Call param_keys explicitly e.g. import INPUT_PATH
from utils.param_keys import *
from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY


# TODO: Use this class to compare models during evaluation
class FeatureAnalyser:
def __init__(self, features, params=None):
self.features: EventLogFeatures = features
        params = params if params is not None else {}
        self.params: dict = {
PLOT_TYPE: params.get(PLOT_TYPE, COLOR_MAP),
PLOT_TICS: params.get(PLOT_TICS, True),
INTERACTIVE: params.get(INTERACTIVE, True),
N_COMPONENTS: params.get(N_COMPONENTS, 2),
PERPLEXITY: params.get(PERPLEXITY, 3)
}
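
    # Illustrative construction (hedged sketch: `features` is assumed to be an already
    # prepared gedi.features.EventLogFeatures object, and the key constants come from
    # utils.param_keys; the chosen values are not a prescribed configuration):
    #   analyser = FeatureAnalyser(features, params={N_COMPONENTS: 2, PERPLEXITY: 5})
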
def compare(self, model_parameter_list: list[dict], plot_results: bool = True) -> list[dict]:
"""
:param model_parameter_list: list[dict]
Different model input parameters, saved in a list
        :param plot_results: bool
            If True, the components of the different models are plotted (default: True)
:return: list[dict]
The results of the models {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
"""
model_results = []
for model_parameters in model_parameter_list:
try:
model_results.append(self.get_model_result(model_parameters))
except np.linalg.LinAlgError as e:
warnings.warn(f'Eigenvalue decomposition for model `{model_parameters}` could not be calculated:\n {e}')
except AssertionError as e:
warnings.warn(f'{e}')
if plot_results:
self.compare_with_plot(model_results)
return model_results
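
    # Example call (illustrative parameter dicts, with `analyser` constructed as above;
    # ALGORITHM_NAME is the only mandatory key, and the 'normalized'/'std_scaled' prefixes
    # trigger the corresponding preprocessing):
    #   analyser.compare([{ALGORITHM_NAME: 'pca'}, {ALGORITHM_NAME: 'normalized_tsne'}],
    #                    plot_results=False)
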
def compare_with_plot(self, model_results_list):
"""
This method is used to compare the results in a plot, after fit_transforming different models.
@param model_results_list: list[dict]
Different model input parameters, saved in a list.
"""
ModelResultPlotter().plot_models(
model_results_list,
plot_type=self.params[PLOT_TYPE],
plot_tics=self.params[PLOT_TICS],
components=self.params[N_COMPONENTS]
)

    def get_model_result(self, model_parameters: dict, log: bool = True) -> dict:
        """
        Returns a dict of all the important result values, used for analysing the different models.
:param model_parameters: dict
The input parameters for the model
:param log: bool
Enables the log output while running the program (default: True)
:return: dict of the results: {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
"""
model, projection = self.get_model_and_projection(model_parameters, log=log)
try:
ex_var = model.explained_variance_ratio_
except AttributeError as e:
warnings.warn(str(e))
ex_var = 0
return {MODEL: model, PROJECTION: projection, EXPLAINED_VAR: ex_var, INPUT_PARAMS: model_parameters}
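
    # Example (illustrative; the dict keys are the constants imported from utils.param_keys):
    #   result = analyser.get_model_result({ALGORITHM_NAME: 'std_scaled_pca'})
    #   result[EXPLAINED_VAR]  # explained_variance_ratio_ of the PCA, or 0 for models
    #                          # without that attribute (e.g. TSNE)
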
def get_model_and_projection(self, model_parameters: dict, inp: np.ndarray = None, log: bool = True):
"""
This method is fitting a model with the given parameters :model_parameters: and
the inp(ut) data is transformed on the model.
@param model_parameters: dict
The input parameters for the model.
@param inp: np.ndarray
Input data for the model (optional), (default: None -> calculated on the basis of the model_parameters)
@param log: bool
Enables the log output while running the program (default: True)
@return: fitted model and transformed data
"""
if log:
print(f'Running {model_parameters}...')
if inp is None:
inp = insert_missing_data(self.features.feat)
        if ALGORITHM_NAME not in model_parameters:
            raise KeyError(f'{ALGORITHM_NAME} is a mandatory model parameter.')
        # Optional preprocessing, selected via the algorithm-name prefix
        if model_parameters[ALGORITHM_NAME].startswith('normalized'):
inp = Normalizer(norm="l2").fit_transform(inp)
elif model_parameters[ALGORITHM_NAME].startswith('std_scaled'):
scaler = StandardScaler()
inp = scaler.fit_transform(inp)
try:
if 'pca' in model_parameters[ALGORITHM_NAME]:
# from sklearn.decomposition import PCA
pca = PCA(n_components=self.params[N_COMPONENTS])
# pca = coor.pca(data=inp, dim=self.params[N_COMPONENTS])
return pca, pca.fit_transform(inp)
elif 'tsne' in model_parameters[ALGORITHM_NAME]:
tsne = TSNE(n_components=self.params[N_COMPONENTS], learning_rate='auto',
init='random', perplexity=self.params[PERPLEXITY])
return tsne, tsne.fit_transform(inp)
#elif model_parameters[ALGORITHM_NAME] == 'original_ica':
# ica = FastICA(n_components=self.params[N_COMPONENTS])
# return ica, ica.fit_transform(inp)
else:
warnings.warn(f'No original algorithm was found with name: {model_parameters[ALGORITHM_NAME]}')
        except TypeError:
            raise TypeError('Input data of the function is not correct. '
                            'The algorithms only accept a two-dimensional ndarray.')
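

if __name__ == '__main__':
    # Minimal, self-contained sketch of the reduction performed in
    # `get_model_and_projection`, run on synthetic data (no gedi objects involved).
    # The feature-matrix shape and parameter values below are illustrative assumptions.
    rng = np.random.default_rng(42)
    demo_features = rng.random((30, 8))                          # 30 logs x 8 numeric features
    demo_scaled = StandardScaler().fit_transform(demo_features)  # 'std_scaled_*' preprocessing branch
    demo_pca = PCA(n_components=2)
    demo_projection = demo_pca.fit_transform(demo_scaled)        # 'pca' branch
    print(demo_projection.shape, demo_pca.explained_variance_ratio_)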