igedi / gedi /analyser.py
Andrea Maldonado
Refactoring tag to gedi
99bcc04
raw
history blame
5.69 kB
import numpy as np
import warnings
from sklearn.decomposition import FastICA, PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import Normalizer, StandardScaler
from gedi.features import EventLogFeatures
from gedi.plotter import ModelResultPlotter
from gedi.utils.matrix_tools import insert_missing_data
# TODO: Call param_keys explicitly e.g. import INPUT_PATH
from utils.param_keys import *
from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
# TODO: Use this class to compare models during evaluation
class FeatureAnalyser:
    """
    Fits dimensionality-reduction models on event-log features and compares their results.

    The algorithm is selected through the ALGORITHM_NAME entry of a model-parameter dict.
    Supported are scikit-learn's PCA and t-SNE, optionally preceded by L2 normalisation
    (name starts with 'normalized') or standard scaling (name starts with 'std_scaled').
    """

    def __init__(self, features, params=None):
        """
        :param features: EventLogFeatures
            Feature container; its ``feat`` attribute is used as the default model input.
        :param params: dict
            Optional analyser settings (default: None -> every setting takes its default).
            Recognised keys and defaults: PLOT_TYPE (COLOR_MAP), PLOT_TICS (True),
            INTERACTIVE (True), N_COMPONENTS (2), PERPLEXITY (3).
        """
        # `params` is optional, so fall back to an empty dict before calling `.get` on it.
        if params is None:
            params = {}
        self.features: EventLogFeatures = features
        self.params: dict = {
            PLOT_TYPE: params.get(PLOT_TYPE, COLOR_MAP),
            PLOT_TICS: params.get(PLOT_TICS, True),
            INTERACTIVE: params.get(INTERACTIVE, True),
            N_COMPONENTS: params.get(N_COMPONENTS, 2),
            PERPLEXITY: params.get(PERPLEXITY, 3),
        }

    def compare(self, model_parameter_list: list[dict], plot_results: bool = True) -> list[dict]:
        """
        Fit one model per parameter set and collect the results.

        Parameter sets whose model raises ``np.linalg.LinAlgError`` or ``AssertionError``
        are skipped with a warning; any other exception propagates to the caller.

        :param model_parameter_list: list[dict]
            Different model input parameters, saved in a list
        :param plot_results: bool
            Plots the components of the different models (default: True)
        :return: list[dict]
            The results of the models {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
        """
        model_results = []
        for model_parameters in model_parameter_list:
            try:
                model_results.append(self.get_model_result(model_parameters))
            except np.linalg.LinAlgError as e:
                warnings.warn(f'Eigenvalue decomposition for model `{model_parameters}` could not be calculated:\n {e}')
            except AssertionError as e:
                warnings.warn(f'{e}')

        if plot_results:
            self.compare_with_plot(model_results)

        return model_results

    def compare_with_plot(self, model_results_list):
        """
        Plot the results of previously fitted models next to each other.

        :param model_results_list: list[dict]
            Model results as returned by :meth:`compare` / :meth:`get_model_result`.
        """
        ModelResultPlotter().plot_models(
            model_results_list,
            plot_type=self.params[PLOT_TYPE],
            plot_tics=self.params[PLOT_TICS],
            components=self.params[N_COMPONENTS]
        )

    def get_model_result(self, model_parameters: dict, log: bool = True) -> dict:
        """
        Returns a dict of all the important result values. Used for analysing the different models

        :param model_parameters: dict
            The input parameters for the model
        :param log: bool
            Enables the log output while running the program (default: True)
        :return: dict of the results: {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
            EXPLAINED_VAR is 0 (and a warning is emitted) when the fitted model has no
            ``explained_variance_ratio_`` attribute.
        :raises TypeError: if no model could be built for the given algorithm name.
        """
        result = self.get_model_and_projection(model_parameters, log=log)
        if result is None:
            # `get_model_and_projection` only warns for an unknown algorithm and returns None.
            # Unpacking that None used to fail with an opaque TypeError; the exception type is
            # kept for backward compatibility, but the message now names the actual problem.
            raise TypeError(
                f'No model could be created for `{model_parameters}`: unknown algorithm name.'
            )
        model, projection = result

        try:
            ex_var = model.explained_variance_ratio_
        except AttributeError as e:
            # Not every model exposes an explained variance ratio; report it and use 0 instead.
            warnings.warn(str(e))
            ex_var = 0

        return {MODEL: model, PROJECTION: projection, EXPLAINED_VAR: ex_var, INPUT_PARAMS: model_parameters}

    def get_model_and_projection(self, model_parameters: dict, inp: np.ndarray = None, log: bool = True):
        """
        Fit a model with the given parameters :model_parameters: and transform the inp(ut)
        data with it.

        :param model_parameters: dict
            The input parameters for the model. Must contain ALGORITHM_NAME.
        :param inp: np.ndarray
            Input data for the model (optional), (default: None -> the analyser's features
            after ``insert_missing_data``)
        :param log: bool
            Enables the log output while running the program (default: True)
        :return: tuple of (fitted model, transformed data), or None (after emitting a warning)
            when the algorithm name contains neither 'pca' nor 'tsne'.
        :raises KeyError: if ALGORITHM_NAME is missing from :model_parameters:.
        :raises TypeError: if fitting fails with a TypeError (e.g. unsuitable input data).
        """
        if log:
            print(f'Running {model_parameters}...')

        if inp is None:
            inp = insert_missing_data(self.features.feat)

        if ALGORITHM_NAME not in model_parameters:
            raise KeyError(f'{ALGORITHM_NAME} is a mandatory model parameter.')
        algorithm_name = model_parameters[ALGORITHM_NAME]

        # Optional preprocessing, selected by the prefix of the algorithm name.
        if algorithm_name.startswith('normalized'):
            inp = Normalizer(norm="l2").fit_transform(inp)
        elif algorithm_name.startswith('std_scaled'):
            inp = StandardScaler().fit_transform(inp)

        try:
            # Substring match, so prefixed names such as 'normalized_pca' are recognised too.
            if 'pca' in algorithm_name:
                pca = PCA(n_components=self.params[N_COMPONENTS])
                return pca, pca.fit_transform(inp)
            if 'tsne' in algorithm_name:
                tsne = TSNE(n_components=self.params[N_COMPONENTS], learning_rate='auto',
                            init='random', perplexity=self.params[PERPLEXITY])
                return tsne, tsne.fit_transform(inp)
            # NOTE: a FastICA ('original_ica') branch used to live here but is disabled.
            warnings.warn(f'No original algorithm was found with name: {algorithm_name}')
            return None
        except TypeError as e:
            # Chain the original error so the underlying cause stays visible in the traceback.
            raise TypeError(f'Input data of the function is not correct. '
                            f'Original algorithms take only 2-n-dimensional ndarray') from e