Spaces:

andreamalhera
/

igedi

Running

igedi / gedi /analyser.py

Andrea Maldonado

Refactoring tag to gedi

99bcc04 11 months ago

5.69 kB

	import numpy as np
	import warnings

	from sklearn.decomposition import FastICA, PCA
	from sklearn.manifold import TSNE
	from sklearn.preprocessing import Normalizer, StandardScaler
	from gedi.features import EventLogFeatures
	from gedi.plotter import ModelResultPlotter
	from gedi.utils.matrix_tools import insert_missing_data
	# TODO: Call param_keys explicitly e.g. import INPUT_PATH
	from utils.param_keys import *
	from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY


	# TODO: Use this class to compare models during evaluation
	class FeatureAnalyser:
	    """
	    Fits projection models (PCA / t-SNE) on event-log features and compares them.

	    Each model is described by a parameter dict whose ``ALGORITHM_NAME`` entry selects
	    the optional preprocessing (prefix ``normalized`` or ``std_scaled``) and the
	    algorithm (name containing ``pca`` or ``tsne``).
	    """

	    def __init__(self, features, params=None):
	        """
	        :param features: EventLogFeatures
	            Feature container; its ``feat`` attribute is the default model input.
	        :param params: dict
	            Optional settings (PLOT_TYPE, PLOT_TICS, INTERACTIVE, N_COMPONENTS, PERPLEXITY).
	            Missing keys, or ``params=None``, fall back to the defaults below.
	        """
	        # `params` defaults to None, so it must be normalised before calling `.get` on it.
	        params = {} if params is None else params
	        self.features: EventLogFeatures = features
	        self.params: dict = {
	            PLOT_TYPE: params.get(PLOT_TYPE, COLOR_MAP),
	            PLOT_TICS: params.get(PLOT_TICS, True),
	            INTERACTIVE: params.get(INTERACTIVE, True),
	            N_COMPONENTS: params.get(N_COMPONENTS, 2),
	            PERPLEXITY: params.get(PERPLEXITY, 3),
	        }

	    def compare(self, model_parameter_list: list[dict], plot_results: bool = True) -> list[dict]:
	        """
	        Fits every model of the list and collects the results.

	        A model that fails with a ``numpy.linalg.LinAlgError``, an ``AssertionError`` or a
	        ``ValueError`` (e.g. an unsupported algorithm name) is skipped with a warning, so a
	        single bad entry does not abort the whole comparison.
	        :param model_parameter_list: list[dict]
	            Different model input parameters, saved in a list
	        :param plot_results: bool
	            Plots the components of the different models (default: True)
	        :return: list[dict]
	            The results of the models {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
	        """
	        model_results = []
	        for model_parameters in model_parameter_list:
	            try:
	                model_results.append(self.get_model_result(model_parameters))
	            except np.linalg.LinAlgError as e:
	                warnings.warn(f'Eigenvalue decomposition for model `{model_parameters}` could not be calculated:\n {e}')
	            except (AssertionError, ValueError) as e:
	                warnings.warn(f'{e}')

	        if plot_results:
	            self.compare_with_plot(model_results)

	        return model_results

	    def compare_with_plot(self, model_results_list):
	        """
	        Compares the results in a plot, after fit_transforming different models.
	        :param model_results_list: list[dict]
	            The model results to plot, e.g. as returned by ``compare``.
	        """
	        ModelResultPlotter().plot_models(
	            model_results_list,
	            plot_type=self.params[PLOT_TYPE],
	            plot_tics=self.params[PLOT_TICS],
	            components=self.params[N_COMPONENTS]
	        )

	    def get_model_result(self, model_parameters: dict, log: bool = True) -> dict:
	        """
	        Returns a dict of all the important result values. Used for analysing the different models.
	        :param model_parameters: dict
	            The input parameters for the model
	        :param log: bool
	            Enables the log output while running the program (default: True)
	        :return: dict of the results: {MODEL, PROJECTION, EXPLAINED_VAR, INPUT_PARAMS}
	        :raises ValueError: if no model could be built for the given parameters
	        """
	        fitted = self.get_model_and_projection(model_parameters, log=log)
	        if fitted is None:
	            # Unknown algorithm name: fail with a clear message instead of an opaque
	            # "cannot unpack non-iterable NoneType" error on the unpacking below.
	            raise ValueError(f'No model could be built for parameters `{model_parameters}`.')
	        model, projection = fitted

	        try:
	            ex_var = model.explained_variance_ratio_
	        except AttributeError as e:
	            # The model does not expose an explained variance ratio; report 0 instead.
	            warnings.warn(str(e))
	            ex_var = 0
	        return {MODEL: model, PROJECTION: projection, EXPLAINED_VAR: ex_var, INPUT_PARAMS: model_parameters}

	    def get_model_and_projection(self, model_parameters: dict, inp: np.ndarray = None, log: bool = True):
	        """
	        Fits a model with the given parameters :model_parameters: and
	        transforms the inp(ut) data with it.
	        :param model_parameters: dict
	            The input parameters for the model.
	        :param inp: np.ndarray
	            Input data for the model (optional), (default: None -> built from ``self.features.feat``)
	        :param log: bool
	            Enables the log output while running the program (default: True)
	        :return: tuple of (fitted model, transformed data), or None (after a warning)
	            when the algorithm name contains neither 'pca' nor 'tsne'
	        :raises KeyError: if ALGORITHM_NAME is missing from the model parameters
	        :raises TypeError: if the model rejects the input data with a TypeError
	        """
	        if log:
	            print(f'Running {model_parameters}...')

	        if inp is None:
	            inp = insert_missing_data(self.features.feat)

	        if ALGORITHM_NAME not in model_parameters:
	            raise KeyError(f'{ALGORITHM_NAME} is a mandatory model parameter.')
	        algorithm_name = model_parameters[ALGORITHM_NAME]

	        # The name prefix selects an optional preprocessing step.
	        if algorithm_name.startswith('normalized'):
	            inp = Normalizer(norm="l2").fit_transform(inp)
	        elif algorithm_name.startswith('std_scaled'):
	            inp = StandardScaler().fit_transform(inp)

	        try:
	            if 'pca' in algorithm_name:
	                pca = PCA(n_components=self.params[N_COMPONENTS])
	                return pca, pca.fit_transform(inp)
	            if 'tsne' in algorithm_name:
	                tsne = TSNE(n_components=self.params[N_COMPONENTS], learning_rate='auto',
	                            init='random', perplexity=self.params[PERPLEXITY])
	                return tsne, tsne.fit_transform(inp)
	        except TypeError as err:
	            # Keep the original error as the cause so the real failure stays visible.
	            raise TypeError(f'Input data of the function is not correct. '
	                            f'Original algorithms take only 2-n-dimensional ndarray') from err

	        warnings.warn(f'No original algorithm was found with name: {algorithm_name}')
	        return None