Spaces:

linpershey
/

process_mining

Running

process_mining / pm4py /algo /conformance /log_skeleton /variants /classic.py

kota

initial commit

e60e568 11 months ago

12.8 kB

	'''
	This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

	PM4Py is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	PM4Py is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
	'''
	from pm4py.objects.log.util import xes
	from pm4py.algo.discovery.log_skeleton import trace_skel
	from pm4py.util import xes_constants
	from pm4py.util import variants_util, pandas_utils
	from pm4py.util import exec_utils
	from typing import Optional, Dict, Any, Union, List, Set
	from pm4py.objects.log.obj import EventLog, Trace
	import pandas as pd

	from enum import Enum
	from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY, PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME


	class Parameters(Enum):
	# parameter for the noise threshold
	NOISE_THRESHOLD = "noise_threshold"
	# considered constraints in conformance checking among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq
	CONSIDERED_CONSTRAINTS = "considered_constraints"
	# default choice for conformance checking
	DEFAULT_CONSIDERED_CONSTRAINTS = ["equivalence", "always_after", "always_before", "never_together",
	"directly_follows", "activ_freq"]
	CASE_ID_KEY = PARAMETER_CONSTANT_CASEID_KEY
	ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY
	PARAMETER_VARIANT_DELIMITER = "variant_delimiter"


	NOISE_THRESHOLD = Parameters.NOISE_THRESHOLD
	CONSIDERED_CONSTRAINTS = Parameters.CONSIDERED_CONSTRAINTS
	DEFAULT_CONSIDERED_CONSTRAINTS = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS
	ACTIVITY_KEY = Parameters.ACTIVITY_KEY
	PARAMETER_VARIANT_DELIMITER = Parameters.PARAMETER_VARIANT_DELIMITER


	class DiscoveryOutputs(Enum):
	EQUIVALENCE = "equivalence"
	ALWAYS_AFTER = "always_after"
	ALWAYS_BEFORE = "always_before"
	NEVER_TOGETHER = "never_together"
	DIRECTLY_FOLLOWS = "directly_follows"
	ACTIV_FREQ = "activ_freq"


	class Outputs(Enum):
	DEVIATIONS = "deviations"
	NO_DEV_TOTAL = "no_dev_total"
	NO_CONSTR_TOTAL = "no_constr_total"
	DEV_FITNESS = "dev_fitness"
	IS_FIT = "is_fit"


	def apply_log(log: Union[EventLog, pd.DataFrame], model: Dict[str, Any], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[Set[Any]]:
	"""
	Apply log-skeleton based conformance checking given an event log
	and a log-skeleton model

	Parameters
	--------------
	log
	Event log
	model
	Log-skeleton model
	parameters
	Parameters of the algorithm, including:
	- Parameters.ACTIVITY_KEY
	- Parameters.CONSIDERED_CONSTRAINTS, among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

	Returns
	--------------
	aligned_traces
	Conformance checking results for each trace:
	- Outputs.IS_FIT => boolean that tells if the trace is perfectly fit according to the model
	- Outputs.DEV_FITNESS => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
	- Outputs.DEVIATIONS => list of deviations in the model
	"""
	if parameters is None:
	parameters = {}

	activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

	if pandas_utils.check_is_pandas_dataframe(log):
	case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
	traces = [tuple(x) for x in log.groupby(case_id_key)[activity_key].agg(list).to_dict().values()]
	else:
	traces = [tuple(y[activity_key] for y in x) for x in log]
	grouped_traces = {}
	gtk = []
	inv_idxs = {}
	for i in range(len(traces)):
	tr = traces[i]
	if not tr in grouped_traces:
	grouped_traces[tr] = []
	gtk.append(tr)
	grouped_traces[tr].append(i)
	inv_idxs[i] = gtk.index(tr)

	res0 = []
	for trace in grouped_traces:
	res0.append(apply_actlist(trace, model, parameters=parameters))

	res = []
	for i in range(len(traces)):
	res.append(res0[inv_idxs[i]])

	return res


	def apply_trace(trace: Trace, model: Dict[str, Any], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[Set[Any]]:
	"""
	Apply log-skeleton based conformance checking given a trace
	and a log-skeleton model

	Parameters
	--------------
	trace
	Trace
	model
	Log-skeleton model
	parameters
	Parameters of the algorithm, including:
	- the activity key (pm4py:param:activity_key)
	- the list of considered constraints (considered_constraints) among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

	Returns
	--------------
	aligned_trace
	Containing:
	- is_fit => boolean that tells if the trace is perfectly fit according to the model
	- dev_fitness => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
	- deviations => list of deviations in the model
	"""
	if parameters is None:
	parameters = {}

	activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
	trace = [x[activity_key] for x in trace]

	return apply_actlist(trace, model, parameters=parameters)


	def apply_actlist(trace, model, parameters=None):
	"""
	Apply log-skeleton based conformance checking given the list of activities of a trace
	and a log-skeleton model

	Parameters
	--------------
	trace
	List of activities of a trace
	model
	Log-skeleton model
	parameters
	Parameters of the algorithm, including:
	- the activity key (pm4py:param:activity_key)
	- the list of considered constraints (considered_constraints) among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

	Returns
	--------------
	aligned_trace
	Containing:
	- is_fit => boolean that tells if the trace is perfectly fit according to the model
	- dev_fitness => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
	- deviations => list of deviations in the model
	"""
	if parameters is None:
	parameters = {}

	consid_constraints = exec_utils.get_param_value(Parameters.CONSIDERED_CONSTRAINTS, parameters, Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value)
	trace_info = trace_skel.get_trace_info(trace)

	ret = {}
	ret[Outputs.DEVIATIONS.value] = []
	dev_total = 0
	conf_total = 0

	default_considered_constraints = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value

	i = 0
	while i < len(default_considered_constraints):
	if default_considered_constraints[i] in consid_constraints:
	if default_considered_constraints[i] == DiscoveryOutputs.ACTIV_FREQ.value:
	this_constraints = {x: y for x, y in model[default_considered_constraints[i]].items()}
	conf_total += len(list(act for act in trace_info[i] if act in this_constraints)) + len(list(act for act in trace_info[i] if act not in this_constraints)) + len(list(act for act in this_constraints if min(this_constraints[act]) > 0 and not act in trace))
	for act in trace_info[i]:
	if act in this_constraints:
	if trace_info[i][act] not in this_constraints[act]:
	dev_total += 1
	ret[Outputs.DEVIATIONS.value].append((default_considered_constraints[i], (act, trace_info[i][act])))
	else:
	dev_total += 1
	ret[Outputs.DEVIATIONS.value].append((default_considered_constraints[i], (act, 0)))
	for act in this_constraints:
	if min(this_constraints[act]) > 0 and not act in trace:
	dev_total += 1
	ret[Outputs.DEVIATIONS.value].append((default_considered_constraints[i], (act, 0)))
	elif default_considered_constraints[i] == DiscoveryOutputs.NEVER_TOGETHER.value:
	this_constraints = {x for x in model[default_considered_constraints[i]] if x[0] in trace}
	conf_total += len(this_constraints)
	setinte = this_constraints.intersection(trace_info[i])
	dev_total += len(setinte)
	if len(setinte) > 0:
	ret[Outputs.DEVIATIONS.value].append((default_considered_constraints[i], tuple(setinte)))
	else:
	this_constraints = {x for x in model[default_considered_constraints[i]] if x[0] in trace}
	conf_total += len(this_constraints)
	setdiff = this_constraints.difference(trace_info[i])
	dev_total += len(setdiff)
	if len(setdiff) > 0:
	ret[Outputs.DEVIATIONS.value].append((default_considered_constraints[i], tuple(setdiff)))
	i = i + 1
	ret[Outputs.NO_DEV_TOTAL.value] = dev_total
	ret[Outputs.NO_CONSTR_TOTAL.value] = conf_total
	ret[Outputs.DEV_FITNESS.value] = 1.0 - float(dev_total)/float(conf_total) if conf_total > 0 else 1.0
	ret[Outputs.DEVIATIONS.value] = sorted(ret[Outputs.DEVIATIONS.value], key=lambda x: (x[0], x[1]))
	ret[Outputs.IS_FIT.value] = len(ret[Outputs.DEVIATIONS.value]) == 0
	return ret


	def apply_from_variants_list(var_list, model, parameters=None):
	"""
	Performs conformance checking using the log skeleton,
	applying it from a list of variants

	Parameters
	--------------
	var_list
	List of variants
	model
	Log skeleton model
	parameters
	Parameters

	Returns
	--------------
	conformance_dictio
	Dictionary containing, for each variant, the result
	of log skeleton checking
	"""
	if parameters is None:
	parameters = {}

	conformance_output = {}

	for cv in var_list:
	v = cv[0]
	trace = variants_util.variant_to_trace(v, parameters=parameters)

	conformance_output[v] = apply_trace(trace, model, parameters=parameters)

	return conformance_output


	def after_decode(log_skeleton):
	"""
	Prepares the log skeleton after decoding

	Parameters
	--------------
	log_skeleton
	Log skeleton

	Returns
	--------------
	log_skeleton
	Log skeleton (with sets instead of lists)
	"""
	log_skeleton[DiscoveryOutputs.EQUIVALENCE.value] = set(log_skeleton[DiscoveryOutputs.EQUIVALENCE.value])
	log_skeleton[DiscoveryOutputs.ALWAYS_AFTER.value] = set(log_skeleton[DiscoveryOutputs.ALWAYS_AFTER.value])
	log_skeleton[DiscoveryOutputs.ALWAYS_BEFORE.value] = set(log_skeleton[DiscoveryOutputs.ALWAYS_BEFORE.value])
	log_skeleton[DiscoveryOutputs.NEVER_TOGETHER.value] = set(log_skeleton[DiscoveryOutputs.NEVER_TOGETHER.value])
	log_skeleton[DiscoveryOutputs.DIRECTLY_FOLLOWS.value] = set(log_skeleton[DiscoveryOutputs.DIRECTLY_FOLLOWS.value])
	for act in log_skeleton[DiscoveryOutputs.ACTIV_FREQ.value]:
	log_skeleton[DiscoveryOutputs.ACTIV_FREQ.value][act] = set(log_skeleton[DiscoveryOutputs.ACTIV_FREQ.value][act])
	return log_skeleton


	def get_diagnostics_dataframe(log, conf_result, parameters=None):
	"""
	Gets the diagnostics dataframe from a log and the results
	of log skeleton-based conformance checking

	Parameters
	--------------
	log
	Event log
	conf_result
	Results of conformance checking

	Returns
	--------------
	diagn_dataframe
	Diagnostics dataframe
	"""
	if parameters is None:
	parameters = {}

	case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes_constants.DEFAULT_TRACEID_KEY)

	import pandas as pd

	diagn_stream = []

	for index in range(len(log)):
	case_id = log[index].attributes[case_id_key]

	no_dev_total = conf_result[index][Outputs.NO_DEV_TOTAL.value]
	no_constr_total = conf_result[index][Outputs.NO_CONSTR_TOTAL.value]
	dev_fitness = conf_result[index][Outputs.DEV_FITNESS.value]

	diagn_stream.append({"case_id": case_id, "no_dev_total": no_dev_total, "no_constr_total": no_constr_total, "dev_fitness": dev_fitness})

	return pandas_utils.instantiate_dataframe(diagn_stream)