'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
from pm4py.objects.log.util import xes
from pm4py.algo.discovery.log_skeleton import trace_skel
from pm4py.util import xes_constants
from pm4py.util import variants_util, pandas_utils
from pm4py.util import exec_utils
from typing import Optional, Dict, Any, Union, List, Set
from pm4py.objects.log.obj import EventLog, Trace
import pandas as pd
from enum import Enum
from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY, PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME


class Parameters(Enum):
    # parameter for the noise threshold
    NOISE_THRESHOLD = "noise_threshold"
    # considered constraints in conformance checking among: equivalence, always_after, always_before,
    # never_together, directly_follows, activ_freq
    CONSIDERED_CONSTRAINTS = "considered_constraints"
    # default choice for conformance checking; the order of this list is also the order in which
    # apply_actlist indexes the tuple returned by trace_skel.get_trace_info
    DEFAULT_CONSIDERED_CONSTRAINTS = ["equivalence", "always_after", "always_before", "never_together",
                                      "directly_follows", "activ_freq"]
    CASE_ID_KEY = PARAMETER_CONSTANT_CASEID_KEY
    ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY
    PARAMETER_VARIANT_DELIMITER = "variant_delimiter"


# module-level aliases of the enum members
NOISE_THRESHOLD = Parameters.NOISE_THRESHOLD
CONSIDERED_CONSTRAINTS = Parameters.CONSIDERED_CONSTRAINTS
DEFAULT_CONSIDERED_CONSTRAINTS = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS
ACTIVITY_KEY = Parameters.ACTIVITY_KEY
PARAMETER_VARIANT_DELIMITER = Parameters.PARAMETER_VARIANT_DELIMITER


class DiscoveryOutputs(Enum):
    # keys of the log-skeleton model dictionary
    EQUIVALENCE = "equivalence"
    ALWAYS_AFTER = "always_after"
    ALWAYS_BEFORE = "always_before"
    NEVER_TOGETHER = "never_together"
    DIRECTLY_FOLLOWS = "directly_follows"
    ACTIV_FREQ = "activ_freq"


class Outputs(Enum):
    # keys of the per-trace conformance result dictionary
    DEVIATIONS = "deviations"
    NO_DEV_TOTAL = "no_dev_total"
    NO_CONSTR_TOTAL = "no_constr_total"
    DEV_FITNESS = "dev_fitness"
    IS_FIT = "is_fit"


def apply_log(log: Union[EventLog, pd.DataFrame], model: Dict[str, Any],
              parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[Dict[str, Any]]:
    """
    Apply log-skeleton based conformance checking given an event log
    and a log-skeleton model

    Parameters
    --------------
    log
        Event log (or Pandas dataframe)
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
        - Parameters.ACTIVITY_KEY
        - Parameters.CASE_ID_KEY (only read when the log is a Pandas dataframe)
        - Parameters.CONSIDERED_CONSTRAINTS, among: equivalence, always_after, always_before, never_together,
          directly_follows, activ_freq

    Returns
    --------------
    aligned_traces
        Conformance checking results for each trace:
        - Outputs.IS_FIT => boolean that tells if the trace is perfectly fit according to the model
        - Outputs.DEV_FITNESS => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - Outputs.DEVIATIONS => list of deviations in the model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    if pandas_utils.check_is_pandas_dataframe(log):
        case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
        traces = [tuple(x) for x in log.groupby(case_id_key)[activity_key].agg(list).to_dict().values()]
    else:
        traces = [tuple(y[activity_key] for y in x) for x in log]

    # Each distinct variant is checked once, in order of first appearance; traces that share a
    # variant share the very same result dictionary. The dict lookup replaces a list.index()
    # scan per trace, which was quadratic in the number of variants.
    variant_results = {}
    res = []
    for tr in traces:
        if tr not in variant_results:
            variant_results[tr] = apply_actlist(tr, model, parameters=parameters)
        res.append(variant_results[tr])

    return res


def apply_trace(trace: Trace, model: Dict[str, Any],
                parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, Any]:
    """
    Apply log-skeleton based conformance checking given a trace
    and a log-skeleton model

    Parameters
    --------------
    trace
        Trace
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
        - the activity key (pm4py:param:activity_key)
        - the list of considered constraints (considered_constraints) among: equivalence, always_after,
          always_before, never_together, directly_follows, activ_freq

    Returns
    --------------
    aligned_trace
        Containing:
        - is_fit => boolean that tells if the trace is perfectly fit according to the model
        - dev_fitness => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - deviations => list of deviations in the model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)
    trace = [x[activity_key] for x in trace]

    return apply_actlist(trace, model, parameters=parameters)


def apply_actlist(trace, model, parameters=None):
    """
    Apply log-skeleton based conformance checking given the list of activities of a trace
    and a log-skeleton model

    Parameters
    --------------
    trace
        List of activities of a trace
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
        - the activity key (pm4py:param:activity_key)
        - the list of considered constraints (considered_constraints) among: equivalence, always_after,
          always_before, never_together, directly_follows, activ_freq

    Returns
    --------------
    aligned_trace
        Containing:
        - is_fit => boolean that tells if the trace is perfectly fit according to the model
        - dev_fitness => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - deviations => list of deviations in the model
        - no_dev_total => number of deviations
        - no_constr_total => number of constraints that have been checked
    """
    if parameters is None:
        parameters = {}

    consid_constraints = exec_utils.get_param_value(Parameters.CONSIDERED_CONSTRAINTS, parameters,
                                                    Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value)
    trace_info = trace_skel.get_trace_info(trace)
    # built once so that every "does the activity occur in the trace" test is a set lookup
    # instead of a linear scan of the trace
    trace_activities = set(trace)

    deviations = []
    dev_total = 0
    conf_total = 0

    # trace_info is indexed positionally, in the order of the default constraints list
    for i, constraint in enumerate(Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value):
        if constraint not in consid_constraints:
            continue

        if constraint == DiscoveryOutputs.ACTIV_FREQ.value:
            allowed_freq = model[constraint]
            observed_freq = trace_info[i]
            # activities whose allowed frequencies exclude 0 but that never occur in the trace
            missing_required = [act for act in allowed_freq
                                if min(allowed_freq[act]) > 0 and act not in trace_activities]
            # one constraint per activity of the trace, plus one per missing required activity
            conf_total += len(observed_freq) + len(missing_required)
            for act in observed_freq:
                if act in allowed_freq:
                    if observed_freq[act] not in allowed_freq[act]:
                        dev_total += 1
                        deviations.append((constraint, (act, observed_freq[act])))
                else:
                    # activity unknown to the model: reported with count 0 (historical output format)
                    dev_total += 1
                    deviations.append((constraint, (act, 0)))
            for act in missing_required:
                dev_total += 1
                deviations.append((constraint, (act, 0)))
        else:
            # only the constraints whose first activity occurs in the trace are applicable
            this_constraints = {x for x in model[constraint] if x[0] in trace_activities}
            conf_total += len(this_constraints)
            if constraint == DiscoveryOutputs.NEVER_TOGETHER.value:
                # a never-together pair is violated when it IS observed in the trace
                violated = this_constraints.intersection(trace_info[i])
            else:
                # every other pair constraint is violated when it is NOT observed in the trace
                violated = this_constraints.difference(trace_info[i])
            dev_total += len(violated)
            if violated:
                deviations.append((constraint, tuple(violated)))

    deviations = sorted(deviations, key=lambda x: (x[0], x[1]))

    return {
        Outputs.DEVIATIONS.value: deviations,
        Outputs.NO_DEV_TOTAL.value: dev_total,
        Outputs.NO_CONSTR_TOTAL.value: conf_total,
        Outputs.DEV_FITNESS.value: 1.0 - float(dev_total) / float(conf_total) if conf_total > 0 else 1.0,
        Outputs.IS_FIT.value: not deviations,
    }


def apply_from_variants_list(var_list, model, parameters=None):
    """
    Performs conformance checking using the log skeleton,
    applying it from a list of variants

    Parameters
    --------------
    var_list
        List of variants (the variant is read from the first element of each entry)
    model
        Log skeleton model
    parameters
        Parameters

    Returns
    --------------
    conformance_dictio
        Dictionary containing, for each variant, the result
        of log skeleton checking
    """
    if parameters is None:
        parameters = {}

    conformance_output = {}

    for cv in var_list:
        v = cv[0]
        trace = variants_util.variant_to_trace(v, parameters=parameters)
        conformance_output[v] = apply_trace(trace, model, parameters=parameters)

    return conformance_output


def _as_hashable(item):
    """
    Returns the item as a tuple when it is a list (e.g., a pair coming from a JSON decoding,
    which could not be stored in a set); any other item is returned unchanged
    """
    return tuple(item) if isinstance(item, list) else item


def after_decode(log_skeleton):
    """
    Prepares the log skeleton after decoding

    Parameters
    --------------
    log_skeleton
        Log skeleton (modified in place)

    Returns
    --------------
    log_skeleton
        Log skeleton (with sets instead of lists)
    """
    pair_outputs = (DiscoveryOutputs.EQUIVALENCE, DiscoveryOutputs.ALWAYS_AFTER, DiscoveryOutputs.ALWAYS_BEFORE,
                    DiscoveryOutputs.NEVER_TOGETHER, DiscoveryOutputs.DIRECTLY_FOLLOWS)
    for output in pair_outputs:
        # list-encoded pairs are unhashable: they are turned into tuples before building the set
        log_skeleton[output.value] = {_as_hashable(x) for x in log_skeleton[output.value]}

    activ_freq = log_skeleton[DiscoveryOutputs.ACTIV_FREQ.value]
    for act in activ_freq:
        activ_freq[act] = set(activ_freq[act])

    return log_skeleton


def get_diagnostics_dataframe(log, conf_result, parameters=None):
    """
    Gets the diagnostics dataframe from a log and the results
    of log skeleton-based conformance checking

    Parameters
    --------------
    log
        Event log
    conf_result
        Results of conformance checking (one entry per trace, in the same order as the log)
    parameters
        Parameters of the algorithm, including:
        - Parameters.CASE_ID_KEY => trace attribute holding the case identifier

    Returns
    --------------
    diagn_dataframe
        Diagnostics dataframe
    """
    if parameters is None:
        parameters = {}

    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes_constants.DEFAULT_TRACEID_KEY)

    diagn_stream = []

    for index, trace in enumerate(log):
        result = conf_result[index]
        diagn_stream.append({"case_id": trace.attributes[case_id_key],
                             "no_dev_total": result[Outputs.NO_DEV_TOTAL.value],
                             "no_constr_total": result[Outputs.NO_CONSTR_TOTAL.value],
                             "dev_fitness": result[Outputs.DEV_FITNESS.value]})

    return pandas_utils.instantiate_dataframe(diagn_stream)