# NOTE: the following lines are repository web-page residue, not part of the module.
# kota
# initial commit
# e60e568
# raw
# history blame
# 12.8 kB
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
from pm4py.objects.log.util import xes
from pm4py.algo.discovery.log_skeleton import trace_skel
from pm4py.util import xes_constants
from pm4py.util import variants_util, pandas_utils
from pm4py.util import exec_utils
from typing import Optional, Dict, Any, Union, List, Set
from pm4py.objects.log.obj import EventLog, Trace
import pandas as pd
from enum import Enum
from pm4py.util.constants import PARAMETER_CONSTANT_ACTIVITY_KEY, PARAMETER_CONSTANT_CASEID_KEY, CASE_CONCEPT_NAME
class Parameters(Enum):
    """
    Parameter keys understood by the log-skeleton conformance checking functions.

    Members are looked up in the ``parameters`` dictionary through
    ``exec_utils.get_param_value``. DEFAULT_CONSIDERED_CONSTRAINTS is the exception:
    it is not a key, its value is the fallback list used when
    CONSIDERED_CONSTRAINTS is not provided.
    """
    # parameter for the noise threshold
    NOISE_THRESHOLD = "noise_threshold"
    # considered constraints in conformance checking among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq
    CONSIDERED_CONSTRAINTS = "considered_constraints"
    # default choice for conformance checking
    # (order matters: apply_actlist uses the position of each name in this list
    # to index the structure returned by trace_skel.get_trace_info)
    DEFAULT_CONSIDERED_CONSTRAINTS = ["equivalence", "always_after", "always_before", "never_together",
                                      "directly_follows", "activ_freq"]
    # key of the case identifier (dataframe column in apply_log, trace attribute in get_diagnostics_dataframe)
    CASE_ID_KEY = PARAMETER_CONSTANT_CASEID_KEY
    # key of the event attribute holding the activity name
    ACTIVITY_KEY = PARAMETER_CONSTANT_ACTIVITY_KEY
    # NOTE(review): not read directly in this file; presumably consumed by variants_util through `parameters` - confirm
    PARAMETER_VARIANT_DELIMITER = "variant_delimiter"
# Module-level aliases of the Parameters members (presumably kept so callers can
# import these names directly from the module - verify before removing any).
# NOTE(review): Parameters.CASE_ID_KEY has no alias here - confirm whether that is intentional.
NOISE_THRESHOLD = Parameters.NOISE_THRESHOLD
CONSIDERED_CONSTRAINTS = Parameters.CONSIDERED_CONSTRAINTS
DEFAULT_CONSIDERED_CONSTRAINTS = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS
ACTIVITY_KEY = Parameters.ACTIVITY_KEY
PARAMETER_VARIANT_DELIMITER = Parameters.PARAMETER_VARIANT_DELIMITER
class DiscoveryOutputs(Enum):
    """
    Names of the constraint families of a log-skeleton model.

    The values are used as keys of the model dictionary (see after_decode) and
    are the names accepted in Parameters.CONSIDERED_CONSTRAINTS.
    """
    EQUIVALENCE = "equivalence"
    ALWAYS_AFTER = "always_after"
    ALWAYS_BEFORE = "always_before"
    NEVER_TOGETHER = "never_together"
    DIRECTLY_FOLLOWS = "directly_follows"
    # maps each activity to its collection of allowed occurrence counts
    ACTIV_FREQ = "activ_freq"
class Outputs(Enum):
    """
    Keys of the result dictionary produced by apply_actlist for a single trace.
    """
    # sorted list of (constraint name, deviation details) tuples
    DEVIATIONS = "deviations"
    # number of deviations found
    NO_DEV_TOTAL = "no_dev_total"
    # number of constraints that were checked for the trace
    NO_CONSTR_TOTAL = "no_constr_total"
    # 1 - deviations / constraints (1.0 when no constraint was checked)
    DEV_FITNESS = "dev_fitness"
    # True when the list of deviations is empty
    IS_FIT = "is_fit"
def apply_log(log: Union[EventLog, pd.DataFrame], model: Dict[str, Any], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> List[Dict[str, Any]]:
    """
    Apply log-skeleton based conformance checking given an event log
    and a log-skeleton model

    Each distinct sequence of activities (variant) is checked only once;
    traces having the same variant share the very same result dictionary.

    Parameters
    --------------
    log
        Event log (or Pandas dataframe)
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY
            - Parameters.CASE_ID_KEY (used only when the log is a dataframe)
            - Parameters.CONSIDERED_CONSTRAINTS, among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

    Returns
    --------------
    aligned_traces
        Conformance checking results for each trace (one dictionary per trace,
        in the order of the traces of the log / of the groups of the dataframe):
        - Outputs.IS_FIT => boolean that tells if the trace is perfectly fit according to the model
        - Outputs.DEV_FITNESS => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - Outputs.DEVIATIONS => list of deviations in the model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    if pandas_utils.check_is_pandas_dataframe(log):
        case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, CASE_CONCEPT_NAME)
        traces = [tuple(x) for x in log.groupby(case_id_key)[activity_key].agg(list).to_dict().values()]
    else:
        traces = [tuple(y[activity_key] for y in x) for x in log]

    # cache of the results keyed by variant: a dictionary lookup replaces the
    # linear search over the list of variants that was done for every trace
    variant_results = {}
    res = []
    for variant in traces:
        if variant not in variant_results:
            variant_results[variant] = apply_actlist(variant, model, parameters=parameters)
        res.append(variant_results[variant])

    return res
def apply_trace(trace: Trace, model: Dict[str, Any], parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Dict[str, Any]:
    """
    Apply log-skeleton based conformance checking given a trace
    and a log-skeleton model

    Parameters
    --------------
    trace
        Trace
    model
        Log-skeleton model
    parameters
        Parameters of the algorithm, including:
            - Parameters.ACTIVITY_KEY
            - Parameters.CONSIDERED_CONSTRAINTS, among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq

    Returns
    --------------
    aligned_trace
        Dictionary containing:
        - Outputs.IS_FIT => boolean that tells if the trace is perfectly fit according to the model
        - Outputs.DEV_FITNESS => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - Outputs.DEVIATIONS => list of deviations in the model
    """
    if parameters is None:
        parameters = {}

    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes.DEFAULT_NAME_KEY)

    # project the events on the activity attribute; the actual checking is done on the list of activities
    activities = [event[activity_key] for event in trace]

    return apply_actlist(activities, model, parameters=parameters)
def apply_actlist(trace, model, parameters=None):
    """
    Apply log-skeleton based conformance checking given the list of activities of a trace
    and a log-skeleton model

    Parameters
    --------------
    trace
        List of activities of a trace
    model
        Log-skeleton model (dictionary keyed by the values of DiscoveryOutputs)
    parameters
        Parameters of the algorithm, including:
            - Parameters.CONSIDERED_CONSTRAINTS => the constraints to check, among: equivalence, always_after, always_before, never_together, directly_follows, activ_freq (default: all of them)

    Returns
    --------------
    aligned_trace
        Dictionary containing:
        - Outputs.DEVIATIONS => sorted list of deviations; each one is a tuple
          (constraint name, details), where the details are (activity, count) for
          activ_freq and the tuple of the violated model entries otherwise
        - Outputs.NO_DEV_TOTAL => number of deviations
        - Outputs.NO_CONSTR_TOTAL => number of constraints that have been checked
        - Outputs.DEV_FITNESS => deviation based fitness (between 0 and 1; the more the trace is near to 1 the more fit is)
        - Outputs.IS_FIT => boolean that tells if the trace is perfectly fit according to the model
    """
    if parameters is None:
        parameters = {}

    default_considered_constraints = Parameters.DEFAULT_CONSIDERED_CONSTRAINTS.value
    consid_constraints = exec_utils.get_param_value(Parameters.CONSIDERED_CONSTRAINTS, parameters,
                                                    default_considered_constraints)

    trace_info = trace_skel.get_trace_info(trace)
    # membership of an activity in the trace is tested once per model entry:
    # build the set once instead of scanning the sequence every time
    trace_activities = set(trace)

    deviations = []
    dev_total = 0
    conf_total = 0

    # the position of a constraint in the default list is also the position of the
    # corresponding information inside trace_info
    for idx, constraint in enumerate(default_considered_constraints):
        if constraint not in consid_constraints:
            continue

        observed = trace_info[idx]

        if constraint == DiscoveryOutputs.ACTIV_FREQ.value:
            allowed_freq = model[constraint]
            # activities that the model requires at least once but that never occur in the trace
            missing = [act for act in allowed_freq if
                       min(allowed_freq[act]) > 0 and act not in trace_activities]
            # one constraint for every activity of the trace (known to the model or not)
            # plus one for every required activity that is missing
            conf_total += len(observed) + len(missing)

            for act in observed:
                if act not in allowed_freq:
                    # activity unknown to the model
                    dev_total += 1
                    deviations.append((constraint, (act, 0)))
                elif observed[act] not in allowed_freq[act]:
                    # number of occurrences not allowed by the model
                    dev_total += 1
                    deviations.append((constraint, (act, observed[act])))

            for act in missing:
                dev_total += 1
                deviations.append((constraint, (act, 0)))
        else:
            # only the model entries whose first activity occurs in the trace are applicable
            applicable = {x for x in model[constraint] if x[0] in trace_activities}
            conf_total += len(applicable)

            if constraint == DiscoveryOutputs.NEVER_TOGETHER.value:
                # negative constraint: a deviation is an applicable entry that IS observed
                violated = applicable.intersection(observed)
            else:
                # positive constraints: a deviation is an applicable entry that is NOT observed
                violated = applicable.difference(observed)

            dev_total += len(violated)
            if violated:
                deviations.append((constraint, tuple(violated)))

    deviations.sort(key=lambda x: (x[0], x[1]))

    ret = {}
    ret[Outputs.DEVIATIONS.value] = deviations
    ret[Outputs.NO_DEV_TOTAL.value] = dev_total
    ret[Outputs.NO_CONSTR_TOTAL.value] = conf_total
    # a trace for which no constraint has been checked is considered perfectly fit
    ret[Outputs.DEV_FITNESS.value] = 1.0 - float(dev_total) / float(conf_total) if conf_total > 0 else 1.0
    ret[Outputs.IS_FIT.value] = not deviations

    return ret
def apply_from_variants_list(var_list, model, parameters=None):
    """
    Checks every variant of a list against the log skeleton

    Parameters
    --------------
    var_list
        List of variants (the variant is the first element of each entry)
    model
        Log skeleton model
    parameters
        Parameters, forwarded both to the conversion of the variant
        into a trace and to the conformance checking

    Returns
    --------------
    conformance_dictio
        Dictionary associating to each variant the result
        of the log skeleton checking
    """
    if parameters is None:
        parameters = {}

    return {
        entry[0]: apply_trace(variants_util.variant_to_trace(entry[0], parameters=parameters), model,
                              parameters=parameters)
        for entry in var_list
    }
def after_decode(log_skeleton):
    """
    Restores the sets of a decoded log skeleton (the object is modified in place)

    Parameters
    --------------
    log_skeleton
        Decoded log skeleton

    Returns
    --------------
    log_skeleton
        The same log skeleton, having sets in place of the other collections
    """
    relation_keys = (
        DiscoveryOutputs.EQUIVALENCE,
        DiscoveryOutputs.ALWAYS_AFTER,
        DiscoveryOutputs.ALWAYS_BEFORE,
        DiscoveryOutputs.NEVER_TOGETHER,
        DiscoveryOutputs.DIRECTLY_FOLLOWS,
    )
    for key in relation_keys:
        log_skeleton[key.value] = set(log_skeleton[key.value])

    # the activity frequencies are a dictionary: only its values become sets
    activ_freq = log_skeleton[DiscoveryOutputs.ACTIV_FREQ.value]
    for act, occurrences in activ_freq.items():
        activ_freq[act] = set(occurrences)

    return log_skeleton
def get_diagnostics_dataframe(log, conf_result, parameters=None):
    """
    Gets the diagnostics dataframe from a log and the results
    of log skeleton-based conformance checking

    Parameters
    --------------
    log
        Event log
    conf_result
        Results of conformance checking (one entry per trace, in the same order of the log)
    parameters
        Parameters of the algorithm, including:
            - Parameters.CASE_ID_KEY => trace attribute holding the case identifier

    Returns
    --------------
    diagn_dataframe
        Diagnostics dataframe, with the columns case_id, no_dev_total, no_constr_total, dev_fitness
    """
    if parameters is None:
        parameters = {}

    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, xes_constants.DEFAULT_TRACEID_KEY)

    diagn_stream = []

    for index, trace in enumerate(log):
        case_id = trace.attributes[case_id_key]
        # the results are matched to the traces by position
        trace_result = conf_result[index]

        diagn_stream.append({"case_id": case_id,
                             "no_dev_total": trace_result[Outputs.NO_DEV_TOTAL.value],
                             "no_constr_total": trace_result[Outputs.NO_CONSTR_TOTAL.value],
                             "dev_fitness": trace_result[Outputs.DEV_FITNESS.value]})

    return pandas_utils.instantiate_dataframe(diagn_stream)