kota
initial commit
e60e568
raw
history blame
23 kB
'''
This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).
PM4Py is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
PM4Py is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
'''
import sys
from copy import deepcopy, copy
from enum import Enum
from typing import Optional, Dict, Any, Union, Tuple
import numpy as np
from pm4py.algo.conformance.alignments.petri_net import algorithm as ali
from pm4py.algo.conformance.alignments.petri_net.variants import state_equation_a_star as star
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.obj import EventLog
from pm4py.objects.petri_net import properties as petri_properties
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.statistics.attributes.log.select import select_attributes_from_log_for_tree
from pm4py.statistics.variants.log import get as variants_module
from pm4py.util import constants, xes_constants
from pm4py.util import exec_utils, pandas_utils
from pm4py.visualization.decisiontree.util import dt_to_string
import pandas as pd
class Parameters(Enum):
    """Keys looked up in the ``parameters`` dictionaries accepted by the functions of this module."""
    # Key under which callers provide the name of the activity attribute; apply() excludes
    # that attribute from the feature columns.
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    # Flag (read with default True in this module) selecting whether transitions are
    # identified by their label instead of their name.
    LABELS = "labels"
def create_data_petri_nets_with_decisions(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking,
                                          final_marking: Marking) -> Tuple[PetriNet, Marking, Marking]:
    """
    Given a Petri net, create a data Petri net with the decisions given for each place by the decision
    mining algorithm.

    A decision tree is learned for every place of the net; places for which no tree can be
    learned are skipped. The guards are written into the ``properties`` of the transitions of
    ``net``, i.e. the net passed as argument is modified in place and returned.

    Parameters
    ----------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ------------------
    data_petri_net
        Data petri net (the same object as ``net``)
    initial_marking
        Initial marking (unchanged)
    final_marking
        Final marking (unchanged)
    """
    all_conditions = {}
    all_variables = {}
    for place in net.places:
        try:
            clf, columns, targets = get_decision_tree(log, net, initial_marking, final_marking,
                                                      decision_point=place.name,
                                                      parameters={"labels": False})
            target_classes, variables = dt_to_string.apply(clf, columns)
            # The tree works on integer class codes; translate them back to target names.
            target_classes = {targets[int(code)]: guard for code, guard in target_classes.items()}
            variables = {targets[int(code)]: names for code, names in variables.items()}
            for target_name, guard in target_classes.items():
                all_conditions[target_name] = guard
                all_variables[target_name] = variables[target_name]
        except (Exception, SystemExit):
            # Best effort: a place that is not a decision point makes get_decision_points raise,
            # and get_decisions_table reports invalid input through sys.exit(). Both simply mean
            # "no guard for this place". KeyboardInterrupt is deliberately NOT caught any more.
            continue
    for trans in net.transitions:
        if trans.name in all_conditions:
            trans.properties[petri_properties.TRANS_GUARD] = all_conditions[trans.name]
            trans.properties[petri_properties.READ_VARIABLE] = all_variables[trans.name]
            trans.properties[petri_properties.WRITE_VARIABLE] = []
    return net, initial_marking, final_marking
def get_decision_tree(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking, final_marking: Marking,
                      decision_point=None, attributes=None,
                      parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Any:
    """
    Learns a decision tree classifier for one decision point of the model.

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Name of the place at which the decision happens. It is forwarded to ``apply``, which
        raises an exception listing the possible decision points when it is not specified.
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed.
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    clf
        Fitted decision tree
    feature_names
        The names of the features (columns of the feature table)
    classes
        The classes
    """
    from pm4py.util import ml_utils

    effective_parameters = {} if parameters is None else parameters
    features, encoded_target, class_names = apply(log, net, initial_marking, final_marking,
                                                  decision_point=decision_point, attributes=attributes,
                                                  parameters=effective_parameters)
    classifier = ml_utils.DecisionTreeClassifier()
    classifier = classifier.fit(features, encoded_target)
    feature_names = list(features.columns.values.tolist())
    return classifier, feature_names, class_names
def apply(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking, final_marking: Marking, decision_point=None,
          attributes=None, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Any:
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier.

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, an exception is raised that lists the possible decision points
        - if specified, the feature table for that decision point is built
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed (string and numeric event attributes).
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    X
        Features: one-hot encoded string attributes followed by the non-string attributes
    y
        Target class (integer codes)
    class_name
        Target class names

    Raises
    ---------------
    Exception
        If ``decision_point`` is None.
    """
    if parameters is None:
        parameters = {}
    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
    if decision_point is None:
        decision_points_names = get_decision_points(net, labels=labels, parameters=parameters)
        raise Exception("please provide decision_point as argument of the method. Possible decision points: ",
                        decision_points_names)
    if attributes is None:
        _str_tr_attr, str_ev_attr, _num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)
        attributes = list(str_ev_attr) + list(num_ev_attr)
    I, _ = get_decisions_table(log, net, initial_marking, final_marking, attributes=attributes,
                               pre_decision_points=[decision_point], parameters=parameters)
    # The activity itself is the thing being predicted around, so it is not used as a feature.
    x_attributes = {a for a in attributes if a != activity_key}
    str_attributes = set()
    str_rows = []
    other_rows = []
    y = []
    for observed_values, chosen in I[decision_point]:
        str_row = {}
        other_row = {}
        for a, v in observed_values.items():
            if a not in x_attributes:
                continue
            # Exact type check kept on purpose: only plain ``str`` values are one-hot encoded.
            if type(v) is str:
                str_attributes.add(a)
                str_row[a] = v
            else:
                other_row[a] = v
        str_rows.append(str_row)
        other_rows.append(other_row)
        y.append(chosen)
    X = pandas_utils.instantiate_dataframe(str_rows)
    # Sorted so that the order of the dummy columns does not depend on set iteration order
    # (which changes between interpreter runs because of string hash randomisation).
    X = pd.get_dummies(data=X, columns=sorted(str_attributes))
    X2 = pandas_utils.instantiate_dataframe(other_rows)
    X = pandas_utils.concat([X, X2], axis=1)
    Y = pandas_utils.instantiate_dataframe(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']
    return X, y, targets
def get_decisions_table(log0, net, initial_marking, final_marking, attributes=None, use_trace_attributes=False, k=1,
                        pre_decision_points=None, trace_attributes=None, parameters=None):
    """
    Gets a decision table out of a log and an accepting Petri net.

    Parameters
    -----------------
    log0
        Event log (it is deep-copied; the copy is the one that gets converted and, when trace
        attributes are used, has its attributes renamed)
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    attributes
        List of attributes which are considered
        (if not provided, all the attributes are considered)
    use_trace_attributes
        Include trace attributes in the decision table
    k
        Number that determines the number of last activities to take into account
    pre_decision_points
        List of Strings of place Names that have to be considered as decision points.
        If not provided, the decision points are inferred from the Petri net
    trace_attributes
        List of trace attributes to consider
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    I
        decision table
    decision_points
        The decision points as places of the Petri net, which are the keys of a dictionary
        having as values the list of transitions that are target

    Raises
    --------------
    SystemExit
        If ``pre_decision_points``, ``attributes`` or ``trace_attributes`` is provided but is
        not a non-empty list (an error message is printed first). Kept for backward
        compatibility with existing callers.
    """
    if parameters is None:
        parameters = {}
    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)
    log = deepcopy(log0)
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)
    if pre_decision_points is not None:
        if not isinstance(pre_decision_points, list):
            print(
                "Error: The parameter pre_decision_points has to be a list of names of the places that have to be considered.")
            sys.exit()
        if not pre_decision_points:
            print("Error: There must be at least one element in the list of pre_decision_points.")
            sys.exit()
    if attributes is not None:
        if not isinstance(attributes, list):
            print(
                "Error: The parameter attributes has to be a list of names of event attributes that have to be considered.")
            sys.exit()
        if not attributes:
            print("Error: There must be at least one element in the list of attributes.")
            sys.exit()
    if not use_trace_attributes and isinstance(trace_attributes, list):
        print(
            "Note: Since a list of considerable trace attributes is provided, and use_trace_attributes was set on False, we set it on True")
        use_trace_attributes = True
    if trace_attributes is not None:
        if not isinstance(trace_attributes, list):
            print(
                "Error: The parameter trace_attributes has to be a list of names of trace attributes that have to be considered.")
            sys.exit()
        if not trace_attributes:
            print("Error: There must be at least one element in the list of trace_attributes.")
            sys.exit()
    # Same places twice: once with transition names, once with the representation selected by
    # the LABELS parameter.
    decision_points = get_decision_points(net, pre_decision_points=pre_decision_points, parameters=parameters)
    decision_points_names = get_decision_points(net, labels=labels, pre_decision_points=pre_decision_points,
                                                parameters=parameters)
    if use_trace_attributes:
        # Made to ensure distinguishness between event and trace attributes.
        log = prepare_event_log(log)
        if attributes is not None:
            attributes = prepare_attributes(attributes)
        if trace_attributes is None:
            # if no list of trace attributes is provided, we create one
            trace_attributes = list({name for trace in log for name in trace.attributes})
    if attributes is None:
        # if no list is given, every attribute of the events are considered
        attributes = list({name for trace in log for event in trace for name in event.keys()})
    I = get_attributes(log, decision_points,
                       attributes, use_trace_attributes, trace_attributes,
                       k, net, initial_marking, final_marking, decision_points_names, parameters=parameters)
    return (I, decision_points)
def prepare_event_log(log):
    """
    If trace attributes are considered, it is possible that trace attributes have the same name as event attributes.
    To tackle this issue, the attributes get renamed.
    For trace attributes, we add "t_" at the beginning of the dictionary keys.
    For event attributes, we add "e_" at the beginning of the dict keys.

    The renaming happens in place: the attribute dictionaries of the traces and events of
    ``log`` are modified and keep their identity.

    :param log: iterable of traces; each trace exposes ``attributes`` and iterates over events
        exposing ``_dict``
    :return: the same ``log`` object, with renamed attributes
    """
    for trace in log:
        _prefix_keys_in_place(trace.attributes, "t_")
        for event in trace:
            _prefix_keys_in_place(event._dict, "e_")
    return log


def _prefix_keys_in_place(mapping, prefix):
    """Rename every key of ``mapping`` to ``prefix + key`` while keeping the same dict object."""
    # Build the complete renamed mapping first: renaming key by key would overwrite (and then
    # re-rename) an existing key that already starts with the prefix, e.g. {"a", "t_a"}.
    renamed = {prefix + key: value for key, value in mapping.items()}
    mapping.clear()
    mapping.update(renamed)
def prepare_attributes(attributes):
    """
    Puts "e_" in front of every attribute name; used when trace attributes are considered,
    so that the names match the renamed event attributes.

    :param attributes: List of event attributes that the user wants to consider.
    :return: new list with the edited attribute names
    """
    return ["e_" + name for name in attributes]
def get_decision_points(net, labels=False, pre_decision_points=None, parameters=None):
    """
    The goal is to get all decision places. These are places where there are at least two outgoing arcs.

    :param net: Petri Net where decision points are discovered (places with at least two outgoing arcs)
    :param labels: If someone wants to get the labels of the transitions after a decision point and not the "ID"
    :param pre_decision_points: optional list of place names; when given, only decision points
        whose name is in this list are returned
    :param parameters: accepted for interface compatibility; not used by this function
    :return: dictionary mapping the name of each decision place to the list of its target
        transitions (labels or names, depending on ``labels``)
    :raises Exception: if ``pre_decision_points`` is non-empty and none of its entries is a decision point
    """
    # Outgoing transitions of every place, keyed by place name.
    outgoing = {place.name: [] for place in net.places}
    for arc in net.arcs:
        if arc.source in net.places:
            target = arc.target
            # Truthiness test, consistent with how get_attributes interprets the flag.
            outgoing[arc.source.name].append(target.label if labels else target.name)
    decision_points = {name: targets for name, targets in outgoing.items() if len(targets) >= 2}
    if pre_decision_points is None:
        return decision_points
    decision_points = {name: targets for name, targets in decision_points.items()
                       if name in pre_decision_points}
    # Checked per requested name (not by comparing counts) so that duplicates in the
    # user-provided list do not trigger the "not all places" warning.
    if all(name in decision_points for name in pre_decision_points):
        return decision_points
    if not decision_points:
        raise Exception("None of the given points is a decision point.")
    print(
        "Not all of the given places were identified as decision points. However, we only take the correct decision points from your list into account.")
    return decision_points
def simplify_token_replay(replay):
    """
    Keeps one replay result per distinct sequence of activated transitions.

    :param replay: iterable of replay results, each a mapping with the key 'activated_transitions'
    :return: list with the first result encountered for every distinct sequence of activated
        transitions, in the order of first occurrence
    """
    seen = set()
    smaller_replay = []
    # Single pass, so one-shot iterables work too and the tuple key is built once per element.
    for element in replay:
        signature = tuple(element['activated_transitions'])
        if signature not in seen:
            seen.add(signature)
            smaller_replay.append(element)
    return smaller_replay
def get_attributes(log, decision_points, attributes, use_trace_attributes, trace_attributes, k, net, initial_marking,
                   final_marking, decision_points_names, parameters=None):
    """
    This method aims to construct, for each decision place, a list of observations: the attribute
    values known when the decision was taken, paired with the transition that was chosen.

    The log is replayed on the net with token-based replay. For replay results with fitness 1.0
    the activated transitions are followed directly; for the others an alignment is computed on
    the first trace of the variant and followed instead.

    :param log: Log on which the method is applied
    :param decision_points: Places that have multiple outgoing arcs (dictionary: place name ->
        target transitions); its keys become the keys of the result
    :param attributes: Attributes that are considered
    :param use_trace_attributes: If trace attributes have to be considered or not
    :param trace_attributes: List of trace attributes that are considered
    :param k: Taking k last activities into account
    :param net: Petri net
    :param initial_marking: Initial marking
    :param final_marking: Final marking
    :param decision_points_names: Same structure as decision_points, used to match the
        activated transitions in the token-replay branch
    :param parameters: Parameters of the algorithm (Parameters.LABELS is read here; the
        dictionary is also forwarded to the variants, token replay and alignment calls)
    :return: Dictionary that has as keys the decision places. The value for this key is a list.
    The content of these lists are tuples. The first element of these tuples is information regrading the attributes,
    the second element of these tuples is the transition which chosen in a decision.
    """
    if parameters is None:
        parameters = {}
    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)
    I = {}
    for key in decision_points:
        I[key] = []
    # A is the running snapshot "attribute -> last value seen".
    # NOTE(review): A is created once and never reset, so values observed in one trace stay
    # visible while processing the following traces - confirm this is intended.
    A = {}
    for attri in attributes:
        A[attri] = None
    # NOTE(review): i is not used anywhere below.
    i = 0
    # first, take a look at the variants
    variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)
    # one_variant: the keys of variants_idxs as a list, so that they can be accessed by position
    one_variant = []
    for variant in variants_idxs:
        one_variant.append(variant)
    # TODO: extend the token-based replay call with a parameter so that only variants are replayed
    replay_result = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters)
    replay_result = simplify_token_replay(replay_result)
    # count is the position of the current replay result; it is also used to pick the variant.
    # NOTE(review): this presumes the simplified replay results are in the same order (and
    # number) as the variants in one_variant - verify against token replay / variants module.
    count = 0
    for variant in replay_result:
        if variant['trace_fitness'] == 1.0:
            for trace_index in variants_idxs[one_variant[count]]:
                # window holding (up to) the k most recent attribute snapshots of this trace
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # j is a pointer which points to the current event inside a trace
                for transition in variant['activated_transitions']:
                    for key, value in decision_points_names.items():
                        tr_to_str = transition.label if labels else transition.name
                        if tr_to_str in value:
                            # the transition is one of the choices of this place: record every
                            # snapshot currently in the window as an observation for it
                            for element in last_k_list:
                                if element != None:
                                    I[key].append((element.copy(), tr_to_str))
                    for attri in attributes:
                        if attri in trace[j]:
                            # only add the attribute information if it is present in the event
                            A[attri] = trace[j][attri]
                    # add A to last_k_list. Using modulo to access correct entry
                    last_k_list[j % k] = A.copy()
                    if transition.label != None:
                        if not j + 1 >= len(trace):
                            # Problem otherwise: If there are tau-transition after the last event related transition,
                            # the pointer j which points to the current event in a trace, gets out of range
                            j += 1
        else:
            # The replay did not fit perfectly: align the first trace of the variant and reuse
            # that alignment for every trace of the variant.
            example_trace = log[variants_idxs[one_variant[count]][0]]
            # shallow copy, so the caller's parameters dictionary does not get the extra key
            align_parameters = copy(parameters)
            align_parameters[star.Parameters.PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE] = True
            alignment = ali.apply(example_trace, net, initial_marking, final_marking,
                                  parameters=align_parameters)['alignment']
            for trace_index in variants_idxs[one_variant[count]]:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # presumably each step el is ((log name, model name), (log label, model label)),
                # with '>>' marking a skipped side - TODO confirm against the alignment variant
                for el in alignment:
                    if el[1][1] != '>>':
                        # If move in model
                        for key, value in decision_points.items():
                            if el[0][1] in value:
                                for element in last_k_list:
                                    if element != None:
                                        # only add those entries where information is provided
                                        if el[1][1] == None:
                                            # for some dt algorithms, the entry None might be a problem, since it is left out later
                                            I[key].append((element.copy(), el[0][1]))
                                        else:
                                            I[key].append((element.copy(), el[1][1]))
                    if el[1][0] != '>>' and el[1][1] != '>>':
                        # If there is a move in log and model
                        for attri in attributes:
                            if attri in trace[j]:
                                # only add the attribute information if it is present in the event
                                A[attri] = trace[j][attri]
                        # add A to last_k_list. Using modulo to access correct entry
                        last_k_list[j % k] = A.copy()
                    if el[1][0] != '>>':
                        # only go to next event in trace if the current event has been aligned
                        # TODO: Discuss if this is correct or can lead to problems
                        j += 1
        count += 1
    return I
def encode_target(df, target_column):
    """Add a "Target" column to a copy of df, holding an integer code for each target value.

    Method taken from: http://chrisstrelioff.ws/sandbox/2015/06/08/decision_trees_in_python_with_scikit_learn_and_pandas.html

    Args
    ----
    df -- pandas DataFrame (left untouched; a copy is modified).
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified copy of the DataFrame.
    targets -- list of target names; the position of a name is its integer code.
    """
    encoded = df.copy()
    class_names = pandas_utils.format_unique(encoded[target_column].unique())
    code_of = {}
    for code, class_name in enumerate(class_names):
        code_of[class_name] = code
    encoded["Target"] = encoded[target_column].replace(code_of)
    return (encoded, class_names)