'''
    This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

    PM4Py is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    PM4Py is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with PM4Py.  If not, see <https://www.gnu.org/licenses/>.
'''
import sys
from copy import deepcopy, copy
from enum import Enum
from typing import Optional, Dict, Any, Union, Tuple

import numpy as np

from pm4py.algo.conformance.alignments.petri_net import algorithm as ali
from pm4py.algo.conformance.alignments.petri_net.variants import state_equation_a_star as star
from pm4py.algo.conformance.tokenreplay import algorithm as token_replay
from pm4py.objects.conversion.log import converter as log_converter
from pm4py.objects.log.obj import EventLog
from pm4py.objects.petri_net import properties as petri_properties
from pm4py.objects.petri_net.obj import PetriNet, Marking
from pm4py.statistics.attributes.log.select import select_attributes_from_log_for_tree
from pm4py.statistics.variants.log import get as variants_module
from pm4py.util import constants, xes_constants
from pm4py.util import exec_utils, pandas_utils
from pm4py.visualization.decisiontree.util import dt_to_string
import pandas as pd


class Parameters(Enum):
    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
    LABELS = "labels"


def create_data_petri_nets_with_decisions(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking,
                                          final_marking: Marking) -> Tuple[PetriNet, Marking, Marking]:
    """
    Given a Petri net, create a data Petri net with the decisions given for each place by the decision
    mining algorithm. The guards are written into the properties of the transitions of the
    provided net (the net is modified in place and returned).

    Parameters
    ----------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking

    Returns
    ------------------
    data_petri_net
        Data petri net
    initial_marking
        Initial marking (unchanged)
    final_marking
        Final marking (unchanged)
    """
    all_conditions = {}
    all_variables = {}
    # Only places with at least two outgoing arcs can yield a decision tree; every other
    # place would fail inside get_decisions_table after the log has already been copied.
    for place_name in get_decision_points(net):
        try:
            clf, columns, targets = get_decision_tree(log, net, initial_marking, final_marking,
                                                      decision_point=place_name, parameters={"labels": False})
            target_classes, variables = dt_to_string.apply(clf, columns)
            target_classes = {targets[int(k)]: v for k, v in target_classes.items()}
            variables = {targets[int(k)]: v for k, v in variables.items()}
            for k in target_classes:
                all_conditions[k] = target_classes[k]
                all_variables[k] = variables[k]
        except (Exception, SystemExit):
            # Best effort: a decision point for which no tree can be learned is skipped.
            # SystemExit is included because get_decisions_table reports invalid input
            # through sys.exit(); KeyboardInterrupt is deliberately not swallowed.
            continue

    for trans in net.transitions:
        if trans.name in all_conditions:
            trans.properties[petri_properties.TRANS_GUARD] = all_conditions[trans.name]
            trans.properties[petri_properties.READ_VARIABLE] = all_variables[trans.name]
            trans.properties[petri_properties.WRITE_VARIABLE] = []

    return net, initial_marking, final_marking


def get_decision_tree(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking,
                      final_marking: Marking, decision_point=None, attributes=None,
                      parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Any:
    """
    Gets a decision tree classifier on a specific point of the model

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method crashes, but provides a list of possible decision points
        - if specified, the method goes on and produce the decision tree
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed.
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    clf
        Decision tree
    feature_names
        The names of the features
    classes
        The classes
    """
    # imported lazily so that the module can be loaded without the ML dependencies
    from pm4py.util import ml_utils

    if parameters is None:
        parameters = {}

    X, y, targets = apply(log, net, initial_marking, final_marking, decision_point=decision_point,
                          attributes=attributes, parameters=parameters)
    dt = ml_utils.DecisionTreeClassifier()
    dt = dt.fit(X, y)
    return dt, list(X.columns.values.tolist()), targets


def apply(log: Union[EventLog, pd.DataFrame], net: PetriNet, initial_marking: Marking, final_marking: Marking,
          decision_point=None, attributes=None,
          parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> Any:
    """
    Gets the essential information (features, target class and names of the target class)
    in order to learn a classifier

    Parameters
    --------------
    log
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    decision_point
        Point of the process in which a decision happens:
        - if not specified, the method crashes, but provides a list of possible decision points
        - if specified, the method goes on and produce the decision tree
    attributes
        Attributes of the log. If not specified, then an automatic attribute selection
        is performed.
    parameters
        Parameters of the algorithm

    Returns
    ---------------
    X
        features
    y
        Target class
    class_name
        Target class names
    """
    if parameters is None:
        parameters = {}

    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)
    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)

    if decision_point is None:
        decision_points_names = get_decision_points(net, labels=labels, parameters=parameters)
        raise Exception("please provide decision_point as argument of the method. Possible decision points: ",
                        decision_points_names)

    if attributes is None:
        str_tr_attr, str_ev_attr, num_tr_attr, num_ev_attr = select_attributes_from_log_for_tree(log)
        attributes = list(str_ev_attr) + list(num_ev_attr)

    I, dp = get_decisions_table(log, net, initial_marking, final_marking, attributes=attributes,
                                pre_decision_points=[decision_point], parameters=parameters)

    # the activity itself is never used as a feature
    x_attributes = {a for a in attributes if not a == activity_key}
    str_attributes = set()
    x = []
    x2 = []
    y = []
    for el in I[decision_point]:
        str_row = {}
        other_row = {}
        for a, v in el[0].items():
            if a not in x_attributes:
                continue
            if type(v) is str:
                str_attributes.add(a)
                str_row[a] = v
            else:
                other_row[a] = v
        x.append(str_row)
        x2.append(other_row)
        y.append(el[1])

    X = pandas_utils.instantiate_dataframe(x)
    # sorted, so that the order of the one-hot encoded columns does not depend on set ordering
    X = pd.get_dummies(data=X, columns=sorted(str_attributes))
    X2 = pandas_utils.instantiate_dataframe(x2)
    X = pandas_utils.concat([X, X2], axis=1)
    Y = pandas_utils.instantiate_dataframe(y, columns=["Name"])
    Y, targets = encode_target(Y, "Name")
    y = Y['Target']
    return X, y, targets


def _exit_unless_non_empty_list(value, not_a_list_message, empty_message):
    """
    Validates an optional list parameter: None is accepted, anything else has to be a
    non-empty list. On violation the corresponding message is printed and the
    interpreter is terminated through sys.exit().
    """
    if value is None:
        return
    if not isinstance(value, list):
        print(not_a_list_message)
        sys.exit()
    if not value:
        print(empty_message)
        sys.exit()


def get_decisions_table(log0, net, initial_marking, final_marking, attributes=None, use_trace_attributes=False, k=1,
                        pre_decision_points=None, trace_attributes=None, parameters=None):
    """
    Gets a decision table out of a log and an accepting Petri net

    Parameters
    -----------------
    log0
        Event log
    net
        Petri net
    initial_marking
        Initial marking
    final_marking
        Final marking
    attributes
        List of attributes which are considered
        (if not provided, all the attributes are considered)
    use_trace_attributes
        Include trace attributes in the decision table
    k
        Number that determines the number of last activities to take into account
    pre_decision_points
        List of Strings of place Names that have to be considered as decision points.
        If not provided, the decision points are inferred from the Petri net
    trace_attributes
        List of trace attributes to consider
    parameters
        Possible parameters of the algorithm

    Returns
    --------------
    I
        decision table
    decision_points
        The decision points as places of the Petri net, which are the keys of a dictionary
        having as values the list of transitions that are target
    """
    if parameters is None:
        parameters = {}

    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)

    # work on a copy: prepare_event_log renames attributes in place
    log = deepcopy(log0)
    log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

    _exit_unless_non_empty_list(
        pre_decision_points,
        "Error: The parameter pre_decision_points has to be a list of names of the places that have to be considered.",
        "Error: There must be at least one element in the list of pre_decision_points.")
    _exit_unless_non_empty_list(
        attributes,
        "Error: The parameter attributes has to be a list of names of event attributes that have to be considered.",
        "Error: There must be at least one element in the list of attributes.")
    if not use_trace_attributes and isinstance(trace_attributes, list):
        print("Note: Since a list of considerable trace attributes is provided, "
              "and use_trace_attributes was set on False, we set it on True")
        use_trace_attributes = True
    _exit_unless_non_empty_list(
        trace_attributes,
        "Error: The parameter trace_attributes has to be a list of names of trace attributes "
        "that have to be considered.",
        "Error: There must be at least one element in the list of trace_attributes.")

    decision_points = get_decision_points(net, pre_decision_points=pre_decision_points, parameters=parameters)
    decision_points_names = get_decision_points(net, labels=labels, pre_decision_points=pre_decision_points,
                                                parameters=parameters)

    if use_trace_attributes:
        # Made to ensure distinguishness between event and trace attributes.
        log = prepare_event_log(log)
        if attributes is not None:
            attributes = prepare_attributes(attributes)
        # NOTE(review): a user-provided trace_attributes list is NOT prefixed with "t_" here,
        # although prepare_event_log renames the trace attributes of the log; confirm whether
        # callers are expected to pass already prefixed names.
        if trace_attributes is None:
            # if no list of trace attributes is provided, we create one
            collected = []
            for trace in log:
                collected += list(trace.attributes)
            trace_attributes = list(set(collected))

    if attributes is None:
        # if no list is given, every attribute of the events are considered
        collected = []
        for trace in log:
            for event in trace:
                collected += list(event.keys())
        attributes = list(set(collected))

    I = get_attributes(log, decision_points, attributes, use_trace_attributes, trace_attributes, k, net,
                       initial_marking, final_marking, decision_points_names, parameters=parameters)
    return (I, decision_points)


def prepare_event_log(log):
    """
    If trace attributes are considered, it is possible that trace attributes have the same name as event attributes.
    To tackle this issue, the attributes get renamed.
    For trace attributes, we add "t_" at the beginning of the dictionary keys.
    For event attributes, we add "e_" at the beginning of the dict keys.
    The log is modified in place.

    :param log: Event log whose attributes get renamed
    :return: The same log object, with renamed attributes
    """
    for trace in log:
        # iterate over a snapshot of the keys, since the dictionary is modified while renaming
        for attribute in list(trace.attributes):
            trace.attributes["t_" + attribute] = trace.attributes.pop(attribute)
        for event in trace:
            for attribute in list(event._dict):
                event._dict["e_" + attribute] = event._dict.pop(attribute)
    return log


def prepare_attributes(attributes):
    """
    Method that adds "e_" in front of every attribute if trace attributes are considered.

    :param attributes: List of event attributes that the user wants to consider.
    :return: list of edited attribute names
    """
    return ["e_" + attribute for attribute in attributes]


def get_decision_points(net, labels=False, pre_decision_points=None, parameters=None):
    """
    The goal is to get all decision places. These are places where there are at least two outgoing arcs.

    :param net: Petri Net where decision points are discovered (places with at least two outgoing arcs)
    :param labels: If someone wants to get the labels of the transitions after a decision point and not the "ID"
    :param pre_decision_points: Optional list of place names; if given, only these places are kept
    :param parameters: Parameters of the algorithm (currently not read by this function)
    :return: Dictionary that maps the name of each decision place to the list of the names
             (or labels) of the transitions that follow it
    :raises Exception: if pre_decision_points is given and none of its entries is a decision point
    """
    if parameters is None:
        parameters = {}

    counter = {place.name: [] for place in net.places}
    for arc in net.arcs:
        if arc.source in net.places:
            target = arc.target
            counter[arc.source.name].append(target.label if labels else target.name)
    decision_points = {key: val for key, val in counter.items() if len(val) >= 2}

    if pre_decision_points is not None:
        decision_points = {key: val for key, val in decision_points.items() if key in pre_decision_points}
        # number of the decision points given by the user that were actually detected
        detected = len(decision_points)
        if detected != len(pre_decision_points):
            if detected == 0:
                raise Exception("None of the given points is a decision point.")
            print("Not all of the given places were identified as decision points. "
                  "However, we only take the correct decision points from your list into account.")
    return decision_points


def simplify_token_replay(replay):
    """
    Keeps, for every distinct sequence of activated transitions, only the first
    replay result having that sequence (the original order is preserved).

    :param replay: List of token-based replay results (dictionaries with the key 'activated_transitions')
    :return: Reduced list of replay results
    """
    seen = set()
    smaller_replay = []
    for element in replay:
        signature = tuple(element['activated_transitions'])
        if signature not in seen:
            seen.add(signature)
            smaller_replay.append(element)
    return smaller_replay


def get_attributes(log, decision_points, attributes, use_trace_attributes, trace_attributes, k, net, initial_marking,
                   final_marking, decision_points_names, parameters=None):
    """
    This method aims to construct for each decision place a table where for each decision place a list if given with
    the label of the later decision and as value the given attributes

    :param log: Log on which the method is applied
    :param decision_points: Places that have multiple outgoing arcs (values: names of the target transitions)
    :param attributes: Attributes that are considered
    :param use_trace_attributes: If trace attributes have to be considered or not
    :param trace_attributes: List of trace attributes that are considered
    :param k: Taking k last activities into account
    :param net: Petri net
    :param initial_marking: Initial marking
    :param final_marking: Final marking
    :param decision_points_names: Same places as decision_points, with the values as produced by
                                  get_decision_points for the configured "labels" parameter
    :param parameters: Parameters of the algorithm
    :return: Dictionary that has as keys the decision places. The value for this key is a list.
    The content of these lists are tuples. The first element of these tuples is information regrading the attributes,
    the second element of these tuples is the transition which chosen in a decision.
    """
    if parameters is None:
        parameters = {}

    labels = exec_utils.get_param_value(Parameters.LABELS, parameters, True)

    I = {key: [] for key in decision_points}
    # NOTE(review): A is shared by all the traces, so a value observed in one trace is carried
    # over to the following traces until it is overwritten; confirm that this is intended.
    A = {attri: None for attri in attributes}

    # first, take a look at the variants
    variants_idxs = variants_module.get_variants_from_log_trace_idx(log, parameters=parameters)
    one_variant = list(variants_idxs)
    # TODO: extend the token-based replay code with a parameter to consider only the variants
    replay_result = token_replay.apply(log, net, initial_marking, final_marking, parameters=parameters)
    replay_result = simplify_token_replay(replay_result)

    # the i-th simplified replay result is matched positionally with the i-th variant
    for count, variant in enumerate(replay_result):
        trace_indexes = variants_idxs[one_variant[count]]
        if variant['trace_fitness'] == 1.0:
            # fitting variant: the activated transitions of the replay are used directly
            for trace_index in trace_indexes:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                # j is a pointer which points to the current event inside a trace
                for transition in variant['activated_transitions']:
                    tr_to_str = transition.label if labels else transition.name
                    for key, value in decision_points_names.items():
                        if tr_to_str in value:
                            for element in last_k_list:
                                if element is not None:
                                    I[key].append((element.copy(), tr_to_str))
                    for attri in attributes:
                        if attri in trace[j]:
                            # only add the attribute information if it is present in the event
                            A[attri] = trace[j][attri]
                    # add A to last_k_list. Using modulo to access correct entry
                    last_k_list[j % k] = A.copy()
                    if transition.label is not None:
                        if not j + 1 >= len(trace):
                            # Problem otherwise: If there are tau-transition after the last event related transition,
                            # the pointer j which points to the current event in a trace, gets out of range
                            j += 1
        else:
            # non-fitting variant: an alignment is computed once on the first trace of the variant
            example_trace = log[trace_indexes[0]]
            align_parameters = copy(parameters)
            align_parameters[star.Parameters.PARAM_ALIGNMENT_RESULT_IS_SYNC_PROD_AWARE] = True
            alignment = ali.apply(example_trace, net, initial_marking, final_marking,
                                  parameters=align_parameters)['alignment']
            for trace_index in trace_indexes:
                last_k_list = [None] * k
                trace = log[trace_index]
                if use_trace_attributes:
                    for attribute in trace_attributes:
                        # can be done here since trace attributes does not change for whole trace
                        A[attribute] = trace.attributes[attribute]
                j = 0
                for el in alignment:
                    if el[1][1] != '>>':
                        # If move in model
                        for key, value in decision_points.items():
                            if el[0][1] in value:
                                for element in last_k_list:
                                    if element is not None:
                                        # only add those entries where information is provided
                                        if el[1][1] is None:
                                            # for some dt algorithms, the entry None might be a problem,
                                            # since it is left out later
                                            I[key].append((element.copy(), el[0][1]))
                                        else:
                                            I[key].append((element.copy(), el[1][1]))
                    if el[1][0] != '>>' and el[1][1] != '>>':
                        # If there is a move in log and model
                        for attri in attributes:
                            if attri in trace[j]:
                                # only add the attribute information if it is present in the event
                                A[attri] = trace[j][attri]
                        # add A to last_k_list. Using modulo to access correct entry
                        last_k_list[j % k] = A.copy()
                    if el[1][0] != '>>':
                        # only go to next event in trace if the current event has been aligned
                        # TODO: Discuss if this is correct or can lead to problems
                        j += 1
    return I


def encode_target(df, target_column):
    """Add column to df with integers for the target.
    Method taken from: http://chrisstrelioff.ws/sandbox/2015/06/08/decision_trees_in_python_with_scikit_learn_and_pandas.html

    Args
    ----
    df -- pandas DataFrame.
    target_column -- column to map to int, producing
                     new Target column.

    Returns
    -------
    df_mod -- modified DataFrame.
    targets -- list of target names.
    """
    # the input dataframe is left untouched
    df_mod = df.copy()
    targets = pandas_utils.format_unique(df_mod[target_column].unique())
    # the integer code of a target is its position in the list of targets
    map_to_int = {name: n for n, name in enumerate(targets)}
    df_mod["Target"] = df_mod[target_column].replace(map_to_int)

    return (df_mod, targets)