Spaces:

linpershey
/

process_mining

Running

process_mining / pm4py /algo /simulation /playout /process_tree /variants /extensive.py

kota

initial commit

e60e568 11 months ago

13.2 kB

	'''
	This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

	PM4Py is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	PM4Py is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
	'''
	from enum import Enum

	from pm4py.algo.discovery.footprints import algorithm as fp_discovery
	from pm4py.objects.log.obj import EventLog, Trace, Event
	from pm4py.objects.process_tree.utils import bottomup as bottomup_discovery
	from pm4py.objects.process_tree.obj import Operator
	from pm4py.util import constants, xes_constants
	from pm4py.util import exec_utils

	from typing import Optional, Dict, Any, Union
	from pm4py.objects.log.obj import EventLog
	from pm4py.objects.process_tree.obj import ProcessTree

	TRACES = "traces"
	SKIPPABLE = "skippable"


	class Parameters(Enum):
	MIN_TRACE_LENGTH = "min_trace_length"
	MAX_TRACE_LENGTH = "max_trace_length"
	MAX_LOOP_OCC = "max_loop_occ"
	ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY
	MAX_LIMIT_NUM_TRACES = "max_limit_num_traces"
	RETURN_SET_STRINGS = "return_set_strings"


	def get_playout_leaf(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
	max_limit_num_traces):
	"""
	Performs the playout of a leaf (activity or invisible), returning the traces allowed by the tree
	"""
	mr = min_rem_dict[node]
	mar = max_rem_dict[node]
	playout_dictio[node] = {TRACES: set()}
	if node.label is None:
	playout_dictio[node][TRACES].add(tuple([]))
	else:
	if mar + 1 >= min_trace_length and max_trace_length - mr >= 1:
	playout_dictio[node][TRACES].add((node.label,))


	def get_playout_xor(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
	max_limit_num_traces):
	"""
	Performs the playout of a XOR node, returning the traces allowed by the tree
	"""
	mr = min_rem_dict[node]
	traces = set()
	for n in node.children:
	traces = traces.union(playout_dictio[n][TRACES])
	if len(traces) > max_limit_num_traces:
	break
	playout_dictio[node] = {TRACES: traces}


	def get_min_remaining_length(traces):
	"""
	Minimum remaining length (for sequential, parallel cut detection)

	Parameters
	--------------
	traces
	Traces
	"""
	min_len_traces = []
	min_rem_length = []
	for x in traces:
	if len(x) == 0:
	min_len_traces.append(0)
	else:
	min_len_traces.append(len(x[0]))
	min_rem_length.append(0)
	min_rem_length[-1] = 0
	min_rem_length[-2] = min_len_traces[-1]
	j = len(traces) - 3
	while j >= 0:
	min_rem_length[j] = min_rem_length[j + 1] + min_len_traces[j + 1]
	j = j - 1
	return min_len_traces, min_rem_length


	def get_max_remaining_length(traces):
	"""
	Maximum remaining length (for sequential, parallel cut detection)

	Parameters
	--------------
	traces
	Traces
	"""
	max_len_traces = []
	max_rem_length = []
	for x in traces:
	if len(x) == 0:
	max_len_traces.append(0)
	else:
	max_len_traces.append(len(x[-1]))
	max_rem_length.append(0)
	max_rem_length[-1] = 0
	max_rem_length[-2] = max_len_traces[-1]
	j = len(traces) - 3
	while j >= 0:
	max_rem_length[j] = max_rem_length[j + 1] + max_len_traces[j + 1]
	j = j - 1
	return max_len_traces, max_rem_length


	def flatten(x):
	"""
	Flattens a list of tuples
	"""
	ret = []
	for y in x:
	for z in y:
	ret.append(z)
	return ret


	def get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr, mar, max_limit_num_traces):
	"""
	Returns alls the possible sequential combinations between
	the children of a tree
	"""
	diff = max_trace_length - mr
	diff2 = min_trace_length - mar
	min_len_traces, min_rem_length = get_min_remaining_length(traces)
	max_len_traces, max_rem_length = get_max_remaining_length(traces)
	curr = list(traces[0])
	i = 1
	while i < len(traces):
	mrl = min_rem_length[i]
	marl = max_rem_length[i]
	to_visit = []
	j = 0
	while j < len(curr):
	x = curr[j]
	if not type(x) is list:
	x = [x]
	z = 0
	while z < len(traces[i]):
	y = traces[i][z]
	xy = list(x)
	xy.append(y)
	val = sum(len(k) for k in xy)
	if val + mrl <= diff and val + marl >= diff2:
	to_visit.append(xy)
	z = z + 1
	j = j + 1
	curr = to_visit
	i = i + 1
	return curr


	def get_playout_parallel(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces):
	"""
	Performs the playout of an AND node, returning the traces allowed by the tree
	"""
	mr = min_rem_dict[node]
	mar = max_rem_dict[node]
	traces = list(sorted(playout_dictio[x][TRACES], key=lambda x: len(x)) for x in node.children)
	sequential_compositions = get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr, mar, max_limit_num_traces)
	final_traces = list()
	for x in sequential_compositions:
	if len(final_traces) >= max_limit_num_traces:
	break
	to_visit = [[[]] + [len(y) for y in x]]
	while len(to_visit) > 0:
	curr = to_visit.pop(0)
	possible_choices = [i - 1 for i in range(1, len(curr)) if curr[i] > 0]
	for j in possible_choices:
	new = list(curr)
	new[0] = list(new[0])
	new[0].append(x[j][len(x[j]) - curr[j + 1]])
	new[j + 1] = new[j + 1] - 1
	to_visit.append(new)
	if not possible_choices:
	final_traces.append(tuple(curr[0]))
	if len(final_traces) >= max_limit_num_traces:
	break
	playout_dictio[node] = {TRACES: set(final_traces)}


	def get_playout_sequence(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces):
	"""
	Performs the playout of a sequence node, returning the traces allowed by the tree
	"""
	mr = min_rem_dict[node]
	mar = max_rem_dict[node]
	final_traces = set()
	traces = list(sorted(playout_dictio[x][TRACES], key=lambda x: len(x)) for x in node.children)
	sequential_compositions = get_sequential_compositions_children(traces, min_trace_length, max_trace_length, mr, mar, max_limit_num_traces)
	for x in sequential_compositions:
	final_traces.add(tuple(flatten(x)))
	playout_dictio[node] = {TRACES: final_traces}


	def get_playout_loop(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
	max_limit_num_traces):
	"""
	Performs the playout of a loop node, returning the traces allowed by the tree
	"""
	mr = min_rem_dict[node]
	mar = max_rem_dict[node]
	final_traces = set()
	do_traces = sorted(list(playout_dictio[node.children[0]][TRACES]), key=lambda x: len(x))
	redo_traces = sorted(list(playout_dictio[node.children[1]][TRACES]), key=lambda x: len(x))
	min_do_trace = min(len(x) for x in do_traces) if do_traces else 0
	to_visit = list((x, 0, 0) for x in do_traces)
	closed = set()
	diff1 = max_trace_length - mr
	diff2 = max_trace_length - min_do_trace - mr
	diff3 = min_trace_length - mar
	while to_visit:
	curr = to_visit.pop(0)
	curr_trace = curr[0]
	position = curr[1]
	num_loops = curr[2]
	if position == 0:
	if curr_trace in closed:
	continue
	closed.add(curr_trace)

	if diff3 <= len(curr_trace) <= diff1:
	final_traces.add(curr_trace)
	if len(final_traces) > max_limit_num_traces:
	break

	for y in redo_traces:
	new = curr_trace + y
	if len(new) <= diff2 and num_loops + 1 <= max_loop_occ:
	to_visit.append((new, 1, num_loops + 1))
	else:
	break

	elif position == 1:
	for y in do_traces:
	new = curr_trace + y
	if len(new) <= diff1:
	to_visit.append((new, 0, num_loops))
	else:
	break
	playout_dictio[node] = {TRACES: final_traces}


	def get_playout(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict, max_rem_dict,
	max_limit_num_traces):
	"""
	Performs a playout of an ode of the process tree, given the type
	"""
	if len(node.children) == 0:
	get_playout_leaf(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)
	elif node.operator == Operator.XOR:
	get_playout_xor(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)
	elif node.operator == Operator.PARALLEL:
	get_playout_parallel(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)
	elif node.operator == Operator.SEQUENCE:
	get_playout_sequence(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)
	elif node.operator == Operator.LOOP:
	get_playout_loop(node, playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)


	def apply(tree: ProcessTree, parameters : Optional[Dict[Union[str, Parameters], Any]] = None) -> EventLog:
	"""
	Performs an extensive playout of the process tree

	Parameters
	-------------
	tree
	Process tree
	parameters
	Possible parameters, including:
	- Parameters.MIN_TRACE_LENGTH => minimum length of a trace (default: 1)
	- Parameters.MAX_TRACE_LENGTH => maximum length of a trace (default: min_allowed_trace_length)
	- Parameters.MAX_LOOP_OCC => maximum number of occurrences for a loop (default: MAX_TRACE_LENGTH)
	- Parameters.ACTIVITY_KEY => activity key
	- Parameters.MAX_LIMIT_NUM_TRACES => maximum number to the limit of traces; the playout shall stop when the number is reached (default: 100000)
	Returns
	-------------
	log
	Event log
	"""
	if parameters is None:
	parameters = {}

	activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, xes_constants.DEFAULT_NAME_KEY)
	# to save memory in the returned log, allocate each activity once. to know the list of activities of the
	# process tree, use the footprints module
	fp_tree = fp_discovery.apply(tree, parameters=parameters)
	activities = fp_tree["activities"]
	activities = {act: Event({activity_key: act}) for act in activities}

	min_allowed_trace_length = bottomup_discovery.get_min_trace_length(tree, parameters=parameters)
	min_trace_length = exec_utils.get_param_value(Parameters.MIN_TRACE_LENGTH, parameters, 1)
	max_trace_length = exec_utils.get_param_value(Parameters.MAX_TRACE_LENGTH, parameters, min_allowed_trace_length)
	max_loop_occ = exec_utils.get_param_value(Parameters.MAX_LOOP_OCC, parameters, int(max_trace_length / 2))
	max_limit_num_traces = exec_utils.get_param_value(Parameters.MAX_LIMIT_NUM_TRACES, parameters, 100000)
	return_set_strings = exec_utils.get_param_value(Parameters.RETURN_SET_STRINGS, parameters, False)

	bottomup = bottomup_discovery.get_bottomup_nodes(tree, parameters=parameters)
	min_rem_dict = bottomup_discovery.get_min_rem_dict(tree, parameters=parameters)
	max_rem_dict = bottomup_discovery.get_max_rem_dict(tree, parameters=parameters)

	playout_dictio = {}
	for i in range(len(bottomup)):
	get_playout(bottomup[i], playout_dictio, min_trace_length, max_trace_length, max_loop_occ, min_rem_dict,
	max_rem_dict, max_limit_num_traces)
	tree_playout_traces = playout_dictio[tree][TRACES]

	if return_set_strings:
	return tree_playout_traces

	log = EventLog()
	for tr0 in tree_playout_traces:
	trace = Trace()
	for act in tr0:
	trace.append(activities[act])
	log.append(trace)

	return log