Spaces:

linpershey
/

process_mining

Running

process_mining / pm4py /algo /evaluation /precision /variants /etconformance_token.py

kota

initial commit

e60e568 11 months ago

6.06 kB

	'''
	This file is part of PM4Py (More Info: https://pm4py.fit.fraunhofer.de).

	PM4Py is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	PM4Py is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with PM4Py. If not, see <https://www.gnu.org/licenses/>.
	'''
	from pm4py.algo.conformance.tokenreplay.variants import token_replay
	from pm4py.algo.conformance.tokenreplay import algorithm as executor

	from pm4py.objects import log as log_lib
	from pm4py.algo.evaluation.precision import utils as precision_utils
	from pm4py.statistics.start_activities.log.get import get_start_activities
	from pm4py.objects.petri_net.utils.align_utils import get_visible_transitions_eventually_enabled_by_marking
	from pm4py.util import exec_utils
	from enum import Enum
	from pm4py.util import constants
	from typing import Optional, Dict, Any, Union
	from pm4py.objects.log.obj import EventLog
	from pm4py.objects.petri_net.obj import PetriNet, Marking
	from pm4py.objects.conversion.log import converter as log_converter
	import pandas as pd


	class Parameters(Enum):
	    """
	    Keys accepted in the ``parameters`` dictionary of :func:`apply`.

	    Each member can be used as a dictionary key; the corresponding value
	    configures one aspect of the ETConformance precision computation.
	    """

	    # Name of the event attribute that holds the activity label.
	    ACTIVITY_KEY = constants.PARAMETER_CONSTANT_ACTIVITY_KEY

	    # Name of the attribute/column that holds the case identifier.
	    CASE_ID_KEY = constants.PARAMETER_CONSTANT_CASEID_KEY

	    # Token-based replay variant handed to the replay executor.
	    TOKEN_REPLAY_VARIANT = "token_replay_variant"

	    # Forwarded to the token-based replay under its own CLEANING_TOKEN_FLOOD key.
	    CLEANING_TOKEN_FLOOD = "cleaning_token_flood"

	    # Forwarded to the token-based replay under its own SHOW_PROGRESS_BAR key.
	    SHOW_PROGRESS_BAR = "show_progress_bar"

	    # NOTE(review): the two keys below are declared but not read by ``apply``
	    # in the visible part of this module - confirm whether they are still used.
	    MULTIPROCESSING = "multiprocessing"
	    CORES = "cores"

	"""
	Implementation of the approach described in paper

	Muñoz-Gama, Jorge, and Josep Carmona. "A fresh look at precision in process conformance." International Conference
	on Business Process Management. Springer, Berlin, Heidelberg, 2010.

	for measuring precision.

	For each prefix in the log, the reflected tasks are calculated (the activities that directly follow the prefix in the log).
	Then, a token replay is done on the prefix in order to get the activated transitions.
	The escaping edges are the set difference between the activated transitions and the reflected tasks.

	Then, precision is calculated by the formula used in the paper

	At the moment, the precision value is different from the one provided by the ProM plug-in,
	although the implementation seems to follow the paper concept
	"""


	def apply(log: Union[EventLog, pd.DataFrame], net: PetriNet, marking: Marking, final_marking: Marking, parameters: Optional[Dict[Union[str, Parameters], Any]] = None) -> float:
	    """
	    Get ET Conformance precision

	    Every distinct prefix of the log is replayed on the Petri net with
	    token-based replay. For each prefix that fits, the visible transitions
	    enabled after the replay ("activated transitions") are compared with the
	    activities that follow the prefix in the log; the activated transitions
	    never observed in the log are the "escaping edges". Both quantities are
	    weighted by the number of occurrences of the prefix. The empty prefix is
	    accounted for once per case, using the transitions eventually enabled by
	    the initial marking and the start activities of the log.

	    Parameters
	    ----------
	    log
	        Trace log (event log object, or a pandas dataframe)
	    net
	        Petri net
	    marking
	        Initial marking
	    final_marking
	        Final marking
	    parameters
	        Parameters of the algorithm, including:
	            Parameters.ACTIVITY_KEY -> Activity key
	            Parameters.CASE_ID_KEY -> Case identifier key
	            Parameters.TOKEN_REPLAY_VARIANT -> Variant of the token-based replay to use
	            Parameters.CLEANING_TOKEN_FLOOD -> Forwarded to the token-based replay
	            Parameters.SHOW_PROGRESS_BAR -> Forwarded to the token-based replay

	    Returns
	    ----------
	    precision
	        1 - (weighted escaping edges) / (weighted activated transitions);
	        1.0 when no activated transition is found at all
	    """
	    if parameters is None:
	        parameters = {}

	    cleaning_token_flood = exec_utils.get_param_value(Parameters.CLEANING_TOKEN_FLOOD, parameters, False)
	    token_replay_variant = exec_utils.get_param_value(Parameters.TOKEN_REPLAY_VARIANT, parameters,
	                                                      executor.Variants.TOKEN_REPLAY)
	    activity_key = exec_utils.get_param_value(Parameters.ACTIVITY_KEY, parameters, log_lib.util.xes.DEFAULT_NAME_KEY)
	    case_id_key = exec_utils.get_param_value(Parameters.CASE_ID_KEY, parameters, constants.CASE_CONCEPT_NAME)
	    show_progress_bar = exec_utils.get_param_value(Parameters.SHOW_PROGRESS_BAR, parameters, constants.SHOW_PROGRESS_BAR)

	    # The replay only has to tell which transitions are enabled after each
	    # prefix, so it stops as soon as a prefix is unfit and does not try to
	    # reach the final marking.
	    parameters_tr = {
	        token_replay.Parameters.SHOW_PROGRESS_BAR: show_progress_bar,
	        token_replay.Parameters.CONSIDER_REMAINING_IN_FITNESS: False,
	        token_replay.Parameters.TRY_TO_REACH_FINAL_MARKING_THROUGH_HIDDEN: False,
	        token_replay.Parameters.STOP_IMMEDIATELY_UNFIT: True,
	        token_replay.Parameters.WALK_THROUGH_HIDDEN_TRANS: True,
	        token_replay.Parameters.CLEANING_TOKEN_FLOOD: cleaning_token_flood,
	        token_replay.Parameters.ACTIVITY_KEY: activity_key,
	    }

	    # Plain dataframes are processed as they are; any other input is
	    # normalised to an event log. The exact type check is kept on purpose so
	    # that the dispatch stays aligned with the helpers called below.
	    if type(log) is not pd.DataFrame:
	        log = log_converter.apply(log, variant=log_converter.Variants.TO_EVENT_LOG, parameters=parameters)

	    prefixes, prefix_count = precision_utils.get_log_prefixes(log, activity_key=activity_key, case_id_key=case_id_key)
	    prefixes_keys = list(prefixes)
	    fake_log = precision_utils.form_fake_log(prefixes_keys, activity_key=activity_key)

	    aligned_traces = executor.apply(fake_log, net, marking, final_marking, variant=token_replay_variant,
	                                    parameters=parameters_tr)

	    # The empty prefix occurs exactly once per case. For a dataframe, len()
	    # is the number of event rows, so the cases are counted through the
	    # case identifier column instead.
	    if isinstance(log, pd.DataFrame):
	        num_cases = log[case_id_key].nunique()
	    else:
	        num_cases = len(log)

	    # Contribution of the empty prefix: transitions reachable from the
	    # initial marking versus the activities that actually start a case.
	    start_activities = set(get_start_activities(log, parameters=parameters))
	    trans_en_ini_marking = {x.label for x in get_visible_transitions_eventually_enabled_by_marking(net, marking)}
	    diff = trans_en_ini_marking.difference(start_activities)
	    sum_at = num_cases * len(trans_en_ini_marking)
	    sum_ee = num_cases * len(diff)

	    # Contribution of every non-empty prefix; the i-th replay result
	    # corresponds to the i-th prefix used to build the fake log.
	    for i, replay_result in enumerate(aligned_traces):
	        if not replay_result["trace_is_fit"]:
	            # Unfit prefixes are not taken into account
	            continue
	        prefix = prefixes_keys[i]
	        occurrences = prefix_count[prefix]
	        log_transitions = set(prefixes[prefix])
	        # Invisible transitions (label None) are not comparable with log activities
	        activated_transitions_labels = {
	            x.label for x in replay_result["enabled_transitions_in_marking"] if x.label is not None
	        }
	        escaping_edges = activated_transitions_labels.difference(log_transitions)
	        sum_at += len(activated_transitions_labels) * occurrences
	        sum_ee += len(escaping_edges) * occurrences

	    # Default value for precision, when no activated transitions (not even by
	    # looking at the initial marking) are found
	    precision = 1.0
	    if sum_at > 0:
	        precision = 1 - float(sum_ee) / float(sum_at)

	    return precision