import os import traceback from pm4py.util import constants, pandas_utils import time import pm4py from pm4py.algo.discovery.inductive import algorithm as im_clean from pm4py.statistics.variants.log import get as variants_get LOGS_FOLDER = "../compressed_input_data" CLASSIFIER = "@@classifier" ENABLE_ALIGNMENTS = True NOISE_THRESHOLD = 0.2 VARIANT = im_clean.Variants.IM ENABLE_MULTIPROCESSING = False if __name__ == "__main__": for log_name in os.listdir(LOGS_FOLDER): if "xes" in log_name or "parquet" in log_name: try: log_path = os.path.join(LOGS_FOLDER, log_name) print("") print(log_path) if "xes" in log_name: from pm4py.statistics.attributes.log import get as attributes_get_log log = pm4py.read_xes(log_path, return_legacy_log_object=True) for trace in log: for event in trace: if True and "lifecycle:transition" in event: event["@@classifier"] = event["concept:name"] + "+" + event["lifecycle:transition"] # event["concept:name"] = event["concept:name"] + "+" + event["lifecycle:transition"] else: event["@@classifier"] = event["concept:name"] activities = set(attributes_get_log.get_attribute_values(log, CLASSIFIER).keys()) variants = variants_get.get_variants(log, parameters={"pm4py:param:activity_key": CLASSIFIER}) fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(log, parameters={ "pm4py:param:activity_key": CLASSIFIER}) elif "parquet" in log_name: from pm4py.statistics.attributes.pandas import get as attributes_get_pandas dataframe = pandas_utils.DATAFRAME.read_parquet(log_path) activities = set(attributes_get_pandas.get_attribute_values(dataframe, CLASSIFIER).keys()) variants = pm4py.get_variants_as_tuples(dataframe) variants = {",".join(x): y for x, y in variants.items()} fp_log = pm4py.algo.discovery.footprints.log.variants.entire_dataframe.apply(dataframe) print("start tree_im_clean") tree_im_clean = im_clean.apply(log, variant=VARIANT, parameters={"pm4py:param:activity_key": CLASSIFIER, "noise_threshold": NOISE_THRESHOLD, "multiprocessing": ENABLE_MULTIPROCESSING}) print(tree_im_clean) print("end tree_im_clean") fp_tree_clean = pm4py.algo.discovery.footprints.tree.variants.bottomup.apply(tree_im_clean) if not activities.issubset(fp_tree_clean["activities"]): print("ALERT! activities of the tree are less than the ones in the log!") print(activities.difference(fp_tree_clean["activities"])) time.sleep(5) fp_conf_im_clean = pm4py.algo.conformance.footprints.variants.log_extensive.apply(fp_log, fp_tree_clean) fitness_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_fitness(fp_log, fp_tree_clean, fp_conf_im_clean) if ENABLE_ALIGNMENTS: from pm4py.algo.conformance.alignments.process_tree.variants import search_graph_pt alignments_clean = search_graph_pt.apply(log, tree_im_clean, parameters={ search_graph_pt.Parameters.ACTIVITY_KEY: CLASSIFIER}) from pm4py.algo.evaluation.replay_fitness.variants import alignment_based fitness_al_clean = alignment_based.evaluate(alignments_clean)["average_trace_fitness"] if fitness_al_clean < fitness_im_clean: print("ALERT", fitness_al_clean, fitness_im_clean) time.sleep(5) #input() else: print("OK ALIGNMENTS", fitness_al_clean) precision_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_precision(fp_log, fp_tree_clean) print("IMCLEAN fp-fitness=%.3f fp-precision=%.3f" % (fitness_im_clean, precision_im_clean)) except: traceback.print_exc() input()