File size: 4,637 Bytes
8097001
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import os
import traceback

from pm4py.util import constants, pandas_utils

import time
import pm4py
from pm4py.algo.discovery.inductive import algorithm as im_clean
from pm4py.statistics.variants.log import get as variants_get

# Directory whose entries are scanned by the main loop; only names containing
# "xes" or "parquet" are processed.
LOGS_FOLDER = "../compressed_input_data"
# Event attribute used as the activity key for discovery, footprints and
# alignments (passed as "pm4py:param:activity_key").
CLASSIFIER = "@@classifier"
# When True, alignment-based fitness is also computed and compared against the
# footprints-based fitness.
ENABLE_ALIGNMENTS = True
# Forwarded to the inductive miner as its "noise_threshold" parameter.
NOISE_THRESHOLD = 0.2
# Inductive miner variant passed to im_clean.apply.
VARIANT = im_clean.Variants.IM
# Forwarded to the inductive miner as its "multiprocessing" parameter.
ENABLE_MULTIPROCESSING = False


if __name__ == "__main__":
    # For every XES / Parquet file in LOGS_FOLDER:
    #   1. load the log and compute its activities, variants and footprints;
    #   2. discover a process tree with the inductive miner;
    #   3. check that every log activity appears in the tree footprints;
    #   4. compute footprints-based fitness/precision and, when
    #      ENABLE_ALIGNMENTS is set, compare against alignment-based fitness.
    # Any failure prints the traceback and waits for Enter before continuing.
    activity_key_param = "pm4py:param:activity_key"

    for log_name in os.listdir(LOGS_FOLDER):
        is_xes = "xes" in log_name
        # Skip anything that is neither an XES nor a Parquet file.
        if not is_xes and "parquet" not in log_name:
            continue

        try:
            log_path = os.path.join(LOGS_FOLDER, log_name)
            print("")
            print(log_path)

            if is_xes:
                from pm4py.statistics.attributes.log import get as attributes_get_log

                log = pm4py.read_xes(log_path, return_legacy_log_object=True)
                # Build the classifier attribute on every event: activity name
                # joined with the lifecycle transition when one is present,
                # otherwise the plain activity name.
                for trace in log:
                    for event in trace:
                        if "lifecycle:transition" in event:
                            event[CLASSIFIER] = event["concept:name"] + "+" + event["lifecycle:transition"]
                        else:
                            event[CLASSIFIER] = event["concept:name"]
                activities = set(attributes_get_log.get_attribute_values(log, CLASSIFIER).keys())
                # The variants are not used further; the call is kept so that
                # variant computation is still exercised on every log.
                variants = variants_get.get_variants(log, parameters={activity_key_param: CLASSIFIER})
                fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(
                    log, parameters={activity_key_param: CLASSIFIER})
            else:
                from pm4py.statistics.attributes.pandas import get as attributes_get_pandas

                # Bind the dataframe to `log` so that discovery and alignments
                # below operate on THIS file. Previously only `dataframe` was
                # set here, so `log` was either undefined (NameError) or still
                # referred to the log of a previously processed XES file.
                # NOTE(review): confirm that search_graph_pt.apply accepts a
                # dataframe in the pm4py version in use.
                log = pandas_utils.DATAFRAME.read_parquet(log_path)
                activities = set(attributes_get_pandas.get_attribute_values(log, CLASSIFIER).keys())
                # Variants are computed (and re-keyed as comma-joined strings)
                # but not used further; kept to exercise the code path.
                variants = pm4py.get_variants_as_tuples(log)
                variants = {",".join(x): y for x, y in variants.items()}
                fp_log = pm4py.algo.discovery.footprints.log.variants.entire_dataframe.apply(log)

            print("start tree_im_clean")
            tree_im_clean = im_clean.apply(
                log,
                variant=VARIANT,
                parameters={
                    activity_key_param: CLASSIFIER,
                    "noise_threshold": NOISE_THRESHOLD,
                    "multiprocessing": ENABLE_MULTIPROCESSING,
                },
            )
            print(tree_im_clean)
            print("end tree_im_clean")

            fp_tree_clean = pm4py.algo.discovery.footprints.tree.variants.bottomup.apply(tree_im_clean)

            # Every activity seen in the log should also occur in the tree.
            if not activities.issubset(fp_tree_clean["activities"]):
                print("ALERT! activities of the tree are less than the ones in the log!")
                print(activities.difference(fp_tree_clean["activities"]))
                time.sleep(5)

            fp_conf_im_clean = pm4py.algo.conformance.footprints.variants.log_extensive.apply(fp_log, fp_tree_clean)
            fitness_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_fitness(
                fp_log, fp_tree_clean, fp_conf_im_clean)

            if ENABLE_ALIGNMENTS:
                from pm4py.algo.conformance.alignments.process_tree.variants import search_graph_pt
                from pm4py.algo.evaluation.replay_fitness.variants import alignment_based

                alignments_clean = search_graph_pt.apply(log, tree_im_clean, parameters={
                    search_graph_pt.Parameters.ACTIVITY_KEY: CLASSIFIER})
                fitness_al_clean = alignment_based.evaluate(alignments_clean)["average_trace_fitness"]
                # Flag the case where alignment fitness is lower than the
                # footprints-based fitness.
                if fitness_al_clean < fitness_im_clean:
                    print("ALERT", fitness_al_clean, fitness_im_clean)
                    time.sleep(5)
                else:
                    print("OK ALIGNMENTS", fitness_al_clean)

            precision_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_precision(fp_log, fp_tree_clean)
            print("IMCLEAN fp-fitness=%.3f fp-precision=%.3f" % (fitness_im_clean, precision_im_clean))
        except Exception:
            # Catch only Exception so Ctrl-C / SystemExit still terminate the
            # run instead of being swallowed and parked on input().
            traceback.print_exc()
            input()