process_mining / pm4py /tests /pm_extr_test /bas_inductive_test.py
linpershey's picture
Add 'pm4py/' from commit '80970016c5e1e79af7c37df0dd88e17587fe7bcf'
b4ba3ec
raw
history blame
4.64 kB
import os
import traceback
from pm4py.util import constants, pandas_utils
import time
import pm4py
from pm4py.algo.discovery.inductive import algorithm as im_clean
from pm4py.statistics.variants.log import get as variants_get
# Directory (relative path) whose entries are listed by the script; every entry
# with "xes" or "parquet" in its name is processed.
LOGS_FOLDER = "../compressed_input_data"
# Attribute name passed as the activity key to the variants, footprints,
# discovery and alignment calls below.
CLASSIFIER = "@@classifier"
# When True, alignment-based fitness is also computed and compared against the
# footprints-based fitness.
ENABLE_ALIGNMENTS = True
# Forwarded to the miner as the "noise_threshold" parameter.
# NOTE(review): whether the selected VARIANT actually uses it is not visible here.
NOISE_THRESHOLD = 0.2
# Inductive miner variant passed to im_clean.apply.
VARIANT = im_clean.Variants.IM
# Forwarded to the miner as the "multiprocessing" parameter.
ENABLE_MULTIPROCESSING = False
def _load_xes(log_path):
    """Read an XES log and prepare it for the checks.

    Every event receives a ``CLASSIFIER`` attribute: the ``concept:name``
    joined by ``"+"`` with ``lifecycle:transition`` when the latter is present,
    otherwise just the ``concept:name``.

    Returns a ``(log, activities, fp_log)`` tuple: the event log, the set of
    ``CLASSIFIER`` values found in it and the footprints of the log.
    """
    from pm4py.statistics.attributes.log import get as attributes_get_log

    activity_parameters = {"pm4py:param:activity_key": CLASSIFIER}
    log = pm4py.read_xes(log_path, return_legacy_log_object=True)
    for trace in log:
        for event in trace:
            if "lifecycle:transition" in event:
                event[CLASSIFIER] = event["concept:name"] + "+" + event["lifecycle:transition"]
            else:
                event[CLASSIFIER] = event["concept:name"]
    activities = set(attributes_get_log.get_attribute_values(log, CLASSIFIER))
    # The result is not used further; the call is kept so that a failure in
    # variants extraction is still reported for this log.
    variants_get.get_variants(log, parameters=activity_parameters)
    fp_log = pm4py.algo.discovery.footprints.log.variants.entire_event_log.apply(
        log, parameters=activity_parameters)
    return log, activities, fp_log


def _load_parquet(log_path):
    """Read a Parquet dataframe and prepare it for the checks.

    The dataframe must already contain a ``CLASSIFIER`` column, since its values
    are read directly from it.

    Returns a ``(log, activities, fp_log)`` tuple: an event log converted from
    the dataframe, the set of ``CLASSIFIER`` values and the footprints of the
    dataframe.
    """
    from pm4py.statistics.attributes.pandas import get as attributes_get_pandas

    dataframe = pandas_utils.DATAFRAME.read_parquet(log_path)
    activities = set(attributes_get_pandas.get_attribute_values(dataframe, CLASSIFIER))
    # The result is not used further; the call is kept so that a failure in
    # variants extraction is still reported for this log.
    pm4py.get_variants_as_tuples(dataframe)
    # Use the same activity key as the discovered tree, otherwise the
    # footprints comparison would mix two different label sets.
    fp_log = pm4py.algo.discovery.footprints.log.variants.entire_dataframe.apply(
        dataframe, parameters={"pm4py:param:activity_key": CLASSIFIER})
    # Discovery and alignments operate on ``log``; without this conversion the
    # name would be unbound (or left over from a previously processed file).
    log = pm4py.convert_to_event_log(dataframe)
    return log, activities, fp_log


def _evaluate_log(log_name, log_path):
    """Discover a process tree for one log and print the conformance checks.

    ``log_name`` selects the loader ("xes" in the name means XES, anything else
    is read as Parquet); ``log_path`` is the file that is read. Exceptions
    raised by the underlying calls are propagated to the caller.
    """
    if "xes" in log_name:
        log, activities, fp_log = _load_xes(log_path)
    else:
        log, activities, fp_log = _load_parquet(log_path)

    print("start tree_im_clean")
    tree_im_clean = im_clean.apply(log, variant=VARIANT, parameters={
        "pm4py:param:activity_key": CLASSIFIER,
        "noise_threshold": NOISE_THRESHOLD,
        "multiprocessing": ENABLE_MULTIPROCESSING,
    })
    print(tree_im_clean)
    print("end tree_im_clean")

    fp_tree_clean = pm4py.algo.discovery.footprints.tree.variants.bottomup.apply(tree_im_clean)
    missing_activities = activities.difference(fp_tree_clean["activities"])
    if missing_activities:
        print("ALERT! activities of the tree are less than the ones in the log!")
        print(missing_activities)
        # Pause so the alert stays visible before the output scrolls on.
        time.sleep(5)

    fp_conf_im_clean = pm4py.algo.conformance.footprints.variants.log_extensive.apply(
        fp_log, fp_tree_clean)
    fitness_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_fitness(
        fp_log, fp_tree_clean, fp_conf_im_clean)

    if ENABLE_ALIGNMENTS:
        from pm4py.algo.conformance.alignments.process_tree.variants import search_graph_pt
        from pm4py.algo.evaluation.replay_fitness.variants import alignment_based

        alignments_clean = search_graph_pt.apply(log, tree_im_clean, parameters={
            search_graph_pt.Parameters.ACTIVITY_KEY: CLASSIFIER})
        fitness_al_clean = alignment_based.evaluate(alignments_clean)["average_trace_fitness"]
        if fitness_al_clean < fitness_im_clean:
            print("ALERT", fitness_al_clean, fitness_im_clean)
            time.sleep(5)
        else:
            print("OK ALIGNMENTS", fitness_al_clean)

    precision_im_clean = pm4py.algo.conformance.footprints.util.evaluation.fp_precision(
        fp_log, fp_tree_clean)
    print("IMCLEAN fp-fitness=%.3f fp-precision=%.3f" % (fitness_im_clean, precision_im_clean))


if __name__ == "__main__":
    for log_name in os.listdir(LOGS_FOLDER):
        if "xes" not in log_name and "parquet" not in log_name:
            continue
        try:
            log_path = os.path.join(LOGS_FOLDER, log_name)
            print("")
            print(log_path)
            _evaluate_log(log_name, log_path)
        except Exception:
            # Report the failure and wait for Enter before moving to the next
            # log. KeyboardInterrupt/SystemExit are deliberately not caught so
            # the run can still be aborted.
            traceback.print_exc()
            input()