import importlib.util
import os
import unittest

import pm4py
from pm4py.objects.bpmn.obj import BPMN
from pm4py.objects.petri_net.obj import PetriNet
from pm4py.objects.process_tree.obj import ProcessTree
from pm4py.util import constants, pandas_utils
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.log.importer.xes import importer as xes_importer


class SimplifiedInterfaceTest(unittest.TestCase):
    """Smoke tests for pm4py's simplified (top-level) interface.

    Each test exercises one facade function of ``pm4py`` against the bundled
    "running example" artifacts under ``input_data/``.  Most tests only check
    that the call completes without raising; where a cheap invariant exists
    (result type, known value) it is asserted as well.
    """

    def _read_logs(self):
        """Yield the running-example XES log in both supported representations.

        ``return_legacy_log_object=True`` yields the legacy ``EventLog``
        object; ``False`` yields the dataframe-based log representation.
        """
        for legacy_obj in [True, False]:
            yield pm4py.read_xes("input_data/running-example.xes",
                                 return_legacy_log_object=legacy_obj)

    def _read_transformed_dataframe(self):
        """Read the transformed running example CSV as a dataframe.

        Parses the ``Timestamp`` column with the default timestamp format and
        casts ``CaseID`` to string, as the simplified-interface functions
        expect for their ``*_key`` parameters.
        """
        dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
        dataframe = dataframe_utils.convert_timestamp_columns_in_df(
            dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
            timest_columns=["Timestamp"])
        dataframe["CaseID"] = dataframe["CaseID"].astype("string")
        return dataframe

    def test_csv(self):
        df = pandas_utils.read_csv("input_data/running-example.csv")
        df = dataframe_utils.convert_timestamp_columns_in_df(
            df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
            timest_columns=["time:timestamp"])
        df["case:concept:name"] = df["case:concept:name"].astype("string")
        # round-trip through the three in-memory representations
        log2 = pm4py.convert_to_event_log(df)
        stream1 = pm4py.convert_to_event_stream(log2)
        df2 = pm4py.convert_to_dataframe(log2)
        pm4py.write_xes(log2, "test_output_data/log.xes")
        os.remove("test_output_data/log.xes")

    def test_alpha_miner(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_alpha(log)

    def test_alpha_miner_plus(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_alpha_plus(log)

    def test_inductive_miner(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log)

    def test_inductive_miner_noise(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log, noise_threshold=0.5)

    def test_heuristics_miner(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_heuristics(log)

    def test_inductive_miner_tree(self):
        for log in self._read_logs():
            tree = pm4py.discover_process_tree_inductive(log)
            tree = pm4py.discover_process_tree_inductive(log, noise_threshold=0.2)

    def test_heuristics_miner_heu_net(self):
        for log in self._read_logs():
            heu_net = pm4py.discover_heuristics_net(log)

    def test_dfg(self):
        for log in self._read_logs():
            dfg, sa, ea = pm4py.discover_directly_follows_graph(log)

    def test_read_petri(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")

    def test_read_tree(self):
        tree = pm4py.read_ptml("input_data/running-example.ptml")

    def test_read_dfg(self):
        dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")

    def test_alignments_simpl_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                net, im, fm = pm4py.discover_petri_net_inductive(log)
                aligned_traces = pm4py.conformance_diagnostics_alignments(
                    log, net, im, fm, return_diagnostics_dataframe=diagn_df)

    def test_tbr_simpl_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                net, im, fm = pm4py.discover_petri_net_inductive(log)
                replayed_traces = pm4py.conformance_diagnostics_token_based_replay(
                    log, net, im, fm, return_diagnostics_dataframe=diagn_df)

    def test_fitness_alignments(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            fitness_ali = pm4py.fitness_alignments(log, net, im, fm)

    def test_fitness_tbr(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            fitness_tbr = pm4py.fitness_token_based_replay(log, net, im, fm)

    def test_precision_alignments(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            precision_ali = pm4py.precision_alignments(log, net, im, fm)

    def test_precision_tbr(self):
        for log in self._read_logs():
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            precision_tbr = pm4py.precision_token_based_replay(log, net, im, fm)

    def test_convert_to_tree_from_petri(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        tree = pm4py.convert_to_process_tree(net, im, fm)
        self.assertTrue(isinstance(tree, ProcessTree))

    def test_convert_to_tree_from_bpmn(self):
        bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
        tree = pm4py.convert_to_process_tree(bpmn)
        self.assertTrue(isinstance(tree, ProcessTree))

    def test_convert_to_net_from_tree(self):
        tree = pm4py.read_ptml("input_data/running-example.ptml")
        net, im, fm = pm4py.convert_to_petri_net(tree)
        self.assertTrue(isinstance(net, PetriNet))

    def test_convert_to_net_from_bpmn(self):
        bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
        net, im, fm = pm4py.convert_to_petri_net(bpmn)
        self.assertTrue(isinstance(net, PetriNet))

    def test_convert_to_net_from_dfg(self):
        dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
        net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
        self.assertTrue(isinstance(net, PetriNet))

    def test_convert_to_net_from_heu(self):
        for log in self._read_logs():
            heu_net = pm4py.discover_heuristics_net(log)
            net, im, fm = pm4py.convert_to_petri_net(heu_net)
            self.assertTrue(isinstance(net, PetriNet))

    def test_convert_to_bpmn_from_tree(self):
        tree = pm4py.read_ptml("input_data/running-example.ptml")
        bpmn = pm4py.convert_to_bpmn(tree)
        self.assertTrue(isinstance(bpmn, BPMN))

    def test_statistics_log(self):
        for log in self._read_logs():
            pm4py.get_start_activities(log)
            pm4py.get_end_activities(log)
            pm4py.get_event_attributes(log)
            pm4py.get_trace_attributes(log)
            pm4py.get_event_attribute_values(log, "org:resource")
            pm4py.get_variants_as_tuples(log)

    def test_statistics_df(self):
        df = self._read_transformed_dataframe()
        pm4py.get_start_activities(df, case_id_key="CaseID", activity_key="Activity",
                                   timestamp_key="Timestamp")
        pm4py.get_end_activities(df, case_id_key="CaseID", activity_key="Activity",
                                 timestamp_key="Timestamp")
        pm4py.get_event_attributes(df)
        pm4py.get_event_attribute_values(df, "Resource", case_id_key="CaseID")
        pm4py.get_variants_as_tuples(df, case_id_key="CaseID", activity_key="Activity",
                                     timestamp_key="Timestamp")

    def test_playout(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        pm4py.play_out(net, im, fm)

    def test_generator(self):
        pm4py.generate_process_tree()

    def test_mark_em_equation(self):
        # NOTE(review): the original looped over a ``legacy_obj`` flag that was
        # never used (the log is read through ``xes_importer`` directly), so the
        # identical computation ran twice; the loop has been removed.
        log = xes_importer.apply("input_data/running-example.xes")
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
        m_h = pm4py.solve_marking_equation(sync_net, sync_im, sync_fm)
        em_h = pm4py.solve_extended_marking_equation(log[0], sync_net, sync_im, sync_fm)

    def test_new_statistics_log(self):
        for log in self._read_logs():
            pm4py.get_trace_attribute_values(log, "case:creator")
            pm4py.discover_eventually_follows_graph(log)
            pm4py.get_case_arrival_average(log)

    def test_new_statistics_df(self):
        df = self._read_transformed_dataframe()
        pm4py.discover_eventually_follows_graph(df, case_id_key="CaseID",
                                                activity_key="Activity",
                                                timestamp_key="Timestamp")
        pm4py.get_case_arrival_average(df, case_id_key="CaseID", activity_key="Activity",
                                       timestamp_key="Timestamp")

    def test_serialization_log(self):
        # pyarrow is an optional dependency of the serialization feature
        if importlib.util.find_spec("pyarrow"):
            for log in self._read_logs():
                ser = pm4py.serialize(log)
                log2 = pm4py.deserialize(ser)

    def test_serialization_dataframe(self):
        if importlib.util.find_spec("pyarrow"):
            df = pandas_utils.read_csv("input_data/running-example.csv")
            df = dataframe_utils.convert_timestamp_columns_in_df(
                df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
                timest_columns=["time:timestamp"])
            ser = pm4py.serialize(df)
            df2 = pm4py.deserialize(ser)

    def test_serialization_petri_net(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        ser = pm4py.serialize(net, im, fm)
        net2, im2, fm2 = pm4py.deserialize(ser)

    def test_serialization_process_tree(self):
        tree = pm4py.read_ptml("input_data/running-example.ptml")
        ser = pm4py.serialize(tree)
        tree2 = pm4py.deserialize(ser)

    def test_serialization_bpmn(self):
        bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
        ser = pm4py.serialize(bpmn)
        bpmn2 = pm4py.deserialize(ser)

    def test_serialization_dfg(self):
        dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
        ser = pm4py.serialize(dfg, sa, ea)
        dfg2, sa2, ea2 = pm4py.deserialize(ser)

    def test_minimum_self_distance(self):
        for log in self._read_logs():
            msd = pm4py.get_minimum_self_distances(log)

    def test_minimum_self_distance_2(self):
        for log in self._read_logs():
            msd = pm4py.get_minimum_self_distance_witnesses(log)

    def test_marking_equation_net(self):
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        pm4py.solve_marking_equation(net, im, fm)

    def test_marking_equation_sync_net(self):
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
        res = pm4py.solve_marking_equation(sync_net, sync_im, sync_fm)
        self.assertIsNotNone(res)
        # known heuristic value for the first trace of the running example
        self.assertEqual(res, 11)

    def test_ext_marking_equation_sync_net(self):
        log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
        res = pm4py.solve_extended_marking_equation(log[0], sync_net, sync_im, sync_fm)
        self.assertIsNotNone(res)

    def test_alignments_tree_simpl_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml"))
                res = pm4py.conformance_diagnostics_alignments(
                    log, tree, return_diagnostics_dataframe=diagn_df)
                self.assertIsNotNone(res)

    def test_alignments_dfg_simpl_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                dfg, sa, ea = pm4py.read_dfg(os.path.join("input_data", "running-example.dfg"))
                res = pm4py.conformance_diagnostics_alignments(
                    log, dfg, sa, ea, return_diagnostics_dataframe=diagn_df)
                self.assertIsNotNone(res)

    def test_alignments_bpmn_simpl_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                bpmn_graph = pm4py.read_bpmn(os.path.join("input_data", "running-example.bpmn"))
                res = pm4py.conformance_diagnostics_alignments(
                    log, bpmn_graph, return_diagnostics_dataframe=diagn_df)
                self.assertIsNotNone(res)

    def test_discovery_inductive_bpmn(self):
        for log in self._read_logs():
            bpmn_graph = pm4py.discover_bpmn_inductive(log)
            self.assertIsNotNone(bpmn_graph)

    def test_generation(self):
        tree = pm4py.generate_process_tree()
        self.assertIsNotNone(tree)

    def test_play_out_tree(self):
        tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml"))
        log = pm4py.play_out(tree)

    def test_play_out_net(self):
        net, im, fm = pm4py.read_pnml(os.path.join("input_data", "running-example.pnml"))
        log = pm4py.play_out(net, im, fm)

    def test_msd(self):
        for log in self._read_logs():
            res1 = pm4py.get_minimum_self_distance_witnesses(log)
            res2 = pm4py.get_minimum_self_distances(log)
            self.assertIsNotNone(res1)
            self.assertIsNotNone(res2)

    def test_case_arrival(self):
        for log in self._read_logs():
            avg = pm4py.get_case_arrival_average(log)
            self.assertIsNotNone(avg)

    def test_efg(self):
        for log in self._read_logs():
            pm4py.discover_eventually_follows_graph(log)

    def test_write_pnml(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        pm4py.write_pnml(net, im, fm, "test_output_data/running-example.pnml")
        os.remove("test_output_data/running-example.pnml")

    def test_write_ptml(self):
        process_tree = pm4py.read_ptml("input_data/running-example.ptml")
        pm4py.write_ptml(process_tree, "test_output_data/running-example.ptml")
        os.remove("test_output_data/running-example.ptml")

    def test_write_dfg(self):
        dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
        pm4py.write_dfg(dfg, sa, ea, "test_output_data/running-example.dfg")
        os.remove("test_output_data/running-example.dfg")

    def test_write_bpmn(self):
        bpmn_graph = pm4py.read_bpmn("input_data/running-example.bpmn")
        pm4py.write_bpmn(bpmn_graph, "test_output_data/running-example.bpmn")
        os.remove("test_output_data/running-example.bpmn")

    def test_rebase(self):
        # BUG FIX(review): the original converted the timestamps into a separate
        # variable ``df`` and then rebased the *unconverted* ``dataframe``,
        # silently discarding the conversion; the converted frame is now used.
        dataframe = self._read_transformed_dataframe()
        dataframe = pm4py.rebase(dataframe, activity_key="Activity", case_id="CaseID",
                                 timestamp_key="Timestamp",
                                 timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT)

    def test_parse_process_tree(self):
        tree = pm4py.parse_process_tree("-> ( 'a', X ( 'b', 'c' ), tau )")

    def test_parse_log_string(self):
        elog = pm4py.parse_event_log_string(["A,B,C", "A,B,D"])

    def test_project_eattr(self):
        for log in self._read_logs():
            lst = pm4py.project_on_event_attribute(log, "org:resource")

    def test_sample_cases_log(self):
        for log in self._read_logs():
            pm4py.sample_cases(log, 2)

    def test_sample_cases_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.sample_cases(dataframe, 2, case_id_key="CaseID")

    def test_sample_events_log(self):
        for log in self._read_logs():
            pm4py.sample_events(log, 2)

    def test_sample_events_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.sample_events(dataframe, 2)

    def test_check_soundness(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        self.assertTrue(pm4py.check_soundness(net, im, fm))

    def test_check_wfnet(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        self.assertTrue(pm4py.check_is_workflow_net(net))

    def test_artificial_start_end_log(self):
        for log in self._read_logs():
            pm4py.insert_artificial_start_end(log)

    def test_artificial_start_end_dataframe(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.insert_artificial_start_end(dataframe, activity_key="Activity",
                                          timestamp_key="Timestamp", case_id_key="CaseID")

    def test_hof_filter_log(self):
        log = xes_importer.apply("input_data/running-example.xes")
        pm4py.filter_log(log, lambda x: len(x) > 5)

    def test_hof_filter_trace(self):
        log = xes_importer.apply("input_data/running-example.xes")
        pm4py.filter_trace(log[0], lambda x: x["concept:name"] == "decide")

    def test_hof_sort_log(self):
        log = xes_importer.apply("input_data/running-example.xes")
        pm4py.sort_log(log, key=lambda x: x.attributes["concept:name"])

    def test_hof_sort_trace(self):
        log = xes_importer.apply("input_data/running-example.xes")
        pm4py.sort_trace(log[0], key=lambda x: x["concept:name"])

    def test_split_train_test_log(self):
        for log in self._read_logs():
            pm4py.split_train_test(log, train_percentage=0.6)

    def test_split_train_test_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.split_train_test(dataframe, train_percentage=0.6, case_id_key="CaseID")

    def test_get_prefixes_log(self):
        for log in self._read_logs():
            pm4py.get_prefixes_from_log(log, 3)

    def test_get_prefixes_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_prefixes_from_log(dataframe, 3, case_id_key="CaseID")

    def test_convert_reachab(self):
        net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
        ts = pm4py.convert_to_reachability_graph(net, im, fm)

    def test_hw_log(self):
        for log in self._read_logs():
            pm4py.discover_handover_of_work_network(log)

    def test_hw_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_handover_of_work_network(dataframe, resource_key="Resource",
                                                case_id_key="CaseID", timestamp_key="Timestamp")

    def test_wt_log(self):
        for log in self._read_logs():
            pm4py.discover_working_together_network(log)

    def test_wt_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_working_together_network(dataframe, resource_key="Resource",
                                                case_id_key="CaseID", timestamp_key="Timestamp")

    def test_act_based_res_sim_log(self):
        for log in self._read_logs():
            pm4py.discover_activity_based_resource_similarity(log)

    def test_act_based_res_sim_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_activity_based_resource_similarity(
            dataframe, activity_key="Activity", resource_key="Resource",
            case_id_key="CaseID", timestamp_key="Timestamp")

    def test_subcontracting_log(self):
        for log in self._read_logs():
            pm4py.discover_subcontracting_network(log)

    def test_subcontracting_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_subcontracting_network(dataframe, resource_key="Resource",
                                              case_id_key="CaseID", timestamp_key="Timestamp")

    def test_roles_log(self):
        for log in self._read_logs():
            pm4py.discover_organizational_roles(log)

    def test_roles_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_organizational_roles(
            dataframe, activity_key="Activity", resource_key="Resource",
            case_id_key="CaseID", timestamp_key="Timestamp")

    def test_network_analysis_log(self):
        for log in self._read_logs():
            pm4py.discover_network_analysis(log, "case:concept:name", "case:concept:name",
                                            "org:resource", "org:resource", "concept:name")

    def test_network_analysis_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_network_analysis(dataframe, "CaseID", "CaseID", "Resource",
                                        "Resource", "Activity", sorting_column="Timestamp",
                                        timestamp_column="Timestamp")

    def test_discover_batches_log(self):
        for log in self._read_logs():
            pm4py.discover_batches(log)

    def test_discover_batches_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.discover_batches(dataframe, activity_key="Activity", case_id_key="CaseID",
                               timestamp_key="Timestamp", resource_key="Resource")

    def test_log_skeleton_log_simplified_interface(self):
        for log in self._read_logs():
            for diagn_df in [True, False]:
                model = pm4py.discover_log_skeleton(log)
                pm4py.conformance_log_skeleton(log, model,
                                               return_diagnostics_dataframe=diagn_df)

    def test_log_skeleton_df_simplified_interface(self):
        for diagn_df in [True, False]:
            dataframe = self._read_transformed_dataframe()
            model = pm4py.discover_log_skeleton(dataframe, activity_key="Activity",
                                                case_id_key="CaseID", timestamp_key="Timestamp")
            pm4py.conformance_log_skeleton(dataframe, model, activity_key="Activity",
                                           case_id_key="CaseID", timestamp_key="Timestamp",
                                           return_diagnostics_dataframe=diagn_df)

    def test_temporal_profile_log(self):
        for log in self._read_logs():
            model = pm4py.discover_temporal_profile(log)
            pm4py.conformance_temporal_profile(log, model)

    def test_temporal_profile_df(self):
        dataframe = self._read_transformed_dataframe()
        model = pm4py.discover_temporal_profile(dataframe, activity_key="Activity",
                                                case_id_key="CaseID", timestamp_key="Timestamp")
        pm4py.conformance_temporal_profile(dataframe, model, activity_key="Activity",
                                           case_id_key="CaseID", timestamp_key="Timestamp")

    def test_ocel_get_obj_types(self):
        ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
        pm4py.ocel_get_object_types(ocel)

    def test_ocel_get_attr_names(self):
        ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
        pm4py.ocel_get_attribute_names(ocel)

    def test_ocel_flattening(self):
        ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
        pm4py.ocel_flattening(ocel, "order")

    def test_stats_var_tuples_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_variants_as_tuples(dataframe, activity_key="Activity",
                                     case_id_key="CaseID", timestamp_key="Timestamp")

    def test_stats_cycle_time_log(self):
        for log in self._read_logs():
            pm4py.get_cycle_time(log)

    def test_stats_cycle_time_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_cycle_time(dataframe, activity_key="Activity", case_id_key="CaseID",
                             timestamp_key="Timestamp")

    def test_stats_case_durations_log(self):
        for log in self._read_logs():
            pm4py.get_all_case_durations(log)

    def test_stats_case_durations_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_all_case_durations(dataframe, activity_key="Activity",
                                     case_id_key="CaseID", timestamp_key="Timestamp")

    def test_stats_case_duration_log(self):
        for log in self._read_logs():
            pm4py.get_case_duration(log, "1")

    def test_stats_case_duration_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_case_duration(dataframe, "1", activity_key="Activity",
                                case_id_key="CaseID", timestamp_key="Timestamp")

    def test_stats_act_pos_summary_log(self):
        for log in self._read_logs():
            pm4py.get_activity_position_summary(log, "check ticket")

    def test_stats_act_pos_summary_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.get_activity_position_summary(dataframe, "check ticket",
                                            activity_key="Activity", case_id_key="CaseID",
                                            timestamp_key="Timestamp")

    def test_filter_act_done_diff_res_log(self):
        for log in self._read_logs():
            pm4py.filter_activity_done_different_resources(log, "check ticket")

    def test_filter_act_done_diff_res_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_activity_done_different_resources(
            dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID",
            timestamp_key="Timestamp", resource_key="Resource")

    def test_filter_four_eyes_principle_log(self):
        for log in self._read_logs():
            pm4py.filter_four_eyes_principle(log, "register request", "check ticket")

    def test_filter_four_eyes_principle_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_four_eyes_principle(
            dataframe, "register request", "check ticket", activity_key="Activity",
            case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource")

    def test_filter_rel_occ_log(self):
        for log in self._read_logs():
            pm4py.filter_log_relative_occurrence_event_attribute(log, 0.8, level="cases")

    def test_filter_rel_occ_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_log_relative_occurrence_event_attribute(
            dataframe, 0.8, attribute_key="Activity", level="cases",
            case_id_key="CaseID", timestamp_key="Timestamp")

    def test_filter_start_activities_log(self):
        for log in self._read_logs():
            pm4py.filter_start_activities(log, ["register request"])

    def test_filter_start_activities_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_start_activities(dataframe, ["register request"],
                                      activity_key="Activity", case_id_key="CaseID",
                                      timestamp_key="Timestamp")

    def test_filter_end_activities_log(self):
        for log in self._read_logs():
            pm4py.filter_end_activities(log, ["pay compensation"])

    def test_filter_end_activities_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_end_activities(dataframe, ["pay compensation"],
                                    activity_key="Activity", case_id_key="CaseID",
                                    timestamp_key="Timestamp")

    def test_filter_eve_attr_values_log(self):
        for log in self._read_logs():
            pm4py.filter_event_attribute_values(
                log, "concept:name", ["register request", "pay compensation", "reject request"])

    def test_filter_eve_attr_values_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_event_attribute_values(
            dataframe, "Activity", ["register request", "pay compensation", "reject request"],
            case_id_key="CaseID")

    def test_filter_trace_attr_values_log(self):
        for log in self._read_logs():
            pm4py.filter_trace_attribute_values(log, "case:creator", ["Fluxicon"])

    def test_filter_variant_log(self):
        variant = ('register request', 'examine casually', 'check ticket', 'decide',
                   'reinitiate request', 'examine thoroughly', 'check ticket', 'decide',
                   'pay compensation')
        for log in self._read_logs():
            pm4py.filter_variants(log, [variant])

    def test_filter_variant_df(self):
        variant = ('register request', 'examine casually', 'check ticket', 'decide',
                   'reinitiate request', 'examine thoroughly', 'check ticket', 'decide',
                   'pay compensation')
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_variants(dataframe, [variant], activity_key="Activity",
                              case_id_key="CaseID", timestamp_key="Timestamp")

    def test_filter_dfg_log(self):
        for log in self._read_logs():
            pm4py.filter_directly_follows_relation(log, [("register request", "check ticket")])

    def test_filter_dfg_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_directly_follows_relation(
            dataframe, [("register request", "check ticket")], activity_key="Activity",
            case_id_key="CaseID", timestamp_key="Timestamp")

    def test_filter_efg_log(self):
        for log in self._read_logs():
            pm4py.filter_eventually_follows_relation(log, [("register request", "check ticket")])

    def test_filter_efg_df(self):
        dataframe = self._read_transformed_dataframe()
        pm4py.filter_eventually_follows_relation(
            dataframe, [("register request", "check ticket")], activity_key="Activity",
            case_id_key="CaseID", timestamp_key="Timestamp")

    def test_filter_time_range_log(self):
        for log in self._read_logs():
            pm4py.filter_time_range(log, "2009-01-01 01:00:00", "2011-01-01 01:00:00")

    # NOTE(review): ``test_filter_time_range_df`` begins here in the original
    # source but is truncated mid-statement at the end of this chunk; it is not
    # reconstructed to avoid guessing the missing portion — confirm against the
    # continuation of the file.
timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_time_range(dataframe, "2009-01-01 01:00:00", "2011-01-01 01:00:00", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_between_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_between(log, "check ticket", "decide") def test_filter_between_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_between(dataframe, "check ticket", "decide", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_case_size_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_case_size(log, 10, 20) def test_filter_case_size_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_case_size(dataframe, 10, 20, case_id_key="CaseID") def test_filter_case_performance_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_case_performance(log, 86400, 8640000) def test_filter_case_performance_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) 
dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_case_performance(dataframe, 86400, 8640000, case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_activities_rework_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_activities_rework(log, "check ticket") def test_filter_act_rework_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_activities_rework(dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_paths_perf_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_paths_performance(log, ("register request", "check ticket"), 86400, 864000) def test_filter_paths_perf_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_paths_performance(dataframe, ("register request", "check ticket"), 86400, 864000, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_vars_top_k_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_variants_top_k(log, 1) def test_filter_vars_top_k_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, 
timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_variants_top_k(dataframe, 1, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_vars_coverage(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_variants_by_coverage_percentage(log, 0.1) def test_filter_vars_coverage(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_variants_by_coverage_percentage(dataframe, 0.1, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_prefixes_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_prefixes(log, "check ticket") def test_filter_prefixes_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_prefixes(dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_filter_suffixes_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.filter_suffixes(log, "check ticket") def test_filter_suffixes_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, 
timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.filter_suffixes(dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_discover_perf_dfg_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.discover_performance_dfg(log) def test_discover_perf_dfg_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_performance_dfg(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_discover_footprints_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.discover_footprints(log) def test_discover_ts_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.discover_transition_system(log) def test_discover_ts_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_transition_system(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_discover_pref_tree_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.discover_prefix_tree(log) def test_discover_pref_tree_df(self): dataframe = 
pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_prefix_tree(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") def test_discover_ocpn(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.csv") pm4py.discover_oc_petri_net(ocel) def test_conformance_alignments_pn_log_simplified_interface(self): for legacy_obj in [True, False]: for diagn_df in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) net, im, fm = pm4py.discover_petri_net_inductive(log) pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=diagn_df) def test_conformance_alignments_pn_df_simplified_interface(self): for diagn_df in [True, False]: dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df) def test_conformance_diagnostics_fp_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) tree = pm4py.discover_process_tree_inductive(log) pm4py.conformance_diagnostics_footprints(log, tree) def test_fitness_fp_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", 
return_legacy_log_object=legacy_obj) tree = pm4py.discover_process_tree_inductive(log) pm4py.fitness_footprints(log, tree) def test_precision_fp_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) tree = pm4py.discover_process_tree_inductive(log) pm4py.precision_footprints(log, tree) def test_maximal_decomposition(self): net, im, fm = pm4py.read_pnml("input_data/running-example.pnml") pm4py.maximal_decomposition(net, im, fm) def test_fea_ext_log(self): for legacy_obj in [True, False]: log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj) pm4py.extract_features_dataframe(log) def test_fea_ext_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.extract_features_dataframe(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource") def test_new_alpha_miner_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_petri_net_alpha(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_heu_miner_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_petri_net_heuristics(dataframe, 
case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_dfg_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_dfg(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_perf_dfg_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") pm4py.discover_performance_dfg(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_tbr_df_simpl_interface(self): for ret_df in [True, False]: dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.conformance_diagnostics_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=ret_df) def test_new_tbr_fitness_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = 
pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.fitness_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_tbr_precision_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.precision_token_based_replay(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_align_df_simpl_interface(self): for diagn_df in [True, False]: dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.conformance_diagnostics_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df) def test_new_align_fitness_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", 
timestamp_key="Timestamp") pm4py.fitness_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_new_align_precision_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") net, im, fm = pm4py.discover_petri_net_inductive(dataframe, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") pm4py.precision_alignments(dataframe, net, im, fm, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp") def test_vis_case_duration_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") if importlib.util.find_spec("matplotlib"): target = os.path.join("test_output_data", "case_duration.svg") pm4py.save_vis_case_duration_graph(dataframe, target, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") os.remove(target) def test_vis_ev_distr_graph_df(self): dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv") dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"]) dataframe["CaseID"] = dataframe["CaseID"].astype("string") target = os.path.join("test_output_data", "ev_distr_graph.svg") if importlib.util.find_spec("matplotlib"): pm4py.save_vis_events_distribution_graph(dataframe, target, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp") os.remove(target) def test_ocel_object_graph(self): ocel = 
pm4py.read_ocel("input_data/ocel/example_log.jsonocel") ev_graph = pm4py.discover_objects_graph(ocel, graph_type="object_interaction") ev_graph = pm4py.discover_objects_graph(ocel, graph_type="object_descendants") ev_graph = pm4py.discover_objects_graph(ocel, graph_type="object_inheritance") ev_graph = pm4py.discover_objects_graph(ocel, graph_type="object_cobirth") ev_graph = pm4py.discover_objects_graph(ocel, graph_type="object_codeath") def test_ocel_temporal_summary(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") temp_summary = pm4py.ocel_temporal_summary(ocel) def test_ocel_objects_summary(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") objects_summary = pm4py.ocel_objects_summary(ocel) def test_ocel_filtering_ev_ids(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.filter_ocel_events(ocel, ["e1"]) def test_ocel_filtering_obj_ids(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.filter_ocel_objects(ocel, ["o1"], level=1) filtered_ocel = pm4py.filter_ocel_objects(ocel, ["o1"], level=2) def test_ocel_filtering_obj_types(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.filter_ocel_object_types(ocel, ["order"]) def test_ocel_filtering_cc(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.filter_ocel_cc_object(ocel, "o1") def test_ocel_drop_duplicates(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.ocel_drop_duplicates(ocel) def test_ocel_add_index_based_timedelta(self): ocel = pm4py.read_ocel("input_data/ocel/example_log.jsonocel") filtered_ocel = pm4py.ocel_add_index_based_timedelta(ocel) def test_ocel2_xml(self): ocel = pm4py.read_ocel2("input_data/ocel/ocel20_example.xmlocel") pm4py.write_ocel2(ocel, "test_output_data/ocel20_example.xmlocel") os.remove("test_output_data/ocel20_example.xmlocel") def 
test_ocel2_sqlite(self): ocel = pm4py.read_ocel2("input_data/ocel/ocel20_example.sqlite") pm4py.write_ocel2(ocel, "test_output_data/ocel20_example.sqlite") os.remove("test_output_data/ocel20_example.sqlite") if __name__ == "__main__": unittest.main()