# process_mining / pm4py / tests / simplified_interface.py
# Added from commit 80970016c5e1e79af7c37df0dd88e17587fe7bcf (b4ba3ec), ~65.8 kB.
import importlib.util
import os
import unittest
import pm4py
from pm4py.objects.bpmn.obj import BPMN
from pm4py.objects.petri_net.obj import PetriNet
from pm4py.objects.process_tree.obj import ProcessTree
from pm4py.util import constants, pandas_utils
from pm4py.objects.log.util import dataframe_utils
from pm4py.objects.log.importer.xes import importer as xes_importer
class SimplifiedInterfaceTest(unittest.TestCase):
def test_csv(self):
    """Round-trip a CSV log through log/stream/dataframe conversions and XES export."""
    df = pandas_utils.read_csv("input_data/running-example.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["time:timestamp"])
    df["case:concept:name"] = df["case:concept:name"].astype("string")
    log2 = pm4py.convert_to_event_log(df)
    # conversions are smoke-checked only; results are not asserted on
    stream1 = pm4py.convert_to_event_stream(log2)
    df2 = pm4py.convert_to_dataframe(log2)
    pm4py.write_xes(log2, "test_output_data/log.xes")
    os.remove("test_output_data/log.xes")

def test_alpha_miner(self):
    """Smoke test: alpha miner on both legacy and modern log objects."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_alpha(log)

def test_alpha_miner_plus(self):
    """Smoke test: alpha+ miner on both log object flavors."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_alpha_plus(log)

def test_inductive_miner(self):
    """Smoke test: inductive miner to Petri net."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log)

def test_inductive_miner_noise(self):
    """Smoke test: inductive miner with a noise threshold."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log, noise_threshold=0.5)

def test_heuristics_miner(self):
    """Smoke test: heuristics miner to Petri net."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_heuristics(log)

def test_inductive_miner_tree(self):
    """Smoke test: inductive miner to process tree, with and without noise threshold."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        tree = pm4py.discover_process_tree_inductive(log)
        tree = pm4py.discover_process_tree_inductive(log, noise_threshold=0.2)

def test_heuristics_miner_heu_net(self):
    """Smoke test: heuristics miner to heuristics net."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        heu_net = pm4py.discover_heuristics_net(log)

def test_dfg(self):
    """Smoke test: directly-follows graph discovery."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        dfg, sa, ea = pm4py.discover_directly_follows_graph(log)

def test_read_petri(self):
    """Smoke test: PNML import."""
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")

def test_read_tree(self):
    """Smoke test: PTML import."""
    tree = pm4py.read_ptml("input_data/running-example.ptml")

def test_read_dfg(self):
    """Smoke test: DFG import."""
    dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")

def test_alignments_simpl_interface(self):
    """Smoke test: alignment diagnostics, with and without diagnostics dataframe."""
    for legacy_obj in [True, False]:
        for diagn_df in [True, False]:
            log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            aligned_traces = pm4py.conformance_diagnostics_alignments(log, net, im, fm, return_diagnostics_dataframe=diagn_df)

def test_tbr_simpl_interface(self):
    """Smoke test: token-based replay diagnostics."""
    for legacy_obj in [True, False]:
        for diagn_df in [True, False]:
            log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
            net, im, fm = pm4py.discover_petri_net_inductive(log)
            replayed_traces = pm4py.conformance_diagnostics_token_based_replay(log, net, im, fm, return_diagnostics_dataframe=diagn_df)

def test_fitness_alignments(self):
    """Smoke test: alignment-based fitness."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        fitness_ali = pm4py.fitness_alignments(log, net, im, fm)

def test_fitness_tbr(self):
    """Smoke test: token-based-replay fitness."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        fitness_tbr = pm4py.fitness_token_based_replay(log, net, im, fm)

def test_precision_alignments(self):
    """Smoke test: alignment-based precision."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        precision_ali = pm4py.precision_alignments(log, net, im, fm)

def test_precision_tbr(self):
    """Smoke test: token-based-replay precision."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        net, im, fm = pm4py.discover_petri_net_inductive(log)
        precision_tbr = pm4py.precision_token_based_replay(log, net, im, fm)

def test_convert_to_tree_from_petri(self):
    """Petri net -> process tree conversion yields a ProcessTree."""
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    tree = pm4py.convert_to_process_tree(net, im, fm)
    self.assertTrue(isinstance(tree, ProcessTree))

def test_convert_to_tree_from_bpmn(self):
    """BPMN -> process tree conversion yields a ProcessTree."""
    bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
    tree = pm4py.convert_to_process_tree(bpmn)
    self.assertTrue(isinstance(tree, ProcessTree))

def test_convert_to_net_from_tree(self):
    """Process tree -> Petri net conversion yields a PetriNet."""
    tree = pm4py.read_ptml("input_data/running-example.ptml")
    net, im, fm = pm4py.convert_to_petri_net(tree)
    self.assertTrue(isinstance(net, PetriNet))

def test_convert_to_net_from_bpmn(self):
    """BPMN -> Petri net conversion yields a PetriNet."""
    bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
    net, im, fm = pm4py.convert_to_petri_net(bpmn)
    self.assertTrue(isinstance(net, PetriNet))

def test_convert_to_net_from_dfg(self):
    """DFG -> Petri net conversion yields a PetriNet."""
    dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
    net, im, fm = pm4py.convert_to_petri_net(dfg, sa, ea)
    self.assertTrue(isinstance(net, PetriNet))

def test_convert_to_net_from_heu(self):
    """Heuristics net -> Petri net conversion yields a PetriNet."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        heu_net = pm4py.discover_heuristics_net(log)
        net, im, fm = pm4py.convert_to_petri_net(heu_net)
        self.assertTrue(isinstance(net, PetriNet))

def test_convert_to_bpmn_from_tree(self):
    """Process tree -> BPMN conversion yields a BPMN object."""
    tree = pm4py.read_ptml("input_data/running-example.ptml")
    bpmn = pm4py.convert_to_bpmn(tree)
    self.assertTrue(isinstance(bpmn, BPMN))

def test_statistics_log(self):
    """Smoke test: log-level statistics helpers on both log object flavors."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.get_start_activities(log)
        pm4py.get_end_activities(log)
        pm4py.get_event_attributes(log)
        pm4py.get_trace_attributes(log)
        pm4py.get_event_attribute_values(log, "org:resource")
        pm4py.get_variants_as_tuples(log)

def test_statistics_df(self):
    """Smoke test: statistics helpers on a dataframe with custom column names."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.get_start_activities(df, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp")
    pm4py.get_end_activities(df, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp")
    pm4py.get_event_attributes(df)
    pm4py.get_event_attribute_values(df, "Resource", case_id_key="CaseID")
    pm4py.get_variants_as_tuples(df, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp")

def test_playout(self):
    """Smoke test: play-out of a Petri net."""
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    pm4py.play_out(net, im, fm)

def test_generator(self):
    """Smoke test: random process tree generation."""
    pm4py.generate_process_tree()
def test_mark_em_equation(self):
    """Smoke test: marking equation and extended marking equation on a
    synchronous product net built from the first trace of the example log.

    NOTE(review): the original looped over ``legacy_obj in [True, False]``
    but never used the variable (``xes_importer.apply`` takes no such flag),
    running the identical body twice; the redundant loop was removed.
    """
    log = xes_importer.apply("input_data/running-example.xes")
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
    m_h = pm4py.solve_marking_equation(sync_net, sync_im, sync_fm)
    em_h = pm4py.solve_extended_marking_equation(log[0], sync_net, sync_im, sync_fm)
def test_new_statistics_log(self):
    """Smoke test: trace-attribute values, EFG and case arrival average on logs."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.get_trace_attribute_values(log, "case:creator")
        pm4py.discover_eventually_follows_graph(log)
        pm4py.get_case_arrival_average(log)

def test_new_statistics_df(self):
    """Smoke test: EFG and case arrival average on a dataframe."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
                                                         timest_columns=["Timestamp"])
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_eventually_follows_graph(df, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp")
    pm4py.get_case_arrival_average(df, case_id_key="CaseID", activity_key="Activity", timestamp_key="Timestamp")

def test_serialization_log(self):
    """Serialize/deserialize an event log (requires pyarrow)."""
    if importlib.util.find_spec("pyarrow"):
        for legacy_obj in [True, False]:
            log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
            ser = pm4py.serialize(log)
            log2 = pm4py.deserialize(ser)

def test_serialization_dataframe(self):
    """Serialize/deserialize a dataframe (requires pyarrow)."""
    if importlib.util.find_spec("pyarrow"):
        df = pandas_utils.read_csv("input_data/running-example.csv")
        df = dataframe_utils.convert_timestamp_columns_in_df(df, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["time:timestamp"])
        ser = pm4py.serialize(df)
        df2 = pm4py.deserialize(ser)

def test_serialization_petri_net(self):
    """Serialize/deserialize a Petri net with its markings."""
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    ser = pm4py.serialize(net, im, fm)
    net2, im2, fm2 = pm4py.deserialize(ser)

def test_serialization_process_tree(self):
    """Serialize/deserialize a process tree."""
    tree = pm4py.read_ptml("input_data/running-example.ptml")
    ser = pm4py.serialize(tree)
    tree2 = pm4py.deserialize(ser)

def test_serialization_bpmn(self):
    """Serialize/deserialize a BPMN graph."""
    bpmn = pm4py.read_bpmn("input_data/running-example.bpmn")
    ser = pm4py.serialize(bpmn)
    bpmn2 = pm4py.deserialize(ser)

def test_serialization_dfg(self):
    """Serialize/deserialize a DFG with start/end activities."""
    dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
    ser = pm4py.serialize(dfg, sa, ea)
    dfg2, sa2, ea2 = pm4py.deserialize(ser)

def test_minimum_self_distance(self):
    """Smoke test: minimum self-distances."""
    import pm4py  # redundant (already imported at module level); kept as-is
    for legacy_obj in [True, False]:
        log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
        msd = pm4py.get_minimum_self_distances(log)

def test_minimum_self_distance_2(self):
    """Smoke test: minimum self-distance witnesses."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
        msd = pm4py.get_minimum_self_distance_witnesses(log)

def test_marking_equation_net(self):
    """Smoke test: marking equation on a discovered net."""
    import pm4py  # redundant; kept as-is
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = pm4py.discover_petri_net_inductive(log)
    pm4py.solve_marking_equation(net, im, fm)

def test_marking_equation_sync_net(self):
    """Marking equation on the synchronous product net; expected heuristic value 11."""
    import pm4py  # redundant; kept as-is
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = pm4py.discover_petri_net_inductive(log)
    sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
    res = pm4py.solve_marking_equation(sync_net, sync_im, sync_fm)
    self.assertIsNotNone(res)
    self.assertEqual(res, 11)

def test_ext_marking_equation_sync_net(self):
    """Extended marking equation on the synchronous product net."""
    import pm4py  # redundant; kept as-is
    log = xes_importer.apply(os.path.join("input_data", "running-example.xes"))
    net, im, fm = pm4py.discover_petri_net_inductive(log)
    sync_net, sync_im, sync_fm = pm4py.construct_synchronous_product_net(log[0], net, im, fm)
    res = pm4py.solve_extended_marking_equation(log[0], sync_net, sync_im, sync_fm)
    self.assertIsNotNone(res)

def test_alignments_tree_simpl_interface(self):
    """Alignments against a process tree model."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        for diagn_df in [True, False]:
            log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
            tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml"))
            res = pm4py.conformance_diagnostics_alignments(log, tree, return_diagnostics_dataframe=diagn_df)
            self.assertIsNotNone(res)

def test_alignments_dfg_simpl_interface(self):
    """Alignments against a DFG model."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        for diagn_df in [True, False]:
            log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
            dfg, sa, ea = pm4py.read_dfg(os.path.join("input_data", "running-example.dfg"))
            res = pm4py.conformance_diagnostics_alignments(log, dfg, sa, ea, return_diagnostics_dataframe=diagn_df)
            self.assertIsNotNone(res)

def test_alignments_bpmn_simpl_interface(self):
    """Alignments against a BPMN model."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        for diagn_df in [True, False]:
            log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
            bpmn_graph = pm4py.read_bpmn(os.path.join("input_data", "running-example.bpmn"))
            res = pm4py.conformance_diagnostics_alignments(log, bpmn_graph, return_diagnostics_dataframe=diagn_df)
            self.assertIsNotNone(res)

def test_discovery_inductive_bpmn(self):
    """Smoke test: inductive miner directly to BPMN."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
        bpmn_graph = pm4py.discover_bpmn_inductive(log)
        self.assertIsNotNone(bpmn_graph)

def test_generation(self):
    """Random process tree generation returns a non-None tree."""
    import pm4py  # redundant; kept as-is
    tree = pm4py.generate_process_tree()
    self.assertIsNotNone(tree)

def test_play_out_tree(self):
    """Smoke test: play-out of a process tree."""
    import pm4py  # redundant; kept as-is
    tree = pm4py.read_ptml(os.path.join("input_data", "running-example.ptml"))
    log = pm4py.play_out(tree)

def test_play_out_net(self):
    """Smoke test: play-out of a Petri net."""
    import pm4py  # redundant; kept as-is
    net, im, fm = pm4py.read_pnml(os.path.join("input_data", "running-example.pnml"))
    log = pm4py.play_out(net, im, fm)

def test_msd(self):
    """Minimum self-distances and their witnesses are both non-None."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
        res1 = pm4py.get_minimum_self_distance_witnesses(log)
        res2 = pm4py.get_minimum_self_distances(log)
        self.assertIsNotNone(res1)
        self.assertIsNotNone(res2)

def test_case_arrival(self):
    """Case arrival average is non-None."""
    import pm4py  # redundant; kept as-is
    for legacy_obj in [True, False]:
        log = pm4py.read_xes(os.path.join("input_data", "running-example.xes"), return_legacy_log_object=legacy_obj)
        avg = pm4py.get_case_arrival_average(log)
        self.assertIsNotNone(avg)

def test_efg(self):
    """Smoke test: eventually-follows graph discovery."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.discover_eventually_follows_graph(log)

def test_write_pnml(self):
    """Write a PNML file and clean it up."""
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    pm4py.write_pnml(net, im, fm, "test_output_data/running-example.pnml")
    os.remove("test_output_data/running-example.pnml")

def test_write_ptml(self):
    """Write a PTML file and clean it up."""
    process_tree = pm4py.read_ptml("input_data/running-example.ptml")
    pm4py.write_ptml(process_tree, "test_output_data/running-example.ptml")
    os.remove("test_output_data/running-example.ptml")

def test_write_dfg(self):
    """Write a DFG file and clean it up."""
    dfg, sa, ea = pm4py.read_dfg("input_data/running-example.dfg")
    pm4py.write_dfg(dfg, sa, ea, "test_output_data/running-example.dfg")
    os.remove("test_output_data/running-example.dfg")

def test_write_bpmn(self):
    """Write a BPMN file and clean it up."""
    bpmn_graph = pm4py.read_bpmn("input_data/running-example.bpmn")
    pm4py.write_bpmn(bpmn_graph, "test_output_data/running-example.bpmn")
    os.remove("test_output_data/running-example.bpmn")
def test_rebase(self):
    """Smoke test: rebase a dataframe onto standard pm4py column names.

    Fix: the original assigned the timestamp-converted frame to a throwaway
    variable ``df`` and then rebased the *unconverted* ``dataframe``; the
    conversion result is now assigned back so it is actually used.
    """
    dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
    dataframe["CaseID"] = dataframe["CaseID"].astype("string")
    dataframe = pm4py.rebase(dataframe, activity_key="Activity", case_id="CaseID", timestamp_key="Timestamp", timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT)
def test_parse_process_tree(self):
    """Smoke test: parse a process tree from its string notation."""
    tree = pm4py.parse_process_tree("-> ( 'a', X ( 'b', 'c' ), tau )")

def test_parse_log_string(self):
    """Smoke test: build an event log from comma-separated trace strings."""
    elog = pm4py.parse_event_log_string(["A,B,C", "A,B,D"])

def test_project_eattr(self):
    """Smoke test: project the log on an event attribute."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        lst = pm4py.project_on_event_attribute(log, "org:resource")

def test_sample_cases_log(self):
    """Smoke test: sample 2 cases from a log."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.sample_cases(log, 2)

def test_sample_cases_df(self):
    """Smoke test: sample 2 cases from a dataframe."""
    dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
    dataframe["CaseID"] = dataframe["CaseID"].astype("string")
    pm4py.sample_cases(dataframe, 2, case_id_key="CaseID")

def test_sample_events_log(self):
    """Smoke test: sample 2 events from a log."""
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.sample_events(log, 2)

def test_sample_events_df(self):
    """Smoke test: sample 2 events from a dataframe."""
    dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
    dataframe["CaseID"] = dataframe["CaseID"].astype("string")
    pm4py.sample_events(dataframe, 2)
def test_check_soundness(self):
    """The example net is a sound workflow net.

    Fix: ``pm4py.check_soundness`` returns a ``(is_sound, diagnostics)``
    tuple; the original ``assertTrue`` on the whole (non-empty) tuple was
    vacuously true, so the assertion now targets the boolean element.
    """
    net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
    self.assertTrue(pm4py.check_soundness(net, im, fm)[0])
def test_check_wfnet(self):
net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
self.assertTrue(pm4py.check_is_workflow_net(net))
def test_artificial_start_end_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.insert_artificial_start_end(log)
def test_artificial_start_end_dataframe(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.insert_artificial_start_end(dataframe, activity_key="Activity", timestamp_key="Timestamp", case_id_key="CaseID")
def test_hof_filter_log(self):
log = xes_importer.apply("input_data/running-example.xes")
pm4py.filter_log(log, lambda x: len(x) > 5)
def test_hof_filter_trace(self):
log = xes_importer.apply("input_data/running-example.xes")
pm4py.filter_trace(log[0], lambda x: x["concept:name"] == "decide")
def test_hof_sort_log(self):
log = xes_importer.apply("input_data/running-example.xes")
pm4py.sort_log(log, key=lambda x: x.attributes["concept:name"])
def test_hof_sort_trace(self):
log = xes_importer.apply("input_data/running-example.xes")
pm4py.sort_trace(log[0], key=lambda x: x["concept:name"])
def test_split_train_test_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.split_train_test(log, train_percentage=0.6)
def test_split_train_test_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.split_train_test(dataframe, train_percentage=0.6, case_id_key="CaseID")
def test_get_prefixes_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.get_prefixes_from_log(log, 3)
def test_get_prefixes_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_prefixes_from_log(dataframe, 3, case_id_key="CaseID")
def test_convert_reachab(self):
net, im, fm = pm4py.read_pnml("input_data/running-example.pnml")
ts = pm4py.convert_to_reachability_graph(net, im, fm)
def test_hw_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_handover_of_work_network(log)
def test_hw_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_handover_of_work_network(dataframe, resource_key="Resource", case_id_key="CaseID", timestamp_key="Timestamp")
def test_wt_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_working_together_network(log)
def test_wt_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_working_together_network(dataframe, resource_key="Resource", case_id_key="CaseID", timestamp_key="Timestamp")
def test_act_based_res_sim_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_activity_based_resource_similarity(log)
def test_act_based_res_sim_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_activity_based_resource_similarity(dataframe, activity_key="Activity", resource_key="Resource", case_id_key="CaseID", timestamp_key="Timestamp")
def test_subcontracting_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_subcontracting_network(log)
def test_subcontracting_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_subcontracting_network(dataframe, resource_key="Resource", case_id_key="CaseID", timestamp_key="Timestamp")
def test_roles_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_organizational_roles(log)
def test_roles_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_organizational_roles(dataframe, activity_key="Activity", resource_key="Resource", case_id_key="CaseID", timestamp_key="Timestamp")
def test_network_analysis_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_network_analysis(log, "case:concept:name", "case:concept:name", "org:resource", "org:resource", "concept:name")
def test_network_analysis_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_network_analysis(dataframe, "CaseID", "CaseID", "Resource", "Resource", "Activity", sorting_column="Timestamp", timestamp_column="Timestamp")
def test_discover_batches_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.discover_batches(log)
def test_discover_batches_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.discover_batches(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource")
def test_log_skeleton_log_simplified_interface(self):
for legacy_obj in [True, False]:
for diagn_df in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
model = pm4py.discover_log_skeleton(log)
pm4py.conformance_log_skeleton(log, model, return_diagnostics_dataframe=diagn_df)
def test_log_skeleton_df_simplified_interface(self):
for diagn_df in [True, False]:
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe,
timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
model = pm4py.discover_log_skeleton(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
pm4py.conformance_log_skeleton(dataframe, model, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", return_diagnostics_dataframe=diagn_df)
def test_temporal_profile_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
model = pm4py.discover_temporal_profile(log)
pm4py.conformance_temporal_profile(log, model)
def test_temporal_profile_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
model = pm4py.discover_temporal_profile(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
pm4py.conformance_temporal_profile(dataframe, model, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_ocel_get_obj_types(self):
ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
pm4py.ocel_get_object_types(ocel)
def test_ocel_get_attr_names(self):
ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
pm4py.ocel_get_attribute_names(ocel)
def test_ocel_flattening(self):
ocel = pm4py.read_ocel("input_data/ocel/example_log.csv")
pm4py.ocel_flattening(ocel, "order")
def test_stats_var_tuples_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_variants_as_tuples(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_stats_cycle_time_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.get_cycle_time(log)
def test_stats_cycle_time_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_cycle_time(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_stats_case_durations_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.get_all_case_durations(log)
def test_stats_case_durations_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_all_case_durations(dataframe, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_stats_case_duration_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.get_case_duration(log, "1")
def test_stats_case_duration_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_case_duration(dataframe, "1", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_stats_act_pos_summary_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.get_activity_position_summary(log, "check ticket")
def test_stats_act_pos_summary_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.get_activity_position_summary(dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_filter_act_done_diff_res_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.filter_activity_done_different_resources(log, "check ticket")
def test_filter_act_done_diff_res_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.filter_activity_done_different_resources(dataframe, "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource")
def test_filter_four_eyes_principle_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.filter_four_eyes_principle(log, "register request", "check ticket")
def test_filter_four_eyes_principle_df(self):
dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
dataframe["CaseID"] = dataframe["CaseID"].astype("string")
pm4py.filter_four_eyes_principle(dataframe, "register request", "check ticket", activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp", resource_key="Resource")
def test_filter_rel_occ_log(self):
for legacy_obj in [True, False]:
log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
pm4py.filter_log_relative_occurrence_event_attribute(log, 0.8, level="cases")
def test_filter_rel_occ_df(self):
    """Smoke test: relative-occurrence attribute filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_log_relative_occurrence_event_attribute(
        df, 0.8,
        attribute_key="Activity", level="cases",
        case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_start_activities_log(self):
    """Smoke test: start-activities filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_start_activities(event_log, ["register request"])
def test_filter_start_activities_df(self):
    """Smoke test: start-activities filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_start_activities(
        df, ["register request"],
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_end_activities_log(self):
    """Smoke test: end-activities filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_end_activities(event_log, ["pay compensation"])
def test_filter_end_activities_df(self):
    """Smoke test: end-activities filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_end_activities(
        df, ["pay compensation"],
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_eve_attr_values_log(self):
    """Smoke test: event-attribute-values filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_event_attribute_values(
            event_log, "concept:name",
            ["register request", "pay compensation", "reject request"],
        )
def test_filter_eve_attr_values_df(self):
    """Smoke test: event-attribute-values filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_event_attribute_values(
        df, "Activity",
        ["register request", "pay compensation", "reject request"],
        case_id_key="CaseID",
    )
def test_filter_trace_attr_values_log(self):
    """Smoke test: trace-attribute-values filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_trace_attribute_values(event_log, "case:creator", ["Fluxicon"])
def test_filter_variant_log(self):
    """Smoke test: variant filter, XES log input."""
    variant = ('register request', 'examine casually', 'check ticket', 'decide',
               'reinitiate request', 'examine thoroughly', 'check ticket', 'decide',
               'pay compensation')
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_variants(event_log, [variant])
def test_filter_variant_df(self):
    """Smoke test: variant filter, dataframe input."""
    variant = ('register request', 'examine casually', 'check ticket', 'decide',
               'reinitiate request', 'examine thoroughly', 'check ticket', 'decide',
               'pay compensation')
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_variants(
        df, [variant],
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_dfg_log(self):
    """Smoke test: directly-follows-relation filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_directly_follows_relation(event_log, [("register request", "check ticket")])
def test_filter_dfg_df(self):
    """Smoke test: directly-follows-relation filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_directly_follows_relation(
        df, [("register request", "check ticket")],
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_efg_log(self):
    """Smoke test: eventually-follows-relation filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_eventually_follows_relation(event_log, [("register request", "check ticket")])
def test_filter_efg_df(self):
    """Smoke test: eventually-follows-relation filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_eventually_follows_relation(
        df, [("register request", "check ticket")],
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_time_range_log(self):
    """Smoke test: time-range filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_time_range(event_log, "2009-01-01 01:00:00", "2011-01-01 01:00:00")
def test_filter_time_range_df(self):
    """Smoke test: time-range filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_time_range(
        df, "2009-01-01 01:00:00", "2011-01-01 01:00:00",
        case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_between_log(self):
    """Smoke test: between-activities filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_between(event_log, "check ticket", "decide")
def test_filter_between_df(self):
    """Smoke test: between-activities filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_between(
        df, "check ticket", "decide",
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_case_size_log(self):
    """Smoke test: case-size filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_case_size(event_log, 10, 20)
def test_filter_case_size_df(self):
    """Smoke test: case-size filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_case_size(df, 10, 20, case_id_key="CaseID")
def test_filter_case_performance_log(self):
    """Smoke test: case-performance filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_case_performance(event_log, 86400, 8640000)
def test_filter_case_performance_df(self):
    """Smoke test: case-performance filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_case_performance(
        df, 86400, 8640000,
        case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_activities_rework_log(self):
    """Smoke test: activities-rework filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_activities_rework(event_log, "check ticket")
def test_filter_act_rework_df(self):
    """Smoke test: activities-rework filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_activities_rework(
        df, "check ticket",
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_paths_perf_log(self):
    """Smoke test: paths-performance filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_paths_performance(event_log, ("register request", "check ticket"), 86400, 864000)
def test_filter_paths_perf_df(self):
    """Smoke test: paths-performance filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_paths_performance(
        df, ("register request", "check ticket"), 86400, 864000,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_vars_top_k_log(self):
    """Smoke test: top-k variants filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_variants_top_k(event_log, 1)
def test_filter_vars_top_k_df(self):
    """Smoke test: top-k variants filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_variants_top_k(
        df, 1,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_vars_coverage_log(self):
    """Smoke test: variants-by-coverage-percentage filter, XES log input.

    Renamed from ``test_filter_vars_coverage``: a second method with the
    same name was defined right below, so this definition was shadowed and
    unittest never ran it. The ``_log`` suffix follows the file's existing
    ``_log``/``_df`` naming convention.
    """
    for legacy_obj in [True, False]:
        log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=legacy_obj)
        pm4py.filter_variants_by_coverage_percentage(log, 0.1)
def test_filter_vars_coverage_df(self):
    """Smoke test: variants-by-coverage-percentage filter, dataframe input.

    Renamed from ``test_filter_vars_coverage``: this definition reused the
    name of the log-based test above, silently shadowing it so only one of
    the two ever ran. The ``_df`` suffix follows the file's existing
    ``_log``/``_df`` naming convention.
    """
    dataframe = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    dataframe = dataframe_utils.convert_timestamp_columns_in_df(dataframe, timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT, timest_columns=["Timestamp"])
    dataframe["CaseID"] = dataframe["CaseID"].astype("string")
    pm4py.filter_variants_by_coverage_percentage(dataframe, 0.1, activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp")
def test_filter_prefixes_log(self):
    """Smoke test: prefixes filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_prefixes(event_log, "check ticket")
def test_filter_prefixes_df(self):
    """Smoke test: prefixes filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_prefixes(
        df, "check ticket",
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_filter_suffixes_log(self):
    """Smoke test: suffixes filter, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.filter_suffixes(event_log, "check ticket")
def test_filter_suffixes_df(self):
    """Smoke test: suffixes filter, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.filter_suffixes(
        df, "check ticket",
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_discover_perf_dfg_log(self):
    """Smoke test: performance DFG discovery, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.discover_performance_dfg(event_log)
def test_discover_perf_dfg_df(self):
    """Smoke test: performance DFG discovery, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_performance_dfg(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_discover_footprints_log(self):
    """Smoke test: footprints discovery, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.discover_footprints(event_log)
def test_discover_ts_log(self):
    """Smoke test: transition-system discovery, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.discover_transition_system(event_log)
def test_discover_ts_df(self):
    """Smoke test: transition-system discovery, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_transition_system(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_discover_pref_tree_log(self):
    """Smoke test: prefix-tree discovery, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.discover_prefix_tree(event_log)
def test_discover_pref_tree_df(self):
    """Smoke test: prefix-tree discovery, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_prefix_tree(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_discover_ocpn(self):
    """Smoke test: object-centric Petri net discovery from a CSV OCEL."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.csv")
    pm4py.discover_oc_petri_net(ocel_log)
def test_conformance_alignments_pn_log_simplified_interface(self):
    """Smoke test: alignment diagnostics against a discovered net, XES log input."""
    for use_legacy in (True, False):
        for as_dataframe in (True, False):
            event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
            net, im, fm = pm4py.discover_petri_net_inductive(event_log)
            pm4py.conformance_diagnostics_alignments(
                event_log, net, im, fm,
                return_diagnostics_dataframe=as_dataframe,
            )
def test_conformance_alignments_pn_df_simplified_interface(self):
    """Smoke test: alignment diagnostics against a discovered net, dataframe input."""
    for as_dataframe in (True, False):
        df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
        df = dataframe_utils.convert_timestamp_columns_in_df(
            df,
            timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
            timest_columns=["Timestamp"],
        )
        df["CaseID"] = df["CaseID"].astype("string")
        net, im, fm = pm4py.discover_petri_net_inductive(
            df,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
        )
        pm4py.conformance_diagnostics_alignments(
            df, net, im, fm,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
            return_diagnostics_dataframe=as_dataframe,
        )
def test_conformance_diagnostics_fp_log(self):
    """Smoke test: footprints conformance diagnostics, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        process_tree = pm4py.discover_process_tree_inductive(event_log)
        pm4py.conformance_diagnostics_footprints(event_log, process_tree)
def test_fitness_fp_log(self):
    """Smoke test: footprints fitness, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        process_tree = pm4py.discover_process_tree_inductive(event_log)
        pm4py.fitness_footprints(event_log, process_tree)
def test_precision_fp_log(self):
    """Smoke test: footprints precision, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        process_tree = pm4py.discover_process_tree_inductive(event_log)
        pm4py.precision_footprints(event_log, process_tree)
def test_maximal_decomposition(self):
    """Smoke test: maximal decomposition of an imported accepting Petri net."""
    petri_net, initial_marking, final_marking = pm4py.read_pnml("input_data/running-example.pnml")
    pm4py.maximal_decomposition(petri_net, initial_marking, final_marking)
def test_fea_ext_log(self):
    """Smoke test: feature-table extraction, XES log input."""
    for use_legacy in (True, False):
        event_log = pm4py.read_xes("input_data/running-example.xes", return_legacy_log_object=use_legacy)
        pm4py.extract_features_dataframe(event_log)
def test_fea_ext_df(self):
    """Smoke test: feature-table extraction, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.extract_features_dataframe(
        df,
        activity_key="Activity", case_id_key="CaseID",
        timestamp_key="Timestamp", resource_key="Resource",
    )
def test_new_alpha_miner_df(self):
    """Smoke test: alpha miner, dataframe input with custom column names."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_petri_net_alpha(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_heu_miner_df(self):
    """Smoke test: heuristics miner, dataframe input with custom column names."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_petri_net_heuristics(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_dfg_df(self):
    """Smoke test: DFG discovery, dataframe input with custom column names."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_dfg(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_perf_dfg_df(self):
    """Smoke test: performance DFG discovery, dataframe input with custom columns."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    pm4py.discover_performance_dfg(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_tbr_df_simpl_interface(self):
    """Smoke test: token-based replay diagnostics, dataframe input."""
    for as_dataframe in (True, False):
        df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
        df = dataframe_utils.convert_timestamp_columns_in_df(
            df,
            timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
            timest_columns=["Timestamp"],
        )
        df["CaseID"] = df["CaseID"].astype("string")
        net, im, fm = pm4py.discover_petri_net_inductive(
            df,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
        )
        pm4py.conformance_diagnostics_token_based_replay(
            df, net, im, fm,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
            return_diagnostics_dataframe=as_dataframe,
        )
def test_new_tbr_fitness_df(self):
    """Smoke test: token-based replay fitness, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    net, im, fm = pm4py.discover_petri_net_inductive(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
    pm4py.fitness_token_based_replay(
        df, net, im, fm,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_tbr_precision_df(self):
    """Smoke test: token-based replay precision, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    net, im, fm = pm4py.discover_petri_net_inductive(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
    pm4py.precision_token_based_replay(
        df, net, im, fm,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_align_df_simpl_interface(self):
    """Smoke test: alignment diagnostics, dataframe input with custom columns."""
    for as_dataframe in (True, False):
        df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
        df = dataframe_utils.convert_timestamp_columns_in_df(
            df,
            timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
            timest_columns=["Timestamp"],
        )
        df["CaseID"] = df["CaseID"].astype("string")
        net, im, fm = pm4py.discover_petri_net_inductive(
            df,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
        )
        pm4py.conformance_diagnostics_alignments(
            df, net, im, fm,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
            return_diagnostics_dataframe=as_dataframe,
        )
def test_new_align_fitness_df(self):
    """Smoke test: alignment fitness, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    net, im, fm = pm4py.discover_petri_net_inductive(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
    pm4py.fitness_alignments(
        df, net, im, fm,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_new_align_precision_df(self):
    """Smoke test: alignment precision, dataframe input."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    net, im, fm = pm4py.discover_petri_net_inductive(
        df,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
    pm4py.precision_alignments(
        df, net, im, fm,
        activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
    )
def test_vis_case_duration_df(self):
    """Smoke test: case-duration graph rendering (skipped when matplotlib is absent)."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    if importlib.util.find_spec("matplotlib"):
        out_path = os.path.join("test_output_data", "case_duration.svg")
        pm4py.save_vis_case_duration_graph(
            df, out_path,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
        )
        os.remove(out_path)
def test_vis_ev_distr_graph_df(self):
    """Smoke test: events-distribution graph rendering (skipped when matplotlib is absent)."""
    df = pandas_utils.read_csv("input_data/running-example-transformed.csv")
    df = dataframe_utils.convert_timestamp_columns_in_df(
        df,
        timest_format=constants.DEFAULT_TIMESTAMP_PARSE_FORMAT,
        timest_columns=["Timestamp"],
    )
    df["CaseID"] = df["CaseID"].astype("string")
    out_path = os.path.join("test_output_data", "ev_distr_graph.svg")
    if importlib.util.find_spec("matplotlib"):
        pm4py.save_vis_events_distribution_graph(
            df, out_path,
            activity_key="Activity", case_id_key="CaseID", timestamp_key="Timestamp",
        )
        os.remove(out_path)
def test_ocel_object_graph(self):
    """Smoke test: object-graph discovery for every supported graph type."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    # Same five calls as before, in the same order, driven by data.
    for graph_type in ("object_interaction", "object_descendants",
                       "object_inheritance", "object_cobirth", "object_codeath"):
        pm4py.discover_objects_graph(ocel_log, graph_type=graph_type)
def test_ocel_temporal_summary(self):
    """Smoke test: temporal summary of an OCEL."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.ocel_temporal_summary(ocel_log)
def test_ocel_objects_summary(self):
    """Smoke test: objects summary of an OCEL."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.ocel_objects_summary(ocel_log)
def test_ocel_filtering_ev_ids(self):
    """Smoke test: filtering an OCEL by event identifiers."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.filter_ocel_events(ocel_log, ["e1"])
def test_ocel_filtering_obj_ids(self):
    """Smoke test: filtering an OCEL by object identifiers at different levels."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    for depth in (1, 2):
        pm4py.filter_ocel_objects(ocel_log, ["o1"], level=depth)
def test_ocel_filtering_obj_types(self):
    """Smoke test: filtering an OCEL by object types."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.filter_ocel_object_types(ocel_log, ["order"])
def test_ocel_filtering_cc(self):
    """Smoke test: filtering an OCEL to the connected component of one object."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.filter_ocel_cc_object(ocel_log, "o1")
def test_ocel_drop_duplicates(self):
    """Smoke test: dropping duplicate relations from an OCEL."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.ocel_drop_duplicates(ocel_log)
def test_ocel_add_index_based_timedelta(self):
    """Smoke test: adding an index-based timedelta to an OCEL."""
    ocel_log = pm4py.read_ocel("input_data/ocel/example_log.jsonocel")
    pm4py.ocel_add_index_based_timedelta(ocel_log)
def test_ocel2_xml(self):
    """Smoke test: OCEL 2.0 XML round-trip (read, write, clean up)."""
    out_path = "test_output_data/ocel20_example.xmlocel"
    ocel_log = pm4py.read_ocel2("input_data/ocel/ocel20_example.xmlocel")
    pm4py.write_ocel2(ocel_log, out_path)
    os.remove(out_path)
def test_ocel2_sqlite(self):
    """Smoke test: OCEL 2.0 SQLite round-trip (read, write, clean up)."""
    out_path = "test_output_data/ocel20_example.sqlite"
    ocel_log = pm4py.read_ocel2("input_data/ocel/ocel20_example.sqlite")
    pm4py.write_ocel2(ocel_log, out_path)
    os.remove(out_path)
# Allow running this test module directly (e.g. `python simplified_interface.py`).
if __name__ == "__main__":
    unittest.main()