diff --git "a/notebooks/experiment_generator.ipynb" "b/notebooks/experiment_generator.ipynb" --- "a/notebooks/experiment_generator.ipynb" +++ "b/notebooks/experiment_generator.ipynb" @@ -36,7 +36,7 @@ "outputs": [], "source": [ "#Features between 0 and 1: \n", - "normalized_feature_names = ['ratio_unique_traces_per_trace', 'trace_len_hist1', 'trace_len_hist2',\n", + "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_hist1', 'trace_len_hist2',\n", " 'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',\n", " 'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', \n", " 'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', \n", @@ -44,7 +44,7 @@ " 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "\n", - "normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', \n", + "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n", " 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "\n", @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "2be119c8", "metadata": {}, "outputs": [ @@ -72,50 +72,50 @@ "name": "stdout", "output_type": "stream", "text": [ - "21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('ratio_most_common_variant', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n", + "21 [('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_variants_per_number_of_traces'), ('ratio_most_common_variant', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('ratio_top_10_variants', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_variant_entropy', 'ratio_variants_per_number_of_traces'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants')]\n", "121\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rutpt.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rutpt.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rmcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enve.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rmcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enve.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rutpt.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rutpt.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rt10v.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rmcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n", - "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rvpnot.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n", "None\n" ] } @@ -152,7 +152,7 @@ " 'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),\n", " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n", " 'output_path': 'output/plots',\n", - " 'real_eventlog_path': 'data/34_bpic_features.csv',\n", + " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " }\n", " ]\n", @@ -204,7 +204,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "id": "dfd1a302", "metadata": {}, "outputs": [], @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "id": "218946b7", "metadata": {}, "outputs": [], @@ -234,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "id": "b1e3bb5a", "metadata": {}, "outputs": [], @@ -244,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "5de45389", "metadata": {}, "outputs": [ @@ -271,7 +271,7 @@ " \n", " log\n", " ratio_top_20_variants\n", - " normalized_sequence_entropy_linear_forgetting\n", + " epa_normalized_sequence_entropy_linear_forgetting\n", " \n", " \n", " \n", @@ -534,46 +534,46 @@ "34 objective_35 1.0 \n", "35 objective_36 1.0 \n", "\n", - " normalized_sequence_entropy_linear_forgetting \n", - "0 0.0 \n", - "1 0.1 \n", - "2 0.2 \n", - "3 0.3 \n", - "4 0.4 \n", - "5 0.5 \n", - "6 0.0 \n", - "7 0.1 \n", - "8 0.2 \n", - "9 0.3 \n", - "10 0.4 \n", - "11 0.5 \n", - "12 0.0 \n", - "13 0.1 \n", - "14 0.2 \n", - "15 0.3 \n", - "16 0.4 \n", - "17 0.5 \n", - "18 0.0 \n", - "19 0.1 \n", - "20 0.2 \n", - "21 0.3 \n", - "22 0.4 \n", - "23 0.5 \n", - "24 0.0 \n", - "25 0.1 \n", - "26 0.2 \n", - "27 0.3 \n", - "28 0.4 \n", - "29 0.5 \n", - "30 0.0 \n", - "31 0.1 \n", - "32 0.2 \n", - "33 0.3 \n", - "34 0.4 \n", - "35 0.5 " + " epa_normalized_sequence_entropy_linear_forgetting \n", + "0 0.0 \n", + "1 0.1 \n", + "2 0.2 \n", + "3 0.3 \n", + "4 0.4 \n", + "5 0.5 \n", + "6 0.0 \n", + "7 0.1 \n", + "8 0.2 \n", + "9 0.3 \n", + "10 0.4 \n", + "11 0.5 \n", + "12 0.0 \n", + "13 0.1 \n", + "14 0.2 \n", + "15 0.3 \n", + "16 0.4 \n", + "17 0.5 \n", + "18 0.0 \n", + "19 0.1 \n", + "20 0.2 \n", + "21 0.3 \n", + "22 0.4 \n", + "23 0.5 \n", + "24 0.0 \n", + "25 0.1 \n", + "26 0.2 \n", + "27 0.3 \n", + "28 0.4 \n", + "29 0.5 \n", + "30 0.0 \n", + "31 0.1 \n", + "32 0.2 \n", + "33 0.3 \n", + "34 0.4 \n", + "35 0.5 " ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -593,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 8, "id": "39ac74bb", "metadata": {}, "outputs": [ @@ -646,164 +646,7 @@ " trace_len_median\n", " trace_len_mode\n", " trace_len_std\n", - " trace_len_variance\n", - " trace_len_q1\n", - " trace_len_q3\n", - " trace_len_iqr\n", - " trace_len_geometric_mean\n", - " trace_len_geometric_std\n", - " trace_len_harmonic_mean\n", - " trace_len_skewness\n", - " trace_len_kurtosis\n", - " trace_len_coefficient_variation\n", - " trace_len_entropy\n", - " trace_len_hist1\n", - " trace_len_hist2\n", - " trace_len_hist3\n", - " trace_len_hist4\n", - " trace_len_hist5\n", - " trace_len_hist6\n", - " trace_len_hist7\n", - " trace_len_hist8\n", - " trace_len_hist9\n", - " trace_len_hist10\n", - " trace_len_skewness_hist\n", - " trace_len_kurtosis_hist\n", - " ratio_most_common_variant\n", - " ratio_top_1_variants\n", - " ratio_top_5_variants\n", - " ratio_top_10_variants\n", - " ratio_top_20_variants\n", - " ratio_top_50_variants\n", - " ratio_top_75_variants\n", - " mean_variant_occurrence\n", - " std_variant_occurrence\n", - " skewness_variant_occurrence\n", - " kurtosis_variant_occurrence\n", - " n_unique_activities\n", - " activities_min\n", - " activities_max\n", - " activities_mean\n", - " activities_median\n", - " activities_std\n", - " activities_variance\n", - " activities_q1\n", - " activities_q3\n", - " activities_iqr\n", - " activities_skewness\n", - " activities_kurtosis\n", - " n_unique_start_activities\n", - " start_activities_min\n", - " start_activities_max\n", - " start_activities_mean\n", - " start_activities_median\n", - " start_activities_std\n", - " start_activities_variance\n", - " start_activities_q1\n", - " start_activities_q3\n", - " start_activities_iqr\n", - " start_activities_skewness\n", - " start_activities_kurtosis\n", - " n_unique_end_activities\n", - " end_activities_min\n", - " end_activities_max\n", - " end_activities_mean\n", - " end_activities_median\n", - " end_activities_std\n", - " end_activities_variance\n", - " end_activities_q1\n", - " end_activities_q3\n", - " end_activities_iqr\n", - " end_activities_skewness\n", - " end_activities_kurtosis\n", - " entropy_trace\n", - " entropy_prefix\n", - " entropy_global_block\n", - " entropy_lempel_ziv\n", - " entropy_k_block_diff_1\n", - " entropy_k_block_diff_3\n", - " entropy_k_block_diff_5\n", - " entropy_k_block_ratio_1\n", - " entropy_k_block_ratio_3\n", - " entropy_k_block_ratio_5\n", - " entropy_knn_3\n", - " entropy_knn_5\n", - " entropy_knn_7\n", - " Log Nature\n", - " epa_variant_entropy\n", - " epa_normalized_variant_entropy\n", - " epa_sequence_entropy\n", - " epa_normalized_sequence_entropy\n", - " epa_sequence_entropy_linear_forgetting\n", - " epa_normalized_sequence_entropy_linear_forgetting\n", - " epa_sequence_entropy_exponential_forgetting\n", - " epa_normalized_sequence_entropy_exponential_forgetting\n", - " accumulated_time_time_min\n", - " accumulated_time_time_max\n", - " accumulated_time_time_mean\n", - " accumulated_time_time_median\n", - " accumulated_time_time_mode\n", - " accumulated_time_time_std\n", - " accumulated_time_time_variance\n", - " accumulated_time_time_q1\n", - " accumulated_time_time_q3\n", - " accumulated_time_time_iqr\n", - " accumulated_time_time_geometric_mean\n", - " accumulated_time_time_geometric_std\n", - " accumulated_time_time_harmonic_mean\n", - " accumulated_time_time_skewness\n", - " accumulated_time_time_kurtosis\n", - " accumulated_time_time_coefficient_variation\n", - " accumulated_time_time_entropy\n", - " accumulated_time_time_skewness_hist\n", - " accumulated_time_time_kurtosis_hist\n", - " execution_time_time_min\n", - " execution_time_time_max\n", - " execution_time_time_mean\n", - " execution_time_time_median\n", - " execution_time_time_mode\n", - " execution_time_time_std\n", - " execution_time_time_variance\n", - " execution_time_time_q1\n", - " execution_time_time_q3\n", - " execution_time_time_iqr\n", - " execution_time_time_geometric_mean\n", - " execution_time_time_geometric_std\n", - " execution_time_time_harmonic_mean\n", - " execution_time_time_skewness\n", - " execution_time_time_kurtosis\n", - " execution_time_time_coefficient_variation\n", - " execution_time_time_entropy\n", - " execution_time_time_skewness_hist\n", - " execution_time_time_kurtosis_hist\n", - " remaining_time_time_min\n", - " remaining_time_time_max\n", - " remaining_time_time_mean\n", - " remaining_time_time_median\n", - " remaining_time_time_mode\n", - " remaining_time_time_std\n", - " remaining_time_time_variance\n", - " remaining_time_time_q1\n", - " remaining_time_time_q3\n", - " remaining_time_time_iqr\n", - " remaining_time_time_geometric_mean\n", - " remaining_time_time_geometric_std\n", - " remaining_time_time_harmonic_mean\n", - " remaining_time_time_skewness\n", - " remaining_time_time_kurtosis\n", - " remaining_time_time_coefficient_variation\n", - " remaining_time_time_entropy\n", - " remaining_time_time_skewness_hist\n", - " remaining_time_time_kurtosis_hist\n", - " within_day_time_min\n", - " within_day_time_max\n", - " within_day_time_mean\n", - " within_day_time_median\n", - " within_day_time_mode\n", - " within_day_time_std\n", - " within_day_time_variance\n", - " within_day_time_q1\n", - " within_day_time_q3\n", + " ...\n", " within_day_time_iqr\n", " within_day_time_geometric_mean\n", " within_day_time_geometric_std\n", @@ -829,164 +672,7 @@ " 54.0\n", " 61\n", " 19.894977\n", - " 395.810090\n", - " 44.0\n", - " 62.0\n", - " 18.0\n", - " 48.150111\n", - " 1.695311\n", - " 37.583741\n", - " 0.054138\n", - " 0.804992\n", - " 0.373193\n", - " 6.646715\n", - " 0.003853\n", - " 0.004863\n", - " 4.679243e-03\n", - " 0.023947\n", - " 2.376321e-02\n", - " 8.257487e-03\n", - " 0.004771\n", - " 1.376248e-03\n", - " 6.422490e-04\n", - " 1.834997e-04\n", - " 0.054138\n", - " 0.804992\n", - " 0.002404\n", - " 0.014423\n", - " 0.054087\n", - " 0.103365\n", - " 0.203125\n", - " 0.502404\n", - " 0.751202\n", - " 1.004831\n", - " 0.069337\n", - " 14.283027\n", - " 202.004854\n", - " 410\n", - " 1\n", - " 830\n", - " 108.180488\n", - " 12.0\n", - " 187.588162\n", - " 3.518932e+04\n", - " 3.0\n", - " 125.5\n", - " 122.5\n", - " 2.129412\n", - " 3.808278\n", - " 14\n", - " 1\n", - " 731\n", - " 59.428571\n", - " 1.0\n", - " 186.717401\n", - " 3.486339e+04\n", - " 1.0\n", - " 8.25\n", - " 7.25\n", - " 3.300411\n", - " 8.960767\n", - " 82\n", - " 1\n", - " 216\n", - " 10.146341\n", - " 1.0\n", - " 35.318800\n", - " 1.247418e+03\n", - " 1.00\n", - " 3.00\n", - " 2.00\n", - " 5.098791\n", - " 25.861991\n", - " 9.691\n", - " 14.524\n", - " 19.448\n", - " 3.859\n", - " 7.105\n", - " 7.105\n", - " 7.105\n", - " 7.105\n", - " 7.105\n", - " 7.105\n", - " 5.545\n", - " 5.039\n", - " 4.721\n", - " Real\n", - " 2.405122e+05\n", - " 0.627973\n", - " 2.858769e+05\n", - " 0.602371\n", - " 1.505466e+05\n", - " 0.317217\n", - " 1.853129e+05\n", - " 0.390473\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " ...\n", " NaN\n", " NaN\n", " NaN\n", @@ -1010,164 +696,7 @@ " 49.0\n", " 49\n", " 34.872131\n", - " 1216.065487\n", - " 44.0\n", - " 59.0\n", - " 15.0\n", - " 53.775008\n", - " 1.367397\n", - " 51.651502\n", - " 26.126459\n", - " 1720.399665\n", - " 0.607618\n", - " 10.598758\n", - " 0.003385\n", - " 0.000005\n", - " 9.288448e-07\n", - " 0.000000\n", - " 0.000000e+00\n", - " 0.000000e+00\n", - " 0.000000\n", - " 0.000000e+00\n", - " 7.740373e-08\n", - " 7.740373e-08\n", - " 26.126459\n", - " 1720.399665\n", - " 0.026981\n", - " 0.290374\n", - " 0.373006\n", - " 0.415371\n", - " 0.480335\n", - " 0.675204\n", - " 0.837590\n", - " 1.539481\n", - " 12.487438\n", - " 64.625680\n", - " 5083.455806\n", - " 41\n", - " 17\n", - " 466141\n", - " 61323.560976\n", - " 7530.0\n", - " 120522.247417\n", - " 1.452561e+10\n", - " 902.0\n", - " 45907.0\n", - " 45005.0\n", - " 2.444007\n", - " 4.773254\n", - " 4\n", - " 2\n", - " 38623\n", - " 10952.250000\n", - " 2592.0\n", - " 16111.407548\n", - " 2.595775e+08\n", - " 36.5\n", - " 13507.75\n", - " 13471.25\n", - " 1.098736\n", - " -0.714800\n", - " 21\n", - " 1\n", - " 34830\n", - " 2086.142857\n", - " 13.0\n", - " 7431.744981\n", - " 5.523083e+07\n", - " 2.00\n", - " 193.00\n", - " 191.00\n", - " 4.062387\n", - " 14.952824\n", - " 13.191\n", - " 16.272\n", - " 20.972\n", - " 1.023\n", - " -0.010\n", - " 1.855\n", - " 0.511\n", - " 1.403\n", - " 3.572\n", - " 2.001\n", - " 7.849\n", - " 7.371\n", - " 7.067\n", - " Real\n", - " 1.156384e+07\n", - " 0.712079\n", - " 2.114626e+07\n", - " 0.570688\n", - " 1.414023e+07\n", - " 0.381612\n", - " 1.557608e+07\n", - " 0.420362\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " ...\n", " NaN\n", " NaN\n", " NaN\n", @@ -1191,506 +720,7 @@ " 6.0\n", " 6\n", " 2.166129\n", - " 4.692114\n", - " 6.0\n", - " 6.0\n", - " 0.0\n", - " 5.414708\n", - " 1.704965\n", - " 4.356445\n", - " 1.276525\n", - " 12.296006\n", - " 0.362158\n", - " 7.197193\n", - " 0.036030\n", - " 0.008136\n", - " 3.411204e-01\n", - " 0.023536\n", - " 3.777313e-03\n", - " 1.743375e-03\n", - " 0.000291\n", - " 1.452813e-03\n", - " 0.000000e+00\n", - " 5.811251e-04\n", - " 1.276525\n", - " 12.296006\n", - " 0.497211\n", - " 0.497211\n", - " 0.796374\n", - " 0.887029\n", - " 0.930265\n", - " 0.959554\n", - " 0.979777\n", - " 12.362069\n", - " 68.360277\n", - " 9.380687\n", - " 92.281919\n", - " 27\n", - " 1\n", - " 1434\n", - " 317.666667\n", - " 27.0\n", - " 553.389823\n", - " 3.062403e+05\n", - " 8.0\n", - " 50.0\n", - " 42.0\n", - " 1.342951\n", - " -0.178094\n", - " 1\n", - " 1434\n", - " 1434\n", - " 1434.000000\n", - " 1434.0\n", - " 0.000000\n", - " 0.000000e+00\n", - " 1434.0\n", - " 1434.00\n", - " 0.00\n", - " NaN\n", - " NaN\n", - " 14\n", - " 1\n", - " 828\n", - " 102.428571\n", - " 6.0\n", - " 225.871555\n", - " 5.101796e+04\n", - " 1.25\n", - " 33.25\n", - " 32.00\n", - " 2.471765\n", - " 4.846541\n", - " 3.209\n", - " 4.746\n", - " 7.019\n", - " 0.385\n", - " 2.672\n", - " 2.966\n", - " 0.804\n", - " 1.484\n", - " 2.966\n", - " 2.966\n", - " 3.260\n", - " 2.845\n", - " 2.584\n", - " Real\n", - " 2.382326e+03\n", - " 0.689363\n", - " 1.829627e+04\n", - " 0.235532\n", - " 7.814868e+03\n", - " 0.100603\n", - " 1.072870e+04\n", - " 0.138113\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 3\n", - " BPIC15_3\n", - " 1409\n", - " 1349\n", - " 0.957417\n", - " 3\n", - " 124\n", - " 42.356991\n", - " 42.0\n", - " 44\n", - " 16.138406\n", - " 260.448143\n", - " 38.0\n", - " 47.0\n", - " 9.0\n", - " 37.637731\n", - " 1.786726\n", - " 29.092933\n", - " -0.009541\n", - " 1.543369\n", - " 0.381009\n", - " 7.167153\n", - " 0.006921\n", - " 0.004340\n", - " 1.630604e-02\n", - " 0.036953\n", - " 1.173096e-02\n", - " 4.105837e-03\n", - " 0.001584\n", - " 5.278933e-04\n", - " 1.173096e-04\n", - " 5.865481e-05\n", - " -0.009541\n", - " 1.543369\n", - " 0.010646\n", - " 0.049681\n", - " 0.090135\n", - " 0.137686\n", - " 0.233499\n", - " 0.520937\n", - " 0.760114\n", - " 1.044477\n", - " 0.592348\n", - " 17.964130\n", - " 358.019511\n", - " 383\n", - " 1\n", - " 1409\n", - " 155.825065\n", - " 16.0\n", - " 306.310544\n", - " 9.382615e+04\n", - " 5.0\n", - " 108.5\n", - " 103.5\n", - " 2.446349\n", - " 5.280931\n", - " 9\n", - " 1\n", - " 1348\n", - " 156.555556\n", - " 8.0\n", - " 421.270858\n", - " 1.774691e+05\n", - " 3.0\n", - " 14.00\n", - " 11.00\n", - " 2.474158\n", - " 4.122971\n", - " 119\n", - " 1\n", - " 342\n", - " 11.840336\n", - " 2.0\n", - " 39.557210\n", - " 1.564773e+03\n", - " 1.00\n", - " 7.00\n", - " 6.00\n", - " 6.217217\n", - " 43.335525\n", - " 10.317\n", - " 14.226\n", - " 18.743\n", - " 3.182\n", - " -0.007\n", - " 6.780\n", - " 6.780\n", - " 6.780\n", - " 6.780\n", - " 6.780\n", - " 5.701\n", - " 5.212\n", - " 4.900\n", - " Real\n", - " 2.981464e+05\n", - " 0.661781\n", - " 3.975043e+05\n", - " 0.605676\n", - " 2.241393e+05\n", - " 0.341521\n", - " 2.657571e+05\n", - " 0.404934\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4\n", - " BPI_Challenge_2019\n", - " 251734\n", - " 11973\n", - " 0.047562\n", - " 1\n", - " 990\n", - " 6.339720\n", - " 5.0\n", - " 5\n", - " 13.057417\n", - " 170.496137\n", - " 5.0\n", - " 6.0\n", - " 1.0\n", - " 5.173569\n", - " 1.635822\n", - " 4.592844\n", - " 22.132989\n", - " 753.772202\n", - " 2.059621\n", - " 12.044057\n", - " 0.010078\n", - " 0.000020\n", - " 9.559579e-06\n", - " 0.000003\n", - " 3.614967e-07\n", - " 1.606652e-07\n", - " 0.000000\n", - " 4.016630e-08\n", - " 8.033260e-08\n", - " 8.033260e-08\n", - " 22.132989\n", - " 753.772202\n", - " 0.199758\n", - " 0.871424\n", - " 0.929990\n", - " 0.946368\n", - " 0.959767\n", - " 0.976217\n", - " 0.988106\n", - " 21.025140\n", - " 594.255619\n", - " 64.772702\n", - " 4917.319751\n", - " 42\n", - " 2\n", - " 314097\n", - " 37998.166667\n", - " 1628.0\n", - " 80833.669206\n", - " 6.534082e+09\n", - " 202.0\n", - " 11536.0\n", - " 11334.0\n", - " 2.169648\n", - " 3.263594\n", - " 8\n", - " 2\n", - " 199867\n", - " 31466.750000\n", - " 869.0\n", - " 65387.493286\n", - " 4.275524e+09\n", - " 97.0\n", - " 14224.25\n", - " 14127.25\n", - " 2.059742\n", - " 2.535789\n", - " 32\n", - " 1\n", - " 181328\n", - " 7866.687500\n", - " 64.5\n", - " 31658.428996\n", - " 1.002256e+09\n", - " 9.00\n", - " 1027.25\n", - " 1018.25\n", - " 5.135607\n", - " 25.170543\n", - " 6.243\n", - " 8.811\n", - " 19.447\n", - " 0.346\n", - " -0.041\n", - " 1.530\n", - " 0.840\n", - " 0.620\n", - " 3.244\n", - " 1.913\n", - " 7.333\n", - " 6.882\n", - " 6.601\n", - " Real\n", - " 1.690369e+06\n", - " 0.645530\n", - " 7.477256e+06\n", - " 0.328029\n", - " 7.298458e+06\n", - " 0.320185\n", - " 7.300663e+06\n", - " 0.320282\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", + " ...\n", " NaN\n", " NaN\n", " NaN\n", @@ -1701,6 +731,20 @@ " NaN\n", " NaN\n", " NaN\n", + " \n", + " \n", + " 3\n", + " BPIC15_3\n", + " 1409\n", + " 1349\n", + " 0.957417\n", + " 3\n", + " 124\n", + " 42.356991\n", + " 42.0\n", + " 44\n", + " 16.138406\n", + " ...\n", " NaN\n", " NaN\n", " NaN\n", @@ -1711,6 +755,20 @@ " NaN\n", " NaN\n", " NaN\n", + " \n", + " \n", + " 4\n", + " BPI_Challenge_2019\n", + " 251734\n", + " 11973\n", + " 0.047562\n", + " 1\n", + " 990\n", + " 6.339720\n", + " 5.0\n", + " 5\n", + " 13.057417\n", + " ...\n", " NaN\n", " NaN\n", " NaN\n", @@ -1724,6 +782,7 @@ " \n", " \n", "\n", + "

5 rows × 178 columns

\n", "" ], "text/plain": [ @@ -1741,490 +800,58 @@ "3 0.957417 3 124 \n", "4 0.047562 1 990 \n", "\n", - " trace_len_mean trace_len_median trace_len_mode trace_len_std \n", - "0 53.310096 54.0 61 19.894977 \\\n", - "1 57.391541 49.0 49 34.872131 \n", - "2 5.981172 6.0 6 2.166129 \n", - "3 42.356991 42.0 44 16.138406 \n", - "4 6.339720 5.0 5 13.057417 \n", - "\n", - " trace_len_variance trace_len_q1 trace_len_q3 trace_len_iqr \n", - "0 395.810090 44.0 62.0 18.0 \\\n", - "1 1216.065487 44.0 59.0 15.0 \n", - "2 4.692114 6.0 6.0 0.0 \n", - "3 260.448143 38.0 47.0 9.0 \n", - "4 170.496137 5.0 6.0 1.0 \n", - "\n", - " trace_len_geometric_mean trace_len_geometric_std trace_len_harmonic_mean \n", - "0 48.150111 1.695311 37.583741 \\\n", - "1 53.775008 1.367397 51.651502 \n", - "2 5.414708 1.704965 4.356445 \n", - "3 37.637731 1.786726 29.092933 \n", - "4 5.173569 1.635822 4.592844 \n", - "\n", - " trace_len_skewness trace_len_kurtosis trace_len_coefficient_variation \n", - "0 0.054138 0.804992 0.373193 \\\n", - "1 26.126459 1720.399665 0.607618 \n", - "2 1.276525 12.296006 0.362158 \n", - "3 -0.009541 1.543369 0.381009 \n", - "4 22.132989 753.772202 2.059621 \n", - "\n", - " trace_len_entropy trace_len_hist1 trace_len_hist2 trace_len_hist3 \n", - "0 6.646715 0.003853 0.004863 4.679243e-03 \\\n", - "1 10.598758 0.003385 0.000005 9.288448e-07 \n", - "2 7.197193 0.036030 0.008136 3.411204e-01 \n", - "3 7.167153 0.006921 0.004340 1.630604e-02 \n", - "4 12.044057 0.010078 0.000020 9.559579e-06 \n", - "\n", - " trace_len_hist4 trace_len_hist5 trace_len_hist6 trace_len_hist7 \n", - "0 0.023947 2.376321e-02 8.257487e-03 0.004771 \\\n", - "1 0.000000 0.000000e+00 0.000000e+00 0.000000 \n", - "2 0.023536 3.777313e-03 1.743375e-03 0.000291 \n", - "3 0.036953 1.173096e-02 4.105837e-03 0.001584 \n", - "4 0.000003 3.614967e-07 1.606652e-07 0.000000 \n", - "\n", - " trace_len_hist8 trace_len_hist9 trace_len_hist10 \n", - "0 1.376248e-03 6.422490e-04 1.834997e-04 \\\n", - "1 0.000000e+00 7.740373e-08 7.740373e-08 \n", - "2 1.452813e-03 0.000000e+00 5.811251e-04 \n", - "3 5.278933e-04 1.173096e-04 5.865481e-05 \n", - "4 4.016630e-08 8.033260e-08 8.033260e-08 \n", - "\n", - " trace_len_skewness_hist trace_len_kurtosis_hist \n", - "0 0.054138 0.804992 \\\n", - "1 26.126459 1720.399665 \n", - "2 1.276525 12.296006 \n", - "3 -0.009541 1.543369 \n", - "4 22.132989 753.772202 \n", - "\n", - " ratio_most_common_variant ratio_top_1_variants ratio_top_5_variants \n", - "0 0.002404 0.014423 0.054087 \\\n", - "1 0.026981 0.290374 0.373006 \n", - "2 0.497211 0.497211 0.796374 \n", - "3 0.010646 0.049681 0.090135 \n", - "4 0.199758 0.871424 0.929990 \n", - "\n", - " ratio_top_10_variants ratio_top_20_variants ratio_top_50_variants \n", - "0 0.103365 0.203125 0.502404 \\\n", - "1 0.415371 0.480335 0.675204 \n", - "2 0.887029 0.930265 0.959554 \n", - "3 0.137686 0.233499 0.520937 \n", - "4 0.946368 0.959767 0.976217 \n", - "\n", - " ratio_top_75_variants mean_variant_occurrence std_variant_occurrence \n", - "0 0.751202 1.004831 0.069337 \\\n", - "1 0.837590 1.539481 12.487438 \n", - "2 0.979777 12.362069 68.360277 \n", - "3 0.760114 1.044477 0.592348 \n", - "4 0.988106 21.025140 594.255619 \n", - "\n", - " skewness_variant_occurrence kurtosis_variant_occurrence \n", - "0 14.283027 202.004854 \\\n", - "1 64.625680 5083.455806 \n", - "2 9.380687 92.281919 \n", - "3 17.964130 358.019511 \n", - "4 64.772702 4917.319751 \n", - "\n", - " n_unique_activities activities_min activities_max activities_mean \n", - "0 410 1 830 108.180488 \\\n", - "1 41 17 466141 61323.560976 \n", - "2 27 1 1434 317.666667 \n", - "3 383 1 1409 155.825065 \n", - "4 42 2 314097 37998.166667 \n", - "\n", - " activities_median activities_std activities_variance activities_q1 \n", - "0 12.0 187.588162 3.518932e+04 3.0 \\\n", - "1 7530.0 120522.247417 1.452561e+10 902.0 \n", - "2 27.0 553.389823 3.062403e+05 8.0 \n", - "3 16.0 306.310544 9.382615e+04 5.0 \n", - "4 1628.0 80833.669206 6.534082e+09 202.0 \n", - "\n", - " activities_q3 activities_iqr activities_skewness activities_kurtosis \n", - "0 125.5 122.5 2.129412 3.808278 \\\n", - "1 45907.0 45005.0 2.444007 4.773254 \n", - "2 50.0 42.0 1.342951 -0.178094 \n", - "3 108.5 103.5 2.446349 5.280931 \n", - "4 11536.0 11334.0 2.169648 3.263594 \n", - "\n", - " n_unique_start_activities start_activities_min start_activities_max \n", - "0 14 1 731 \\\n", - "1 4 2 38623 \n", - "2 1 1434 1434 \n", - "3 9 1 1348 \n", - "4 8 2 199867 \n", - "\n", - " start_activities_mean start_activities_median start_activities_std \n", - "0 59.428571 1.0 186.717401 \\\n", - "1 10952.250000 2592.0 16111.407548 \n", - "2 1434.000000 1434.0 0.000000 \n", - "3 156.555556 8.0 421.270858 \n", - "4 31466.750000 869.0 65387.493286 \n", - "\n", - " start_activities_variance start_activities_q1 start_activities_q3 \n", - "0 3.486339e+04 1.0 8.25 \\\n", - "1 2.595775e+08 36.5 13507.75 \n", - "2 0.000000e+00 1434.0 1434.00 \n", - "3 1.774691e+05 3.0 14.00 \n", - "4 4.275524e+09 97.0 14224.25 \n", - "\n", - " start_activities_iqr start_activities_skewness start_activities_kurtosis \n", - "0 7.25 3.300411 8.960767 \\\n", - "1 13471.25 1.098736 -0.714800 \n", - "2 0.00 NaN NaN \n", - "3 11.00 2.474158 4.122971 \n", - "4 14127.25 2.059742 2.535789 \n", - "\n", - " n_unique_end_activities end_activities_min end_activities_max \n", - "0 82 1 216 \\\n", - "1 21 1 34830 \n", - "2 14 1 828 \n", - "3 119 1 342 \n", - "4 32 1 181328 \n", - "\n", - " end_activities_mean end_activities_median end_activities_std \n", - "0 10.146341 1.0 35.318800 \\\n", - "1 2086.142857 13.0 7431.744981 \n", - "2 102.428571 6.0 225.871555 \n", - "3 11.840336 2.0 39.557210 \n", - "4 7866.687500 64.5 31658.428996 \n", - "\n", - " end_activities_variance end_activities_q1 end_activities_q3 \n", - "0 1.247418e+03 1.00 3.00 \\\n", - "1 5.523083e+07 2.00 193.00 \n", - "2 5.101796e+04 1.25 33.25 \n", - "3 1.564773e+03 1.00 7.00 \n", - "4 1.002256e+09 9.00 1027.25 \n", - "\n", - " end_activities_iqr end_activities_skewness end_activities_kurtosis \n", - "0 2.00 5.098791 25.861991 \\\n", - "1 191.00 4.062387 14.952824 \n", - "2 32.00 2.471765 4.846541 \n", - "3 6.00 6.217217 43.335525 \n", - "4 1018.25 5.135607 25.170543 \n", - "\n", - " entropy_trace entropy_prefix entropy_global_block entropy_lempel_ziv \n", - "0 9.691 14.524 19.448 3.859 \\\n", - "1 13.191 16.272 20.972 1.023 \n", - "2 3.209 4.746 7.019 0.385 \n", - "3 10.317 14.226 18.743 3.182 \n", - "4 6.243 8.811 19.447 0.346 \n", - "\n", - " entropy_k_block_diff_1 entropy_k_block_diff_3 entropy_k_block_diff_5 \n", - "0 7.105 7.105 7.105 \\\n", - "1 -0.010 1.855 0.511 \n", - "2 2.672 2.966 0.804 \n", - "3 -0.007 6.780 6.780 \n", - "4 -0.041 1.530 0.840 \n", - "\n", - " entropy_k_block_ratio_1 entropy_k_block_ratio_3 entropy_k_block_ratio_5 \n", - "0 7.105 7.105 7.105 \\\n", - "1 1.403 3.572 2.001 \n", - "2 1.484 2.966 2.966 \n", - "3 6.780 6.780 6.780 \n", - "4 0.620 3.244 1.913 \n", - "\n", - " entropy_knn_3 entropy_knn_5 entropy_knn_7 Log Nature \n", - "0 5.545 5.039 4.721 Real \\\n", - "1 7.849 7.371 7.067 Real \n", - "2 3.260 2.845 2.584 Real \n", - "3 5.701 5.212 4.900 Real \n", - "4 7.333 6.882 6.601 Real \n", - "\n", - " epa_variant_entropy epa_normalized_variant_entropy epa_sequence_entropy \n", - "0 2.405122e+05 0.627973 2.858769e+05 \\\n", - "1 1.156384e+07 0.712079 2.114626e+07 \n", - "2 2.382326e+03 0.689363 1.829627e+04 \n", - "3 2.981464e+05 0.661781 3.975043e+05 \n", - "4 1.690369e+06 0.645530 7.477256e+06 \n", - "\n", - " epa_normalized_sequence_entropy epa_sequence_entropy_linear_forgetting \n", - "0 0.602371 1.505466e+05 \\\n", - "1 0.570688 1.414023e+07 \n", - "2 0.235532 7.814868e+03 \n", - "3 0.605676 2.241393e+05 \n", - "4 0.328029 7.298458e+06 \n", - "\n", - " epa_normalized_sequence_entropy_linear_forgetting \n", - "0 0.317217 \\\n", - "1 0.381612 \n", - "2 0.100603 \n", - "3 0.341521 \n", - "4 0.320185 \n", - "\n", - " epa_sequence_entropy_exponential_forgetting \n", - "0 1.853129e+05 \\\n", - "1 1.557608e+07 \n", - "2 1.072870e+04 \n", - "3 2.657571e+05 \n", - "4 7.300663e+06 \n", - "\n", - " epa_normalized_sequence_entropy_exponential_forgetting \n", - "0 0.390473 \\\n", - "1 0.420362 \n", - "2 0.138113 \n", - "3 0.404934 \n", - "4 0.320282 \n", - "\n", - " accumulated_time_time_min accumulated_time_time_max \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_mean accumulated_time_time_median \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_mode accumulated_time_time_std \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_variance accumulated_time_time_q1 \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_q3 accumulated_time_time_iqr \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_geometric_mean accumulated_time_time_geometric_std \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_harmonic_mean accumulated_time_time_skewness \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_kurtosis \n", - "0 NaN \\\n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN \n", - "\n", - " accumulated_time_time_coefficient_variation accumulated_time_time_entropy \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " accumulated_time_time_skewness_hist accumulated_time_time_kurtosis_hist \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_min execution_time_time_max execution_time_time_mean \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " execution_time_time_median execution_time_time_mode \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_std execution_time_time_variance \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_q1 execution_time_time_q3 execution_time_time_iqr \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " execution_time_time_geometric_mean execution_time_time_geometric_std \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_harmonic_mean execution_time_time_skewness \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_kurtosis execution_time_time_coefficient_variation \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_entropy execution_time_time_skewness_hist \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " execution_time_time_kurtosis_hist remaining_time_time_min \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_max remaining_time_time_mean \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_median remaining_time_time_mode \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_std remaining_time_time_variance \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_q1 remaining_time_time_q3 remaining_time_time_iqr \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " remaining_time_time_geometric_mean remaining_time_time_geometric_std \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_harmonic_mean remaining_time_time_skewness \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_kurtosis remaining_time_time_coefficient_variation \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_entropy remaining_time_time_skewness_hist \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " remaining_time_time_kurtosis_hist within_day_time_min \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " within_day_time_max within_day_time_mean within_day_time_median \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " within_day_time_mode within_day_time_std within_day_time_variance \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " within_day_time_q1 within_day_time_q3 within_day_time_iqr \n", - "0 NaN NaN NaN \\\n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " within_day_time_geometric_mean within_day_time_geometric_std \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " within_day_time_harmonic_mean within_day_time_skewness \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " within_day_time_kurtosis within_day_time_coefficient_variation \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " within_day_time_entropy within_day_time_skewness_hist \n", - "0 NaN NaN \\\n", - "1 NaN NaN \n", - "2 NaN NaN \n", - "3 NaN NaN \n", - "4 NaN NaN \n", - "\n", - " within_day_time_kurtosis_hist \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " + " trace_len_mean trace_len_median trace_len_mode trace_len_std ... \n", + "0 53.310096 54.0 61 19.894977 ... \\\n", + "1 57.391541 49.0 49 34.872131 ... \n", + "2 5.981172 6.0 6 2.166129 ... \n", + "3 42.356991 42.0 44 16.138406 ... \n", + "4 6.339720 5.0 5 13.057417 ... \n", + "\n", + " within_day_time_iqr within_day_time_geometric_mean \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_geometric_std within_day_time_harmonic_mean \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_skewness within_day_time_kurtosis \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_coefficient_variation within_day_time_entropy \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_skewness_hist within_day_time_kurtosis_hist \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + "[5 rows x 178 columns]" ] }, - "execution_count": 20, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n", + "bpic_features = pd.read_csv(\"../data/BaselineED_feat.csv\", index_col=None)\n", "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n", "\n", "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n", @@ -2235,12 +862,12 @@ "# errors=\"raise\", inplace=True)\n", "\n", "bpic_features.head()\n", - "#bpic_features.to_csv(\"../data/34_bpic_features.csv\", index=False)" + "#bpic_features.to_csv(\"../data/BaselineED_feat.csv\", index=False)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "id": "ef0df0b9", "metadata": {}, "outputs": [ @@ -2248,610 +875,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n" + "['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n" ] }, { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
logratio_unique_traces_per_traceratio_most_common_variantratio_top_10_variantsepa_normalized_variant_entropyepa_normalized_sequence_entropyepa_normalized_sequence_entropy_linear_forgettingepa_normalized_sequence_entropy_exponential_forgetting
0BPIC15_20.9951920.0024040.1033650.6279730.6023710.3172170.390473
1BPI_Challenge_20180.6495700.0269810.4153710.7120790.5706880.3816120.420362
2Receipt_WABO_CoSeLoG0.0808930.4972110.8870290.6893630.2355320.1006030.138113
3BPIC15_30.9574170.0106460.1376860.6617810.6056760.3415210.404934
4BPI_Challenge_20190.0475620.1997580.9463680.6455300.3280290.3201850.320282
5RequestForPayment0.0129250.4372640.9334880.7037350.1890480.0975720.118744
6PrepaidTravelCost0.0962360.2710810.8227730.7237850.3170440.1848790.214387
7DomesticDeclarations0.0094290.4398100.9500950.6964740.1647580.0854390.104389
8BPIC15_40.9962010.0028490.1025640.6529850.6038660.3559270.412835
9BPI_Challenge_20120.3336140.2620160.6862540.7082800.4230740.2261330.275551
10Hospital_log0.8582680.0358710.2274720.5174430.5130320.2678250.331672
11BPIC15_50.9974050.0017300.1020760.6487020.6032600.3424100.404580
12CoSeLoG_WABO_20.9984500.0031010.1007750.6184550.5940350.3232330.389858
13Road_Traffic_Fine_Management_Process0.0015360.3756200.9931040.7693530.1119320.0525860.068442
14BPI_Challenge_2017_Offer_log0.0003720.3806260.3806260.8134790.1051300.0526720.066000
15Sepsis_Cases_Event_Log0.8057140.0333330.2742860.6957590.5223430.2193650.299505
16CoSeLoG_WABO_30.9494020.0119600.1453540.6542960.5963670.2781210.356439
17BPI_Challenge_2013_closed_problems0.1230670.3315400.8406190.7053830.3109400.2865150.288383
18BPI_Challenge_2013_incidents0.2000260.2321950.7944140.7178460.4046510.3910970.391625
19PermitLog0.2092000.1353150.7575370.7336530.4201500.1372870.215490
20BPIC15_10.9758130.0066720.1217680.6528550.6102940.2702410.363928
21InternationalDeclarations0.1167620.2122810.8112890.7582680.3393800.1456110.193753
22BPI_Challenge_20170.5055700.0335140.5313400.7417060.4615650.2319220.290464
23BPI2016_Complaints0.4380530.1017700.4247790.8994970.6837960.4046850.470116
24BPI2016_Questions0.7974270.0156500.2823110.8134680.7561320.4249100.506118
25BPI2016_Werkmap_Messages0.0028820.2958030.7141060.0000000.0000000.0000000.000000
26BPI_Challenge_2013_open_problems0.1318680.2173380.7692310.7029600.2767710.2620940.263029
27CoSeLoG_WABO_10.9775880.0096050.1195300.6466970.6015660.2928240.376276
28CoSeLoG_WABO_40.9923760.0025410.1067340.6443990.5971090.3739200.422526
29CoSeLoG_WABO_50.9854260.0044840.1121080.6426680.5924540.3468320.401731
30Detail_Change0.0484440.0749440.765056NaNNaNNaNNaN
31Detail_Incident_Activity0.4968470.0374550.552836NaNNaNNaNNaN
32Detail_Interaction0.0000410.7870810.000000NaNNaNNaNNaN
33finale0.0493450.5165940.9063320.7991200.2540660.1184780.154576
\n", - "
" - ], - "text/plain": [ - " log ratio_unique_traces_per_trace \n", - "0 BPIC15_2 0.995192 \\\n", - "1 BPI_Challenge_2018 0.649570 \n", - "2 Receipt_WABO_CoSeLoG 0.080893 \n", - "3 BPIC15_3 0.957417 \n", - "4 BPI_Challenge_2019 0.047562 \n", - "5 RequestForPayment 0.012925 \n", - "6 PrepaidTravelCost 0.096236 \n", - "7 DomesticDeclarations 0.009429 \n", - "8 BPIC15_4 0.996201 \n", - "9 BPI_Challenge_2012 0.333614 \n", - "10 Hospital_log 0.858268 \n", - "11 BPIC15_5 0.997405 \n", - "12 CoSeLoG_WABO_2 0.998450 \n", - "13 Road_Traffic_Fine_Management_Process 0.001536 \n", - "14 BPI_Challenge_2017_Offer_log 0.000372 \n", - "15 Sepsis_Cases_Event_Log 0.805714 \n", - "16 CoSeLoG_WABO_3 0.949402 \n", - "17 BPI_Challenge_2013_closed_problems 0.123067 \n", - "18 BPI_Challenge_2013_incidents 0.200026 \n", - "19 PermitLog 0.209200 \n", - "20 BPIC15_1 0.975813 \n", - "21 InternationalDeclarations 0.116762 \n", - "22 BPI_Challenge_2017 0.505570 \n", - "23 BPI2016_Complaints 0.438053 \n", - "24 BPI2016_Questions 0.797427 \n", - "25 BPI2016_Werkmap_Messages 0.002882 \n", - "26 BPI_Challenge_2013_open_problems 0.131868 \n", - "27 CoSeLoG_WABO_1 0.977588 \n", - "28 CoSeLoG_WABO_4 0.992376 \n", - "29 CoSeLoG_WABO_5 0.985426 \n", - "30 Detail_Change 0.048444 \n", - "31 Detail_Incident_Activity 0.496847 \n", - "32 Detail_Interaction 0.000041 \n", - "33 finale 0.049345 \n", - "\n", - " ratio_most_common_variant ratio_top_10_variants \n", - "0 0.002404 0.103365 \\\n", - "1 0.026981 0.415371 \n", - "2 0.497211 0.887029 \n", - "3 0.010646 0.137686 \n", - "4 0.199758 0.946368 \n", - "5 0.437264 0.933488 \n", - "6 0.271081 0.822773 \n", - "7 0.439810 0.950095 \n", - "8 0.002849 0.102564 \n", - "9 0.262016 0.686254 \n", - "10 0.035871 0.227472 \n", - "11 0.001730 0.102076 \n", - "12 0.003101 0.100775 \n", - "13 0.375620 0.993104 \n", - "14 0.380626 0.380626 \n", - "15 0.033333 0.274286 \n", - "16 0.011960 0.145354 \n", - "17 0.331540 0.840619 \n", - "18 0.232195 0.794414 \n", - "19 0.135315 0.757537 \n", - "20 0.006672 0.121768 \n", - "21 0.212281 0.811289 \n", - "22 0.033514 0.531340 \n", - "23 0.101770 0.424779 \n", - "24 0.015650 0.282311 \n", - "25 0.295803 0.714106 \n", - "26 0.217338 0.769231 \n", - "27 0.009605 0.119530 \n", - "28 0.002541 0.106734 \n", - "29 0.004484 0.112108 \n", - "30 0.074944 0.765056 \n", - "31 0.037455 0.552836 \n", - "32 0.787081 0.000000 \n", - "33 0.516594 0.906332 \n", - "\n", - " epa_normalized_variant_entropy epa_normalized_sequence_entropy \n", - "0 0.627973 0.602371 \\\n", - "1 0.712079 0.570688 \n", - "2 0.689363 0.235532 \n", - "3 0.661781 0.605676 \n", - "4 0.645530 0.328029 \n", - "5 0.703735 0.189048 \n", - "6 0.723785 0.317044 \n", - "7 0.696474 0.164758 \n", - "8 0.652985 0.603866 \n", - "9 0.708280 0.423074 \n", - "10 0.517443 0.513032 \n", - "11 0.648702 0.603260 \n", - "12 0.618455 0.594035 \n", - "13 0.769353 0.111932 \n", - "14 0.813479 0.105130 \n", - "15 0.695759 0.522343 \n", - "16 0.654296 0.596367 \n", - "17 0.705383 0.310940 \n", - "18 0.717846 0.404651 \n", - "19 0.733653 0.420150 \n", - "20 0.652855 0.610294 \n", - "21 0.758268 0.339380 \n", - "22 0.741706 0.461565 \n", - "23 0.899497 0.683796 \n", - "24 0.813468 0.756132 \n", - "25 0.000000 0.000000 \n", - "26 0.702960 0.276771 \n", - "27 0.646697 0.601566 \n", - "28 0.644399 0.597109 \n", - "29 0.642668 0.592454 \n", - "30 NaN NaN \n", - "31 NaN NaN \n", - "32 NaN NaN \n", - "33 0.799120 0.254066 \n", - "\n", - " epa_normalized_sequence_entropy_linear_forgetting \n", - "0 0.317217 \\\n", - "1 0.381612 \n", - "2 0.100603 \n", - "3 0.341521 \n", - "4 0.320185 \n", - "5 0.097572 \n", - "6 0.184879 \n", - "7 0.085439 \n", - "8 0.355927 \n", - "9 0.226133 \n", - "10 0.267825 \n", - "11 0.342410 \n", - "12 0.323233 \n", - "13 0.052586 \n", - "14 0.052672 \n", - "15 0.219365 \n", - "16 0.278121 \n", - "17 0.286515 \n", - "18 0.391097 \n", - "19 0.137287 \n", - "20 0.270241 \n", - "21 0.145611 \n", - "22 0.231922 \n", - "23 0.404685 \n", - "24 0.424910 \n", - "25 0.000000 \n", - "26 0.262094 \n", - "27 0.292824 \n", - "28 0.373920 \n", - "29 0.346832 \n", - "30 NaN \n", - "31 NaN \n", - "32 NaN \n", - "33 0.118478 \n", - "\n", - " epa_normalized_sequence_entropy_exponential_forgetting \n", - "0 0.390473 \n", - "1 0.420362 \n", - "2 0.138113 \n", - "3 0.404934 \n", - "4 0.320282 \n", - "5 0.118744 \n", - "6 0.214387 \n", - "7 0.104389 \n", - "8 0.412835 \n", - "9 0.275551 \n", - "10 0.331672 \n", - "11 0.404580 \n", - "12 0.389858 \n", - "13 0.068442 \n", - "14 0.066000 \n", - "15 0.299505 \n", - "16 0.356439 \n", - "17 0.288383 \n", - "18 0.391625 \n", - "19 0.215490 \n", - "20 0.363928 \n", - "21 0.193753 \n", - "22 0.290464 \n", - "23 0.470116 \n", - "24 0.506118 \n", - "25 0.000000 \n", - "26 0.263029 \n", - "27 0.376276 \n", - "28 0.422526 \n", - "29 0.401731 \n", - "30 NaN \n", - "31 NaN \n", - "32 NaN \n", - "33 0.154576 " - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" + "ename": "KeyError", + "evalue": "\"['ratio_variants_per_number_of_traces'] not in index\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m normalized_feature_names \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_variants_per_number_of_traces\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_most_common_variant\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_top_10_variants\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_variant_entropy\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy_linear_forgetting\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy_exponential_forgetting\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(normalized_feature_names)\n\u001b[0;32m----> 7\u001b[0m \u001b[43mbpic_features\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlog\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mnormalized_feature_names\u001b[49m\u001b[43m]\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:3767\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3765\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 3766\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 3767\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 3769\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 3770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:5876\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5873\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 5874\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 5876\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5878\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 5879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 5880\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:5938\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5935\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5937\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[0;32m-> 5938\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mKeyError\u001b[0m: \"['ratio_variants_per_number_of_traces'] not in index\"" + ] } ], "source": [ "bpic_stats = bpic_features.describe().transpose()\n", "normalized_feature_names = bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() \n", - "normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', \n", + "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n", " 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "print(normalized_feature_names)\n", @@ -2860,40 +905,10 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "44909860", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "21\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rt10v.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rmcv.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enself.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rt10v.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rt10v.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enseef.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rmcv.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rutpt.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enve.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rmcv.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rmcv.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rutpt.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_enve.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rt10v.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rutpt.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enself.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rt10v.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enve.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rutpt.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rt10v_rutpt.json\n", - "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rutpt.json\n", - "None\n" - ] - } - ], + "outputs": [], "source": [ "#Features between 0 and 1: \n", "def write_generator_bpic_experiment(objectives, n_para_obj=2):\n", @@ -2901,7 +916,7 @@ " experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n", " experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n", " for exp in experiments:\n", - " experiment_path = os.path.join('..','data', '34_bpic_features')\n", + " experiment_path = os.path.join('..','data', 'BaselineED_feat')\n", " os.makedirs(experiment_path, exist_ok=True)\n", " experiment_path = os.path.join(experiment_path, f\"34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv\") \n", "\n", @@ -2914,7 +929,7 @@ " 'pipeline_step': 'event_logs_generation',\n", " 'output_path':'output/generated',\n", " 'generator_params': {\n", - " \"experiment\": {\"input_path\": \"data/34_bpic_features.csv\",\n", + " \"experiment\": {\"input_path\": \"data/BaselineED_feat.csv\",\n", " \"objectives\": exp},\n", " 'config_space': {\n", " 'mode': [5, 20],\n", @@ -2933,15 +948,15 @@ " },\n", " {\n", " 'pipeline_step': 'feature_extraction',\n", - " 'input_path': os.path.join('output', 'features', 'generated', '34_bpic_features', second_dir),\n", + " 'input_path': os.path.join('output', 'features', 'generated', 'BaselineED_feat', second_dir),\n", " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n", " 'output_path': 'output/plots',\n", - " 'real_eventlog_path': 'data/34_bpic_features.csv',\n", + " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " }\n", " ]\n", "\n", - " output_path = os.path.join('..', 'config_files','algorithm','34_bpic_features')\n", + " output_path = os.path.join('..', 'config_files','algorithm','BaselineED_feat')\n", " os.makedirs(output_path, exist_ok=True)\n", " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n", "\n", @@ -2975,34 +990,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "d759a677", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "7 experiments: [('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_variant_entropy',), ('ratio_top_10_variants',), ('epa_normalized_sequence_entropy',), ('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('ratio_unique_traces_per_trace',)]\n", - "11\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_ense.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_enself.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enself.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_rmcv.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rmcv.json\n", - "Saved experiment in ../data/grid_experiments/grid_1objectives_rutpt.csv\n", - "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rutpt.json\n", - "None\n" - ] - } - ], + "outputs": [], "source": [ "def write_single_objective_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n", " first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n", @@ -3035,7 +1026,7 @@ " 'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),\n", " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n", " 'output_path': 'output/plots',\n", - " 'real_eventlog_path': 'data/34_bpic_features.csv',\n", + " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " }\n", " ]\n", @@ -3102,7 +1093,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.9.7" } }, "nbformat": 4,