diff --git "a/notebooks/experiment_generator.ipynb" "b/notebooks/experiment_generator.ipynb"
--- "a/notebooks/experiment_generator.ipynb"
+++ "b/notebooks/experiment_generator.ipynb"
@@ -36,7 +36,7 @@
"outputs": [],
"source": [
"#Features between 0 and 1: \n",
- "normalized_feature_names = ['ratio_unique_traces_per_trace', 'trace_len_hist1', 'trace_len_hist2',\n",
+ "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_hist1', 'trace_len_hist2',\n",
" 'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',\n",
" 'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', \n",
" 'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', \n",
@@ -44,7 +44,7 @@
" 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
" 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
"\n",
- "normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', \n",
+ "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n",
" 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
" 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
"\n",
@@ -64,7 +64,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 3,
"id": "2be119c8",
"metadata": {},
"outputs": [
@@ -72,50 +72,50 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('ratio_most_common_variant', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n",
+ "21 [('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_variants_per_number_of_traces'), ('ratio_most_common_variant', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('ratio_top_10_variants', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_variant_entropy', 'ratio_variants_per_number_of_traces'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy', 'ratio_variants_per_number_of_traces'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants')]\n",
"121\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rutpt.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rutpt.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rmcv.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enve.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rmcv.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enve.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rutpt.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rutpt.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rt10v.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json\n",
"Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rmcv.csv\n",
"Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n",
- "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rvpnot.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n",
"None\n"
]
}
@@ -152,7 +152,7 @@
" 'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),\n",
" 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
" 'output_path': 'output/plots',\n",
- " 'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+ " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
" 'plot_type': 'boxplot'\n",
" }\n",
" ]\n",
@@ -204,7 +204,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 4,
"id": "dfd1a302",
"metadata": {},
"outputs": [],
@@ -214,7 +214,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 5,
"id": "218946b7",
"metadata": {},
"outputs": [],
@@ -234,7 +234,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 6,
"id": "b1e3bb5a",
"metadata": {},
"outputs": [],
@@ -244,7 +244,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"id": "5de45389",
"metadata": {},
"outputs": [
@@ -271,7 +271,7 @@
"
| \n",
" log | \n",
" ratio_top_20_variants | \n",
- " normalized_sequence_entropy_linear_forgetting | \n",
+ " epa_normalized_sequence_entropy_linear_forgetting | \n",
" \n",
" \n",
" \n",
@@ -534,46 +534,46 @@
"34 objective_35 1.0 \n",
"35 objective_36 1.0 \n",
"\n",
- " normalized_sequence_entropy_linear_forgetting \n",
- "0 0.0 \n",
- "1 0.1 \n",
- "2 0.2 \n",
- "3 0.3 \n",
- "4 0.4 \n",
- "5 0.5 \n",
- "6 0.0 \n",
- "7 0.1 \n",
- "8 0.2 \n",
- "9 0.3 \n",
- "10 0.4 \n",
- "11 0.5 \n",
- "12 0.0 \n",
- "13 0.1 \n",
- "14 0.2 \n",
- "15 0.3 \n",
- "16 0.4 \n",
- "17 0.5 \n",
- "18 0.0 \n",
- "19 0.1 \n",
- "20 0.2 \n",
- "21 0.3 \n",
- "22 0.4 \n",
- "23 0.5 \n",
- "24 0.0 \n",
- "25 0.1 \n",
- "26 0.2 \n",
- "27 0.3 \n",
- "28 0.4 \n",
- "29 0.5 \n",
- "30 0.0 \n",
- "31 0.1 \n",
- "32 0.2 \n",
- "33 0.3 \n",
- "34 0.4 \n",
- "35 0.5 "
+ " epa_normalized_sequence_entropy_linear_forgetting \n",
+ "0 0.0 \n",
+ "1 0.1 \n",
+ "2 0.2 \n",
+ "3 0.3 \n",
+ "4 0.4 \n",
+ "5 0.5 \n",
+ "6 0.0 \n",
+ "7 0.1 \n",
+ "8 0.2 \n",
+ "9 0.3 \n",
+ "10 0.4 \n",
+ "11 0.5 \n",
+ "12 0.0 \n",
+ "13 0.1 \n",
+ "14 0.2 \n",
+ "15 0.3 \n",
+ "16 0.4 \n",
+ "17 0.5 \n",
+ "18 0.0 \n",
+ "19 0.1 \n",
+ "20 0.2 \n",
+ "21 0.3 \n",
+ "22 0.4 \n",
+ "23 0.5 \n",
+ "24 0.0 \n",
+ "25 0.1 \n",
+ "26 0.2 \n",
+ "27 0.3 \n",
+ "28 0.4 \n",
+ "29 0.5 \n",
+ "30 0.0 \n",
+ "31 0.1 \n",
+ "32 0.2 \n",
+ "33 0.3 \n",
+ "34 0.4 \n",
+ "35 0.5 "
]
},
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -593,7 +593,7 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 8,
"id": "39ac74bb",
"metadata": {},
"outputs": [
@@ -646,164 +646,7 @@
" trace_len_median | \n",
" trace_len_mode | \n",
" trace_len_std | \n",
- " trace_len_variance | \n",
- " trace_len_q1 | \n",
- " trace_len_q3 | \n",
- " trace_len_iqr | \n",
- " trace_len_geometric_mean | \n",
- " trace_len_geometric_std | \n",
- " trace_len_harmonic_mean | \n",
- " trace_len_skewness | \n",
- " trace_len_kurtosis | \n",
- " trace_len_coefficient_variation | \n",
- " trace_len_entropy | \n",
- " trace_len_hist1 | \n",
- " trace_len_hist2 | \n",
- " trace_len_hist3 | \n",
- " trace_len_hist4 | \n",
- " trace_len_hist5 | \n",
- " trace_len_hist6 | \n",
- " trace_len_hist7 | \n",
- " trace_len_hist8 | \n",
- " trace_len_hist9 | \n",
- " trace_len_hist10 | \n",
- " trace_len_skewness_hist | \n",
- " trace_len_kurtosis_hist | \n",
- " ratio_most_common_variant | \n",
- " ratio_top_1_variants | \n",
- " ratio_top_5_variants | \n",
- " ratio_top_10_variants | \n",
- " ratio_top_20_variants | \n",
- " ratio_top_50_variants | \n",
- " ratio_top_75_variants | \n",
- " mean_variant_occurrence | \n",
- " std_variant_occurrence | \n",
- " skewness_variant_occurrence | \n",
- " kurtosis_variant_occurrence | \n",
- " n_unique_activities | \n",
- " activities_min | \n",
- " activities_max | \n",
- " activities_mean | \n",
- " activities_median | \n",
- " activities_std | \n",
- " activities_variance | \n",
- " activities_q1 | \n",
- " activities_q3 | \n",
- " activities_iqr | \n",
- " activities_skewness | \n",
- " activities_kurtosis | \n",
- " n_unique_start_activities | \n",
- " start_activities_min | \n",
- " start_activities_max | \n",
- " start_activities_mean | \n",
- " start_activities_median | \n",
- " start_activities_std | \n",
- " start_activities_variance | \n",
- " start_activities_q1 | \n",
- " start_activities_q3 | \n",
- " start_activities_iqr | \n",
- " start_activities_skewness | \n",
- " start_activities_kurtosis | \n",
- " n_unique_end_activities | \n",
- " end_activities_min | \n",
- " end_activities_max | \n",
- " end_activities_mean | \n",
- " end_activities_median | \n",
- " end_activities_std | \n",
- " end_activities_variance | \n",
- " end_activities_q1 | \n",
- " end_activities_q3 | \n",
- " end_activities_iqr | \n",
- " end_activities_skewness | \n",
- " end_activities_kurtosis | \n",
- " entropy_trace | \n",
- " entropy_prefix | \n",
- " entropy_global_block | \n",
- " entropy_lempel_ziv | \n",
- " entropy_k_block_diff_1 | \n",
- " entropy_k_block_diff_3 | \n",
- " entropy_k_block_diff_5 | \n",
- " entropy_k_block_ratio_1 | \n",
- " entropy_k_block_ratio_3 | \n",
- " entropy_k_block_ratio_5 | \n",
- " entropy_knn_3 | \n",
- " entropy_knn_5 | \n",
- " entropy_knn_7 | \n",
- " Log Nature | \n",
- " epa_variant_entropy | \n",
- " epa_normalized_variant_entropy | \n",
- " epa_sequence_entropy | \n",
- " epa_normalized_sequence_entropy | \n",
- " epa_sequence_entropy_linear_forgetting | \n",
- " epa_normalized_sequence_entropy_linear_forgetting | \n",
- " epa_sequence_entropy_exponential_forgetting | \n",
- " epa_normalized_sequence_entropy_exponential_forgetting | \n",
- " accumulated_time_time_min | \n",
- " accumulated_time_time_max | \n",
- " accumulated_time_time_mean | \n",
- " accumulated_time_time_median | \n",
- " accumulated_time_time_mode | \n",
- " accumulated_time_time_std | \n",
- " accumulated_time_time_variance | \n",
- " accumulated_time_time_q1 | \n",
- " accumulated_time_time_q3 | \n",
- " accumulated_time_time_iqr | \n",
- " accumulated_time_time_geometric_mean | \n",
- " accumulated_time_time_geometric_std | \n",
- " accumulated_time_time_harmonic_mean | \n",
- " accumulated_time_time_skewness | \n",
- " accumulated_time_time_kurtosis | \n",
- " accumulated_time_time_coefficient_variation | \n",
- " accumulated_time_time_entropy | \n",
- " accumulated_time_time_skewness_hist | \n",
- " accumulated_time_time_kurtosis_hist | \n",
- " execution_time_time_min | \n",
- " execution_time_time_max | \n",
- " execution_time_time_mean | \n",
- " execution_time_time_median | \n",
- " execution_time_time_mode | \n",
- " execution_time_time_std | \n",
- " execution_time_time_variance | \n",
- " execution_time_time_q1 | \n",
- " execution_time_time_q3 | \n",
- " execution_time_time_iqr | \n",
- " execution_time_time_geometric_mean | \n",
- " execution_time_time_geometric_std | \n",
- " execution_time_time_harmonic_mean | \n",
- " execution_time_time_skewness | \n",
- " execution_time_time_kurtosis | \n",
- " execution_time_time_coefficient_variation | \n",
- " execution_time_time_entropy | \n",
- " execution_time_time_skewness_hist | \n",
- " execution_time_time_kurtosis_hist | \n",
- " remaining_time_time_min | \n",
- " remaining_time_time_max | \n",
- " remaining_time_time_mean | \n",
- " remaining_time_time_median | \n",
- " remaining_time_time_mode | \n",
- " remaining_time_time_std | \n",
- " remaining_time_time_variance | \n",
- " remaining_time_time_q1 | \n",
- " remaining_time_time_q3 | \n",
- " remaining_time_time_iqr | \n",
- " remaining_time_time_geometric_mean | \n",
- " remaining_time_time_geometric_std | \n",
- " remaining_time_time_harmonic_mean | \n",
- " remaining_time_time_skewness | \n",
- " remaining_time_time_kurtosis | \n",
- " remaining_time_time_coefficient_variation | \n",
- " remaining_time_time_entropy | \n",
- " remaining_time_time_skewness_hist | \n",
- " remaining_time_time_kurtosis_hist | \n",
- " within_day_time_min | \n",
- " within_day_time_max | \n",
- " within_day_time_mean | \n",
- " within_day_time_median | \n",
- " within_day_time_mode | \n",
- " within_day_time_std | \n",
- " within_day_time_variance | \n",
- " within_day_time_q1 | \n",
- " within_day_time_q3 | \n",
+ " ... | \n",
" within_day_time_iqr | \n",
" within_day_time_geometric_mean | \n",
" within_day_time_geometric_std | \n",
@@ -829,164 +672,7 @@
" 54.0 | \n",
" 61 | \n",
" 19.894977 | \n",
- " 395.810090 | \n",
- " 44.0 | \n",
- " 62.0 | \n",
- " 18.0 | \n",
- " 48.150111 | \n",
- " 1.695311 | \n",
- " 37.583741 | \n",
- " 0.054138 | \n",
- " 0.804992 | \n",
- " 0.373193 | \n",
- " 6.646715 | \n",
- " 0.003853 | \n",
- " 0.004863 | \n",
- " 4.679243e-03 | \n",
- " 0.023947 | \n",
- " 2.376321e-02 | \n",
- " 8.257487e-03 | \n",
- " 0.004771 | \n",
- " 1.376248e-03 | \n",
- " 6.422490e-04 | \n",
- " 1.834997e-04 | \n",
- " 0.054138 | \n",
- " 0.804992 | \n",
- " 0.002404 | \n",
- " 0.014423 | \n",
- " 0.054087 | \n",
- " 0.103365 | \n",
- " 0.203125 | \n",
- " 0.502404 | \n",
- " 0.751202 | \n",
- " 1.004831 | \n",
- " 0.069337 | \n",
- " 14.283027 | \n",
- " 202.004854 | \n",
- " 410 | \n",
- " 1 | \n",
- " 830 | \n",
- " 108.180488 | \n",
- " 12.0 | \n",
- " 187.588162 | \n",
- " 3.518932e+04 | \n",
- " 3.0 | \n",
- " 125.5 | \n",
- " 122.5 | \n",
- " 2.129412 | \n",
- " 3.808278 | \n",
- " 14 | \n",
- " 1 | \n",
- " 731 | \n",
- " 59.428571 | \n",
- " 1.0 | \n",
- " 186.717401 | \n",
- " 3.486339e+04 | \n",
- " 1.0 | \n",
- " 8.25 | \n",
- " 7.25 | \n",
- " 3.300411 | \n",
- " 8.960767 | \n",
- " 82 | \n",
- " 1 | \n",
- " 216 | \n",
- " 10.146341 | \n",
- " 1.0 | \n",
- " 35.318800 | \n",
- " 1.247418e+03 | \n",
- " 1.00 | \n",
- " 3.00 | \n",
- " 2.00 | \n",
- " 5.098791 | \n",
- " 25.861991 | \n",
- " 9.691 | \n",
- " 14.524 | \n",
- " 19.448 | \n",
- " 3.859 | \n",
- " 7.105 | \n",
- " 7.105 | \n",
- " 7.105 | \n",
- " 7.105 | \n",
- " 7.105 | \n",
- " 7.105 | \n",
- " 5.545 | \n",
- " 5.039 | \n",
- " 4.721 | \n",
- " Real | \n",
- " 2.405122e+05 | \n",
- " 0.627973 | \n",
- " 2.858769e+05 | \n",
- " 0.602371 | \n",
- " 1.505466e+05 | \n",
- " 0.317217 | \n",
- " 1.853129e+05 | \n",
- " 0.390473 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1010,164 +696,7 @@
" 49.0 | \n",
" 49 | \n",
" 34.872131 | \n",
- " 1216.065487 | \n",
- " 44.0 | \n",
- " 59.0 | \n",
- " 15.0 | \n",
- " 53.775008 | \n",
- " 1.367397 | \n",
- " 51.651502 | \n",
- " 26.126459 | \n",
- " 1720.399665 | \n",
- " 0.607618 | \n",
- " 10.598758 | \n",
- " 0.003385 | \n",
- " 0.000005 | \n",
- " 9.288448e-07 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 0.000000e+00 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 7.740373e-08 | \n",
- " 7.740373e-08 | \n",
- " 26.126459 | \n",
- " 1720.399665 | \n",
- " 0.026981 | \n",
- " 0.290374 | \n",
- " 0.373006 | \n",
- " 0.415371 | \n",
- " 0.480335 | \n",
- " 0.675204 | \n",
- " 0.837590 | \n",
- " 1.539481 | \n",
- " 12.487438 | \n",
- " 64.625680 | \n",
- " 5083.455806 | \n",
- " 41 | \n",
- " 17 | \n",
- " 466141 | \n",
- " 61323.560976 | \n",
- " 7530.0 | \n",
- " 120522.247417 | \n",
- " 1.452561e+10 | \n",
- " 902.0 | \n",
- " 45907.0 | \n",
- " 45005.0 | \n",
- " 2.444007 | \n",
- " 4.773254 | \n",
- " 4 | \n",
- " 2 | \n",
- " 38623 | \n",
- " 10952.250000 | \n",
- " 2592.0 | \n",
- " 16111.407548 | \n",
- " 2.595775e+08 | \n",
- " 36.5 | \n",
- " 13507.75 | \n",
- " 13471.25 | \n",
- " 1.098736 | \n",
- " -0.714800 | \n",
- " 21 | \n",
- " 1 | \n",
- " 34830 | \n",
- " 2086.142857 | \n",
- " 13.0 | \n",
- " 7431.744981 | \n",
- " 5.523083e+07 | \n",
- " 2.00 | \n",
- " 193.00 | \n",
- " 191.00 | \n",
- " 4.062387 | \n",
- " 14.952824 | \n",
- " 13.191 | \n",
- " 16.272 | \n",
- " 20.972 | \n",
- " 1.023 | \n",
- " -0.010 | \n",
- " 1.855 | \n",
- " 0.511 | \n",
- " 1.403 | \n",
- " 3.572 | \n",
- " 2.001 | \n",
- " 7.849 | \n",
- " 7.371 | \n",
- " 7.067 | \n",
- " Real | \n",
- " 1.156384e+07 | \n",
- " 0.712079 | \n",
- " 2.114626e+07 | \n",
- " 0.570688 | \n",
- " 1.414023e+07 | \n",
- " 0.381612 | \n",
- " 1.557608e+07 | \n",
- " 0.420362 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1191,506 +720,7 @@
" 6.0 | \n",
" 6 | \n",
" 2.166129 | \n",
- " 4.692114 | \n",
- " 6.0 | \n",
- " 6.0 | \n",
- " 0.0 | \n",
- " 5.414708 | \n",
- " 1.704965 | \n",
- " 4.356445 | \n",
- " 1.276525 | \n",
- " 12.296006 | \n",
- " 0.362158 | \n",
- " 7.197193 | \n",
- " 0.036030 | \n",
- " 0.008136 | \n",
- " 3.411204e-01 | \n",
- " 0.023536 | \n",
- " 3.777313e-03 | \n",
- " 1.743375e-03 | \n",
- " 0.000291 | \n",
- " 1.452813e-03 | \n",
- " 0.000000e+00 | \n",
- " 5.811251e-04 | \n",
- " 1.276525 | \n",
- " 12.296006 | \n",
- " 0.497211 | \n",
- " 0.497211 | \n",
- " 0.796374 | \n",
- " 0.887029 | \n",
- " 0.930265 | \n",
- " 0.959554 | \n",
- " 0.979777 | \n",
- " 12.362069 | \n",
- " 68.360277 | \n",
- " 9.380687 | \n",
- " 92.281919 | \n",
- " 27 | \n",
- " 1 | \n",
- " 1434 | \n",
- " 317.666667 | \n",
- " 27.0 | \n",
- " 553.389823 | \n",
- " 3.062403e+05 | \n",
- " 8.0 | \n",
- " 50.0 | \n",
- " 42.0 | \n",
- " 1.342951 | \n",
- " -0.178094 | \n",
- " 1 | \n",
- " 1434 | \n",
- " 1434 | \n",
- " 1434.000000 | \n",
- " 1434.0 | \n",
- " 0.000000 | \n",
- " 0.000000e+00 | \n",
- " 1434.0 | \n",
- " 1434.00 | \n",
- " 0.00 | \n",
- " NaN | \n",
- " NaN | \n",
- " 14 | \n",
- " 1 | \n",
- " 828 | \n",
- " 102.428571 | \n",
- " 6.0 | \n",
- " 225.871555 | \n",
- " 5.101796e+04 | \n",
- " 1.25 | \n",
- " 33.25 | \n",
- " 32.00 | \n",
- " 2.471765 | \n",
- " 4.846541 | \n",
- " 3.209 | \n",
- " 4.746 | \n",
- " 7.019 | \n",
- " 0.385 | \n",
- " 2.672 | \n",
- " 2.966 | \n",
- " 0.804 | \n",
- " 1.484 | \n",
- " 2.966 | \n",
- " 2.966 | \n",
- " 3.260 | \n",
- " 2.845 | \n",
- " 2.584 | \n",
- " Real | \n",
- " 2.382326e+03 | \n",
- " 0.689363 | \n",
- " 1.829627e+04 | \n",
- " 0.235532 | \n",
- " 7.814868e+03 | \n",
- " 0.100603 | \n",
- " 1.072870e+04 | \n",
- " 0.138113 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " \n",
- " \n",
- " 3 | \n",
- " BPIC15_3 | \n",
- " 1409 | \n",
- " 1349 | \n",
- " 0.957417 | \n",
- " 3 | \n",
- " 124 | \n",
- " 42.356991 | \n",
- " 42.0 | \n",
- " 44 | \n",
- " 16.138406 | \n",
- " 260.448143 | \n",
- " 38.0 | \n",
- " 47.0 | \n",
- " 9.0 | \n",
- " 37.637731 | \n",
- " 1.786726 | \n",
- " 29.092933 | \n",
- " -0.009541 | \n",
- " 1.543369 | \n",
- " 0.381009 | \n",
- " 7.167153 | \n",
- " 0.006921 | \n",
- " 0.004340 | \n",
- " 1.630604e-02 | \n",
- " 0.036953 | \n",
- " 1.173096e-02 | \n",
- " 4.105837e-03 | \n",
- " 0.001584 | \n",
- " 5.278933e-04 | \n",
- " 1.173096e-04 | \n",
- " 5.865481e-05 | \n",
- " -0.009541 | \n",
- " 1.543369 | \n",
- " 0.010646 | \n",
- " 0.049681 | \n",
- " 0.090135 | \n",
- " 0.137686 | \n",
- " 0.233499 | \n",
- " 0.520937 | \n",
- " 0.760114 | \n",
- " 1.044477 | \n",
- " 0.592348 | \n",
- " 17.964130 | \n",
- " 358.019511 | \n",
- " 383 | \n",
- " 1 | \n",
- " 1409 | \n",
- " 155.825065 | \n",
- " 16.0 | \n",
- " 306.310544 | \n",
- " 9.382615e+04 | \n",
- " 5.0 | \n",
- " 108.5 | \n",
- " 103.5 | \n",
- " 2.446349 | \n",
- " 5.280931 | \n",
- " 9 | \n",
- " 1 | \n",
- " 1348 | \n",
- " 156.555556 | \n",
- " 8.0 | \n",
- " 421.270858 | \n",
- " 1.774691e+05 | \n",
- " 3.0 | \n",
- " 14.00 | \n",
- " 11.00 | \n",
- " 2.474158 | \n",
- " 4.122971 | \n",
- " 119 | \n",
- " 1 | \n",
- " 342 | \n",
- " 11.840336 | \n",
- " 2.0 | \n",
- " 39.557210 | \n",
- " 1.564773e+03 | \n",
- " 1.00 | \n",
- " 7.00 | \n",
- " 6.00 | \n",
- " 6.217217 | \n",
- " 43.335525 | \n",
- " 10.317 | \n",
- " 14.226 | \n",
- " 18.743 | \n",
- " 3.182 | \n",
- " -0.007 | \n",
- " 6.780 | \n",
- " 6.780 | \n",
- " 6.780 | \n",
- " 6.780 | \n",
- " 6.780 | \n",
- " 5.701 | \n",
- " 5.212 | \n",
- " 4.900 | \n",
- " Real | \n",
- " 2.981464e+05 | \n",
- " 0.661781 | \n",
- " 3.975043e+05 | \n",
- " 0.605676 | \n",
- " 2.241393e+05 | \n",
- " 0.341521 | \n",
- " 2.657571e+05 | \n",
- " 0.404934 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " BPI_Challenge_2019 | \n",
- " 251734 | \n",
- " 11973 | \n",
- " 0.047562 | \n",
- " 1 | \n",
- " 990 | \n",
- " 6.339720 | \n",
- " 5.0 | \n",
- " 5 | \n",
- " 13.057417 | \n",
- " 170.496137 | \n",
- " 5.0 | \n",
- " 6.0 | \n",
- " 1.0 | \n",
- " 5.173569 | \n",
- " 1.635822 | \n",
- " 4.592844 | \n",
- " 22.132989 | \n",
- " 753.772202 | \n",
- " 2.059621 | \n",
- " 12.044057 | \n",
- " 0.010078 | \n",
- " 0.000020 | \n",
- " 9.559579e-06 | \n",
- " 0.000003 | \n",
- " 3.614967e-07 | \n",
- " 1.606652e-07 | \n",
- " 0.000000 | \n",
- " 4.016630e-08 | \n",
- " 8.033260e-08 | \n",
- " 8.033260e-08 | \n",
- " 22.132989 | \n",
- " 753.772202 | \n",
- " 0.199758 | \n",
- " 0.871424 | \n",
- " 0.929990 | \n",
- " 0.946368 | \n",
- " 0.959767 | \n",
- " 0.976217 | \n",
- " 0.988106 | \n",
- " 21.025140 | \n",
- " 594.255619 | \n",
- " 64.772702 | \n",
- " 4917.319751 | \n",
- " 42 | \n",
- " 2 | \n",
- " 314097 | \n",
- " 37998.166667 | \n",
- " 1628.0 | \n",
- " 80833.669206 | \n",
- " 6.534082e+09 | \n",
- " 202.0 | \n",
- " 11536.0 | \n",
- " 11334.0 | \n",
- " 2.169648 | \n",
- " 3.263594 | \n",
- " 8 | \n",
- " 2 | \n",
- " 199867 | \n",
- " 31466.750000 | \n",
- " 869.0 | \n",
- " 65387.493286 | \n",
- " 4.275524e+09 | \n",
- " 97.0 | \n",
- " 14224.25 | \n",
- " 14127.25 | \n",
- " 2.059742 | \n",
- " 2.535789 | \n",
- " 32 | \n",
- " 1 | \n",
- " 181328 | \n",
- " 7866.687500 | \n",
- " 64.5 | \n",
- " 31658.428996 | \n",
- " 1.002256e+09 | \n",
- " 9.00 | \n",
- " 1027.25 | \n",
- " 1018.25 | \n",
- " 5.135607 | \n",
- " 25.170543 | \n",
- " 6.243 | \n",
- " 8.811 | \n",
- " 19.447 | \n",
- " 0.346 | \n",
- " -0.041 | \n",
- " 1.530 | \n",
- " 0.840 | \n",
- " 0.620 | \n",
- " 3.244 | \n",
- " 1.913 | \n",
- " 7.333 | \n",
- " 6.882 | \n",
- " 6.601 | \n",
- " Real | \n",
- " 1.690369e+06 | \n",
- " 0.645530 | \n",
- " 7.477256e+06 | \n",
- " 0.328029 | \n",
- " 7.298458e+06 | \n",
- " 0.320185 | \n",
- " 7.300663e+06 | \n",
- " 0.320282 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
+ " ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1701,6 +731,20 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BPIC15_3 | \n",
+ " 1409 | \n",
+ " 1349 | \n",
+ " 0.957417 | \n",
+ " 3 | \n",
+ " 124 | \n",
+ " 42.356991 | \n",
+ " 42.0 | \n",
+ " 44 | \n",
+ " 16.138406 | \n",
+ " ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1711,6 +755,20 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BPI_Challenge_2019 | \n",
+ " 251734 | \n",
+ " 11973 | \n",
+ " 0.047562 | \n",
+ " 1 | \n",
+ " 990 | \n",
+ " 6.339720 | \n",
+ " 5.0 | \n",
+ " 5 | \n",
+ " 13.057417 | \n",
+ " ... | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
@@ -1724,6 +782,7 @@
"
\n",
" \n",
"\n",
+ "5 rows × 178 columns
\n",
""
],
"text/plain": [
@@ -1741,490 +800,58 @@
"3 0.957417 3 124 \n",
"4 0.047562 1 990 \n",
"\n",
- " trace_len_mean trace_len_median trace_len_mode trace_len_std \n",
- "0 53.310096 54.0 61 19.894977 \\\n",
- "1 57.391541 49.0 49 34.872131 \n",
- "2 5.981172 6.0 6 2.166129 \n",
- "3 42.356991 42.0 44 16.138406 \n",
- "4 6.339720 5.0 5 13.057417 \n",
- "\n",
- " trace_len_variance trace_len_q1 trace_len_q3 trace_len_iqr \n",
- "0 395.810090 44.0 62.0 18.0 \\\n",
- "1 1216.065487 44.0 59.0 15.0 \n",
- "2 4.692114 6.0 6.0 0.0 \n",
- "3 260.448143 38.0 47.0 9.0 \n",
- "4 170.496137 5.0 6.0 1.0 \n",
- "\n",
- " trace_len_geometric_mean trace_len_geometric_std trace_len_harmonic_mean \n",
- "0 48.150111 1.695311 37.583741 \\\n",
- "1 53.775008 1.367397 51.651502 \n",
- "2 5.414708 1.704965 4.356445 \n",
- "3 37.637731 1.786726 29.092933 \n",
- "4 5.173569 1.635822 4.592844 \n",
- "\n",
- " trace_len_skewness trace_len_kurtosis trace_len_coefficient_variation \n",
- "0 0.054138 0.804992 0.373193 \\\n",
- "1 26.126459 1720.399665 0.607618 \n",
- "2 1.276525 12.296006 0.362158 \n",
- "3 -0.009541 1.543369 0.381009 \n",
- "4 22.132989 753.772202 2.059621 \n",
- "\n",
- " trace_len_entropy trace_len_hist1 trace_len_hist2 trace_len_hist3 \n",
- "0 6.646715 0.003853 0.004863 4.679243e-03 \\\n",
- "1 10.598758 0.003385 0.000005 9.288448e-07 \n",
- "2 7.197193 0.036030 0.008136 3.411204e-01 \n",
- "3 7.167153 0.006921 0.004340 1.630604e-02 \n",
- "4 12.044057 0.010078 0.000020 9.559579e-06 \n",
- "\n",
- " trace_len_hist4 trace_len_hist5 trace_len_hist6 trace_len_hist7 \n",
- "0 0.023947 2.376321e-02 8.257487e-03 0.004771 \\\n",
- "1 0.000000 0.000000e+00 0.000000e+00 0.000000 \n",
- "2 0.023536 3.777313e-03 1.743375e-03 0.000291 \n",
- "3 0.036953 1.173096e-02 4.105837e-03 0.001584 \n",
- "4 0.000003 3.614967e-07 1.606652e-07 0.000000 \n",
- "\n",
- " trace_len_hist8 trace_len_hist9 trace_len_hist10 \n",
- "0 1.376248e-03 6.422490e-04 1.834997e-04 \\\n",
- "1 0.000000e+00 7.740373e-08 7.740373e-08 \n",
- "2 1.452813e-03 0.000000e+00 5.811251e-04 \n",
- "3 5.278933e-04 1.173096e-04 5.865481e-05 \n",
- "4 4.016630e-08 8.033260e-08 8.033260e-08 \n",
- "\n",
- " trace_len_skewness_hist trace_len_kurtosis_hist \n",
- "0 0.054138 0.804992 \\\n",
- "1 26.126459 1720.399665 \n",
- "2 1.276525 12.296006 \n",
- "3 -0.009541 1.543369 \n",
- "4 22.132989 753.772202 \n",
- "\n",
- " ratio_most_common_variant ratio_top_1_variants ratio_top_5_variants \n",
- "0 0.002404 0.014423 0.054087 \\\n",
- "1 0.026981 0.290374 0.373006 \n",
- "2 0.497211 0.497211 0.796374 \n",
- "3 0.010646 0.049681 0.090135 \n",
- "4 0.199758 0.871424 0.929990 \n",
- "\n",
- " ratio_top_10_variants ratio_top_20_variants ratio_top_50_variants \n",
- "0 0.103365 0.203125 0.502404 \\\n",
- "1 0.415371 0.480335 0.675204 \n",
- "2 0.887029 0.930265 0.959554 \n",
- "3 0.137686 0.233499 0.520937 \n",
- "4 0.946368 0.959767 0.976217 \n",
- "\n",
- " ratio_top_75_variants mean_variant_occurrence std_variant_occurrence \n",
- "0 0.751202 1.004831 0.069337 \\\n",
- "1 0.837590 1.539481 12.487438 \n",
- "2 0.979777 12.362069 68.360277 \n",
- "3 0.760114 1.044477 0.592348 \n",
- "4 0.988106 21.025140 594.255619 \n",
- "\n",
- " skewness_variant_occurrence kurtosis_variant_occurrence \n",
- "0 14.283027 202.004854 \\\n",
- "1 64.625680 5083.455806 \n",
- "2 9.380687 92.281919 \n",
- "3 17.964130 358.019511 \n",
- "4 64.772702 4917.319751 \n",
- "\n",
- " n_unique_activities activities_min activities_max activities_mean \n",
- "0 410 1 830 108.180488 \\\n",
- "1 41 17 466141 61323.560976 \n",
- "2 27 1 1434 317.666667 \n",
- "3 383 1 1409 155.825065 \n",
- "4 42 2 314097 37998.166667 \n",
- "\n",
- " activities_median activities_std activities_variance activities_q1 \n",
- "0 12.0 187.588162 3.518932e+04 3.0 \\\n",
- "1 7530.0 120522.247417 1.452561e+10 902.0 \n",
- "2 27.0 553.389823 3.062403e+05 8.0 \n",
- "3 16.0 306.310544 9.382615e+04 5.0 \n",
- "4 1628.0 80833.669206 6.534082e+09 202.0 \n",
- "\n",
- " activities_q3 activities_iqr activities_skewness activities_kurtosis \n",
- "0 125.5 122.5 2.129412 3.808278 \\\n",
- "1 45907.0 45005.0 2.444007 4.773254 \n",
- "2 50.0 42.0 1.342951 -0.178094 \n",
- "3 108.5 103.5 2.446349 5.280931 \n",
- "4 11536.0 11334.0 2.169648 3.263594 \n",
- "\n",
- " n_unique_start_activities start_activities_min start_activities_max \n",
- "0 14 1 731 \\\n",
- "1 4 2 38623 \n",
- "2 1 1434 1434 \n",
- "3 9 1 1348 \n",
- "4 8 2 199867 \n",
- "\n",
- " start_activities_mean start_activities_median start_activities_std \n",
- "0 59.428571 1.0 186.717401 \\\n",
- "1 10952.250000 2592.0 16111.407548 \n",
- "2 1434.000000 1434.0 0.000000 \n",
- "3 156.555556 8.0 421.270858 \n",
- "4 31466.750000 869.0 65387.493286 \n",
- "\n",
- " start_activities_variance start_activities_q1 start_activities_q3 \n",
- "0 3.486339e+04 1.0 8.25 \\\n",
- "1 2.595775e+08 36.5 13507.75 \n",
- "2 0.000000e+00 1434.0 1434.00 \n",
- "3 1.774691e+05 3.0 14.00 \n",
- "4 4.275524e+09 97.0 14224.25 \n",
- "\n",
- " start_activities_iqr start_activities_skewness start_activities_kurtosis \n",
- "0 7.25 3.300411 8.960767 \\\n",
- "1 13471.25 1.098736 -0.714800 \n",
- "2 0.00 NaN NaN \n",
- "3 11.00 2.474158 4.122971 \n",
- "4 14127.25 2.059742 2.535789 \n",
- "\n",
- " n_unique_end_activities end_activities_min end_activities_max \n",
- "0 82 1 216 \\\n",
- "1 21 1 34830 \n",
- "2 14 1 828 \n",
- "3 119 1 342 \n",
- "4 32 1 181328 \n",
- "\n",
- " end_activities_mean end_activities_median end_activities_std \n",
- "0 10.146341 1.0 35.318800 \\\n",
- "1 2086.142857 13.0 7431.744981 \n",
- "2 102.428571 6.0 225.871555 \n",
- "3 11.840336 2.0 39.557210 \n",
- "4 7866.687500 64.5 31658.428996 \n",
- "\n",
- " end_activities_variance end_activities_q1 end_activities_q3 \n",
- "0 1.247418e+03 1.00 3.00 \\\n",
- "1 5.523083e+07 2.00 193.00 \n",
- "2 5.101796e+04 1.25 33.25 \n",
- "3 1.564773e+03 1.00 7.00 \n",
- "4 1.002256e+09 9.00 1027.25 \n",
- "\n",
- " end_activities_iqr end_activities_skewness end_activities_kurtosis \n",
- "0 2.00 5.098791 25.861991 \\\n",
- "1 191.00 4.062387 14.952824 \n",
- "2 32.00 2.471765 4.846541 \n",
- "3 6.00 6.217217 43.335525 \n",
- "4 1018.25 5.135607 25.170543 \n",
- "\n",
- " entropy_trace entropy_prefix entropy_global_block entropy_lempel_ziv \n",
- "0 9.691 14.524 19.448 3.859 \\\n",
- "1 13.191 16.272 20.972 1.023 \n",
- "2 3.209 4.746 7.019 0.385 \n",
- "3 10.317 14.226 18.743 3.182 \n",
- "4 6.243 8.811 19.447 0.346 \n",
- "\n",
- " entropy_k_block_diff_1 entropy_k_block_diff_3 entropy_k_block_diff_5 \n",
- "0 7.105 7.105 7.105 \\\n",
- "1 -0.010 1.855 0.511 \n",
- "2 2.672 2.966 0.804 \n",
- "3 -0.007 6.780 6.780 \n",
- "4 -0.041 1.530 0.840 \n",
- "\n",
- " entropy_k_block_ratio_1 entropy_k_block_ratio_3 entropy_k_block_ratio_5 \n",
- "0 7.105 7.105 7.105 \\\n",
- "1 1.403 3.572 2.001 \n",
- "2 1.484 2.966 2.966 \n",
- "3 6.780 6.780 6.780 \n",
- "4 0.620 3.244 1.913 \n",
- "\n",
- " entropy_knn_3 entropy_knn_5 entropy_knn_7 Log Nature \n",
- "0 5.545 5.039 4.721 Real \\\n",
- "1 7.849 7.371 7.067 Real \n",
- "2 3.260 2.845 2.584 Real \n",
- "3 5.701 5.212 4.900 Real \n",
- "4 7.333 6.882 6.601 Real \n",
- "\n",
- " epa_variant_entropy epa_normalized_variant_entropy epa_sequence_entropy \n",
- "0 2.405122e+05 0.627973 2.858769e+05 \\\n",
- "1 1.156384e+07 0.712079 2.114626e+07 \n",
- "2 2.382326e+03 0.689363 1.829627e+04 \n",
- "3 2.981464e+05 0.661781 3.975043e+05 \n",
- "4 1.690369e+06 0.645530 7.477256e+06 \n",
- "\n",
- " epa_normalized_sequence_entropy epa_sequence_entropy_linear_forgetting \n",
- "0 0.602371 1.505466e+05 \\\n",
- "1 0.570688 1.414023e+07 \n",
- "2 0.235532 7.814868e+03 \n",
- "3 0.605676 2.241393e+05 \n",
- "4 0.328029 7.298458e+06 \n",
- "\n",
- " epa_normalized_sequence_entropy_linear_forgetting \n",
- "0 0.317217 \\\n",
- "1 0.381612 \n",
- "2 0.100603 \n",
- "3 0.341521 \n",
- "4 0.320185 \n",
- "\n",
- " epa_sequence_entropy_exponential_forgetting \n",
- "0 1.853129e+05 \\\n",
- "1 1.557608e+07 \n",
- "2 1.072870e+04 \n",
- "3 2.657571e+05 \n",
- "4 7.300663e+06 \n",
- "\n",
- " epa_normalized_sequence_entropy_exponential_forgetting \n",
- "0 0.390473 \\\n",
- "1 0.420362 \n",
- "2 0.138113 \n",
- "3 0.404934 \n",
- "4 0.320282 \n",
- "\n",
- " accumulated_time_time_min accumulated_time_time_max \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_mean accumulated_time_time_median \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_mode accumulated_time_time_std \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_variance accumulated_time_time_q1 \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_q3 accumulated_time_time_iqr \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_geometric_mean accumulated_time_time_geometric_std \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_harmonic_mean accumulated_time_time_skewness \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_kurtosis \n",
- "0 NaN \\\n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN \n",
- "\n",
- " accumulated_time_time_coefficient_variation accumulated_time_time_entropy \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " accumulated_time_time_skewness_hist accumulated_time_time_kurtosis_hist \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_min execution_time_time_max execution_time_time_mean \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " execution_time_time_median execution_time_time_mode \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_std execution_time_time_variance \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_q1 execution_time_time_q3 execution_time_time_iqr \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " execution_time_time_geometric_mean execution_time_time_geometric_std \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_harmonic_mean execution_time_time_skewness \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_kurtosis execution_time_time_coefficient_variation \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_entropy execution_time_time_skewness_hist \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " execution_time_time_kurtosis_hist remaining_time_time_min \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_max remaining_time_time_mean \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_median remaining_time_time_mode \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_std remaining_time_time_variance \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_q1 remaining_time_time_q3 remaining_time_time_iqr \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " remaining_time_time_geometric_mean remaining_time_time_geometric_std \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_harmonic_mean remaining_time_time_skewness \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_kurtosis remaining_time_time_coefficient_variation \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_entropy remaining_time_time_skewness_hist \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " remaining_time_time_kurtosis_hist within_day_time_min \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " within_day_time_max within_day_time_mean within_day_time_median \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " within_day_time_mode within_day_time_std within_day_time_variance \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " within_day_time_q1 within_day_time_q3 within_day_time_iqr \n",
- "0 NaN NaN NaN \\\n",
- "1 NaN NaN NaN \n",
- "2 NaN NaN NaN \n",
- "3 NaN NaN NaN \n",
- "4 NaN NaN NaN \n",
- "\n",
- " within_day_time_geometric_mean within_day_time_geometric_std \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " within_day_time_harmonic_mean within_day_time_skewness \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " within_day_time_kurtosis within_day_time_coefficient_variation \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " within_day_time_entropy within_day_time_skewness_hist \n",
- "0 NaN NaN \\\n",
- "1 NaN NaN \n",
- "2 NaN NaN \n",
- "3 NaN NaN \n",
- "4 NaN NaN \n",
- "\n",
- " within_day_time_kurtosis_hist \n",
- "0 NaN \n",
- "1 NaN \n",
- "2 NaN \n",
- "3 NaN \n",
- "4 NaN "
+ " trace_len_mean trace_len_median trace_len_mode trace_len_std ... \n",
+ "0 53.310096 54.0 61 19.894977 ... \\\n",
+ "1 57.391541 49.0 49 34.872131 ... \n",
+ "2 5.981172 6.0 6 2.166129 ... \n",
+ "3 42.356991 42.0 44 16.138406 ... \n",
+ "4 6.339720 5.0 5 13.057417 ... \n",
+ "\n",
+ " within_day_time_iqr within_day_time_geometric_mean \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_geometric_std within_day_time_harmonic_mean \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_skewness within_day_time_kurtosis \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_coefficient_variation within_day_time_entropy \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_skewness_hist within_day_time_kurtosis_hist \n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ "[5 rows x 178 columns]"
]
},
- "execution_count": 20,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
+ "bpic_features = pd.read_csv(\"../data/BaselineED_feat.csv\", index_col=None)\n",
"#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
"\n",
"#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
@@ -2235,12 +862,12 @@
"# errors=\"raise\", inplace=True)\n",
"\n",
"bpic_features.head()\n",
- "#bpic_features.to_csv(\"../data/34_bpic_features.csv\", index=False)"
+ "#bpic_features.to_csv(\"../data/BaselineED_feat.csv\", index=False)"
]
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 9,
"id": "ef0df0b9",
"metadata": {},
"outputs": [
@@ -2248,610 +875,28 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n"
+ "['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n"
]
},
{
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " log | \n",
- " ratio_unique_traces_per_trace | \n",
- " ratio_most_common_variant | \n",
- " ratio_top_10_variants | \n",
- " epa_normalized_variant_entropy | \n",
- " epa_normalized_sequence_entropy | \n",
- " epa_normalized_sequence_entropy_linear_forgetting | \n",
- " epa_normalized_sequence_entropy_exponential_forgetting | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " BPIC15_2 | \n",
- " 0.995192 | \n",
- " 0.002404 | \n",
- " 0.103365 | \n",
- " 0.627973 | \n",
- " 0.602371 | \n",
- " 0.317217 | \n",
- " 0.390473 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " BPI_Challenge_2018 | \n",
- " 0.649570 | \n",
- " 0.026981 | \n",
- " 0.415371 | \n",
- " 0.712079 | \n",
- " 0.570688 | \n",
- " 0.381612 | \n",
- " 0.420362 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " Receipt_WABO_CoSeLoG | \n",
- " 0.080893 | \n",
- " 0.497211 | \n",
- " 0.887029 | \n",
- " 0.689363 | \n",
- " 0.235532 | \n",
- " 0.100603 | \n",
- " 0.138113 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " BPIC15_3 | \n",
- " 0.957417 | \n",
- " 0.010646 | \n",
- " 0.137686 | \n",
- " 0.661781 | \n",
- " 0.605676 | \n",
- " 0.341521 | \n",
- " 0.404934 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " BPI_Challenge_2019 | \n",
- " 0.047562 | \n",
- " 0.199758 | \n",
- " 0.946368 | \n",
- " 0.645530 | \n",
- " 0.328029 | \n",
- " 0.320185 | \n",
- " 0.320282 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " RequestForPayment | \n",
- " 0.012925 | \n",
- " 0.437264 | \n",
- " 0.933488 | \n",
- " 0.703735 | \n",
- " 0.189048 | \n",
- " 0.097572 | \n",
- " 0.118744 | \n",
- "
\n",
- " \n",
- " 6 | \n",
- " PrepaidTravelCost | \n",
- " 0.096236 | \n",
- " 0.271081 | \n",
- " 0.822773 | \n",
- " 0.723785 | \n",
- " 0.317044 | \n",
- " 0.184879 | \n",
- " 0.214387 | \n",
- "
\n",
- " \n",
- " 7 | \n",
- " DomesticDeclarations | \n",
- " 0.009429 | \n",
- " 0.439810 | \n",
- " 0.950095 | \n",
- " 0.696474 | \n",
- " 0.164758 | \n",
- " 0.085439 | \n",
- " 0.104389 | \n",
- "
\n",
- " \n",
- " 8 | \n",
- " BPIC15_4 | \n",
- " 0.996201 | \n",
- " 0.002849 | \n",
- " 0.102564 | \n",
- " 0.652985 | \n",
- " 0.603866 | \n",
- " 0.355927 | \n",
- " 0.412835 | \n",
- "
\n",
- " \n",
- " 9 | \n",
- " BPI_Challenge_2012 | \n",
- " 0.333614 | \n",
- " 0.262016 | \n",
- " 0.686254 | \n",
- " 0.708280 | \n",
- " 0.423074 | \n",
- " 0.226133 | \n",
- " 0.275551 | \n",
- "
\n",
- " \n",
- " 10 | \n",
- " Hospital_log | \n",
- " 0.858268 | \n",
- " 0.035871 | \n",
- " 0.227472 | \n",
- " 0.517443 | \n",
- " 0.513032 | \n",
- " 0.267825 | \n",
- " 0.331672 | \n",
- "
\n",
- " \n",
- " 11 | \n",
- " BPIC15_5 | \n",
- " 0.997405 | \n",
- " 0.001730 | \n",
- " 0.102076 | \n",
- " 0.648702 | \n",
- " 0.603260 | \n",
- " 0.342410 | \n",
- " 0.404580 | \n",
- "
\n",
- " \n",
- " 12 | \n",
- " CoSeLoG_WABO_2 | \n",
- " 0.998450 | \n",
- " 0.003101 | \n",
- " 0.100775 | \n",
- " 0.618455 | \n",
- " 0.594035 | \n",
- " 0.323233 | \n",
- " 0.389858 | \n",
- "
\n",
- " \n",
- " 13 | \n",
- " Road_Traffic_Fine_Management_Process | \n",
- " 0.001536 | \n",
- " 0.375620 | \n",
- " 0.993104 | \n",
- " 0.769353 | \n",
- " 0.111932 | \n",
- " 0.052586 | \n",
- " 0.068442 | \n",
- "
\n",
- " \n",
- " 14 | \n",
- " BPI_Challenge_2017_Offer_log | \n",
- " 0.000372 | \n",
- " 0.380626 | \n",
- " 0.380626 | \n",
- " 0.813479 | \n",
- " 0.105130 | \n",
- " 0.052672 | \n",
- " 0.066000 | \n",
- "
\n",
- " \n",
- " 15 | \n",
- " Sepsis_Cases_Event_Log | \n",
- " 0.805714 | \n",
- " 0.033333 | \n",
- " 0.274286 | \n",
- " 0.695759 | \n",
- " 0.522343 | \n",
- " 0.219365 | \n",
- " 0.299505 | \n",
- "
\n",
- " \n",
- " 16 | \n",
- " CoSeLoG_WABO_3 | \n",
- " 0.949402 | \n",
- " 0.011960 | \n",
- " 0.145354 | \n",
- " 0.654296 | \n",
- " 0.596367 | \n",
- " 0.278121 | \n",
- " 0.356439 | \n",
- "
\n",
- " \n",
- " 17 | \n",
- " BPI_Challenge_2013_closed_problems | \n",
- " 0.123067 | \n",
- " 0.331540 | \n",
- " 0.840619 | \n",
- " 0.705383 | \n",
- " 0.310940 | \n",
- " 0.286515 | \n",
- " 0.288383 | \n",
- "
\n",
- " \n",
- " 18 | \n",
- " BPI_Challenge_2013_incidents | \n",
- " 0.200026 | \n",
- " 0.232195 | \n",
- " 0.794414 | \n",
- " 0.717846 | \n",
- " 0.404651 | \n",
- " 0.391097 | \n",
- " 0.391625 | \n",
- "
\n",
- " \n",
- " 19 | \n",
- " PermitLog | \n",
- " 0.209200 | \n",
- " 0.135315 | \n",
- " 0.757537 | \n",
- " 0.733653 | \n",
- " 0.420150 | \n",
- " 0.137287 | \n",
- " 0.215490 | \n",
- "
\n",
- " \n",
- " 20 | \n",
- " BPIC15_1 | \n",
- " 0.975813 | \n",
- " 0.006672 | \n",
- " 0.121768 | \n",
- " 0.652855 | \n",
- " 0.610294 | \n",
- " 0.270241 | \n",
- " 0.363928 | \n",
- "
\n",
- " \n",
- " 21 | \n",
- " InternationalDeclarations | \n",
- " 0.116762 | \n",
- " 0.212281 | \n",
- " 0.811289 | \n",
- " 0.758268 | \n",
- " 0.339380 | \n",
- " 0.145611 | \n",
- " 0.193753 | \n",
- "
\n",
- " \n",
- " 22 | \n",
- " BPI_Challenge_2017 | \n",
- " 0.505570 | \n",
- " 0.033514 | \n",
- " 0.531340 | \n",
- " 0.741706 | \n",
- " 0.461565 | \n",
- " 0.231922 | \n",
- " 0.290464 | \n",
- "
\n",
- " \n",
- " 23 | \n",
- " BPI2016_Complaints | \n",
- " 0.438053 | \n",
- " 0.101770 | \n",
- " 0.424779 | \n",
- " 0.899497 | \n",
- " 0.683796 | \n",
- " 0.404685 | \n",
- " 0.470116 | \n",
- "
\n",
- " \n",
- " 24 | \n",
- " BPI2016_Questions | \n",
- " 0.797427 | \n",
- " 0.015650 | \n",
- " 0.282311 | \n",
- " 0.813468 | \n",
- " 0.756132 | \n",
- " 0.424910 | \n",
- " 0.506118 | \n",
- "
\n",
- " \n",
- " 25 | \n",
- " BPI2016_Werkmap_Messages | \n",
- " 0.002882 | \n",
- " 0.295803 | \n",
- " 0.714106 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 26 | \n",
- " BPI_Challenge_2013_open_problems | \n",
- " 0.131868 | \n",
- " 0.217338 | \n",
- " 0.769231 | \n",
- " 0.702960 | \n",
- " 0.276771 | \n",
- " 0.262094 | \n",
- " 0.263029 | \n",
- "
\n",
- " \n",
- " 27 | \n",
- " CoSeLoG_WABO_1 | \n",
- " 0.977588 | \n",
- " 0.009605 | \n",
- " 0.119530 | \n",
- " 0.646697 | \n",
- " 0.601566 | \n",
- " 0.292824 | \n",
- " 0.376276 | \n",
- "
\n",
- " \n",
- " 28 | \n",
- " CoSeLoG_WABO_4 | \n",
- " 0.992376 | \n",
- " 0.002541 | \n",
- " 0.106734 | \n",
- " 0.644399 | \n",
- " 0.597109 | \n",
- " 0.373920 | \n",
- " 0.422526 | \n",
- "
\n",
- " \n",
- " 29 | \n",
- " CoSeLoG_WABO_5 | \n",
- " 0.985426 | \n",
- " 0.004484 | \n",
- " 0.112108 | \n",
- " 0.642668 | \n",
- " 0.592454 | \n",
- " 0.346832 | \n",
- " 0.401731 | \n",
- "
\n",
- " \n",
- " 30 | \n",
- " Detail_Change | \n",
- " 0.048444 | \n",
- " 0.074944 | \n",
- " 0.765056 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 31 | \n",
- " Detail_Incident_Activity | \n",
- " 0.496847 | \n",
- " 0.037455 | \n",
- " 0.552836 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 32 | \n",
- " Detail_Interaction | \n",
- " 0.000041 | \n",
- " 0.787081 | \n",
- " 0.000000 | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- " NaN | \n",
- "
\n",
- " \n",
- " 33 | \n",
- " finale | \n",
- " 0.049345 | \n",
- " 0.516594 | \n",
- " 0.906332 | \n",
- " 0.799120 | \n",
- " 0.254066 | \n",
- " 0.118478 | \n",
- " 0.154576 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " log ratio_unique_traces_per_trace \n",
- "0 BPIC15_2 0.995192 \\\n",
- "1 BPI_Challenge_2018 0.649570 \n",
- "2 Receipt_WABO_CoSeLoG 0.080893 \n",
- "3 BPIC15_3 0.957417 \n",
- "4 BPI_Challenge_2019 0.047562 \n",
- "5 RequestForPayment 0.012925 \n",
- "6 PrepaidTravelCost 0.096236 \n",
- "7 DomesticDeclarations 0.009429 \n",
- "8 BPIC15_4 0.996201 \n",
- "9 BPI_Challenge_2012 0.333614 \n",
- "10 Hospital_log 0.858268 \n",
- "11 BPIC15_5 0.997405 \n",
- "12 CoSeLoG_WABO_2 0.998450 \n",
- "13 Road_Traffic_Fine_Management_Process 0.001536 \n",
- "14 BPI_Challenge_2017_Offer_log 0.000372 \n",
- "15 Sepsis_Cases_Event_Log 0.805714 \n",
- "16 CoSeLoG_WABO_3 0.949402 \n",
- "17 BPI_Challenge_2013_closed_problems 0.123067 \n",
- "18 BPI_Challenge_2013_incidents 0.200026 \n",
- "19 PermitLog 0.209200 \n",
- "20 BPIC15_1 0.975813 \n",
- "21 InternationalDeclarations 0.116762 \n",
- "22 BPI_Challenge_2017 0.505570 \n",
- "23 BPI2016_Complaints 0.438053 \n",
- "24 BPI2016_Questions 0.797427 \n",
- "25 BPI2016_Werkmap_Messages 0.002882 \n",
- "26 BPI_Challenge_2013_open_problems 0.131868 \n",
- "27 CoSeLoG_WABO_1 0.977588 \n",
- "28 CoSeLoG_WABO_4 0.992376 \n",
- "29 CoSeLoG_WABO_5 0.985426 \n",
- "30 Detail_Change 0.048444 \n",
- "31 Detail_Incident_Activity 0.496847 \n",
- "32 Detail_Interaction 0.000041 \n",
- "33 finale 0.049345 \n",
- "\n",
- " ratio_most_common_variant ratio_top_10_variants \n",
- "0 0.002404 0.103365 \\\n",
- "1 0.026981 0.415371 \n",
- "2 0.497211 0.887029 \n",
- "3 0.010646 0.137686 \n",
- "4 0.199758 0.946368 \n",
- "5 0.437264 0.933488 \n",
- "6 0.271081 0.822773 \n",
- "7 0.439810 0.950095 \n",
- "8 0.002849 0.102564 \n",
- "9 0.262016 0.686254 \n",
- "10 0.035871 0.227472 \n",
- "11 0.001730 0.102076 \n",
- "12 0.003101 0.100775 \n",
- "13 0.375620 0.993104 \n",
- "14 0.380626 0.380626 \n",
- "15 0.033333 0.274286 \n",
- "16 0.011960 0.145354 \n",
- "17 0.331540 0.840619 \n",
- "18 0.232195 0.794414 \n",
- "19 0.135315 0.757537 \n",
- "20 0.006672 0.121768 \n",
- "21 0.212281 0.811289 \n",
- "22 0.033514 0.531340 \n",
- "23 0.101770 0.424779 \n",
- "24 0.015650 0.282311 \n",
- "25 0.295803 0.714106 \n",
- "26 0.217338 0.769231 \n",
- "27 0.009605 0.119530 \n",
- "28 0.002541 0.106734 \n",
- "29 0.004484 0.112108 \n",
- "30 0.074944 0.765056 \n",
- "31 0.037455 0.552836 \n",
- "32 0.787081 0.000000 \n",
- "33 0.516594 0.906332 \n",
- "\n",
- " epa_normalized_variant_entropy epa_normalized_sequence_entropy \n",
- "0 0.627973 0.602371 \\\n",
- "1 0.712079 0.570688 \n",
- "2 0.689363 0.235532 \n",
- "3 0.661781 0.605676 \n",
- "4 0.645530 0.328029 \n",
- "5 0.703735 0.189048 \n",
- "6 0.723785 0.317044 \n",
- "7 0.696474 0.164758 \n",
- "8 0.652985 0.603866 \n",
- "9 0.708280 0.423074 \n",
- "10 0.517443 0.513032 \n",
- "11 0.648702 0.603260 \n",
- "12 0.618455 0.594035 \n",
- "13 0.769353 0.111932 \n",
- "14 0.813479 0.105130 \n",
- "15 0.695759 0.522343 \n",
- "16 0.654296 0.596367 \n",
- "17 0.705383 0.310940 \n",
- "18 0.717846 0.404651 \n",
- "19 0.733653 0.420150 \n",
- "20 0.652855 0.610294 \n",
- "21 0.758268 0.339380 \n",
- "22 0.741706 0.461565 \n",
- "23 0.899497 0.683796 \n",
- "24 0.813468 0.756132 \n",
- "25 0.000000 0.000000 \n",
- "26 0.702960 0.276771 \n",
- "27 0.646697 0.601566 \n",
- "28 0.644399 0.597109 \n",
- "29 0.642668 0.592454 \n",
- "30 NaN NaN \n",
- "31 NaN NaN \n",
- "32 NaN NaN \n",
- "33 0.799120 0.254066 \n",
- "\n",
- " epa_normalized_sequence_entropy_linear_forgetting \n",
- "0 0.317217 \\\n",
- "1 0.381612 \n",
- "2 0.100603 \n",
- "3 0.341521 \n",
- "4 0.320185 \n",
- "5 0.097572 \n",
- "6 0.184879 \n",
- "7 0.085439 \n",
- "8 0.355927 \n",
- "9 0.226133 \n",
- "10 0.267825 \n",
- "11 0.342410 \n",
- "12 0.323233 \n",
- "13 0.052586 \n",
- "14 0.052672 \n",
- "15 0.219365 \n",
- "16 0.278121 \n",
- "17 0.286515 \n",
- "18 0.391097 \n",
- "19 0.137287 \n",
- "20 0.270241 \n",
- "21 0.145611 \n",
- "22 0.231922 \n",
- "23 0.404685 \n",
- "24 0.424910 \n",
- "25 0.000000 \n",
- "26 0.262094 \n",
- "27 0.292824 \n",
- "28 0.373920 \n",
- "29 0.346832 \n",
- "30 NaN \n",
- "31 NaN \n",
- "32 NaN \n",
- "33 0.118478 \n",
- "\n",
- " epa_normalized_sequence_entropy_exponential_forgetting \n",
- "0 0.390473 \n",
- "1 0.420362 \n",
- "2 0.138113 \n",
- "3 0.404934 \n",
- "4 0.320282 \n",
- "5 0.118744 \n",
- "6 0.214387 \n",
- "7 0.104389 \n",
- "8 0.412835 \n",
- "9 0.275551 \n",
- "10 0.331672 \n",
- "11 0.404580 \n",
- "12 0.389858 \n",
- "13 0.068442 \n",
- "14 0.066000 \n",
- "15 0.299505 \n",
- "16 0.356439 \n",
- "17 0.288383 \n",
- "18 0.391625 \n",
- "19 0.215490 \n",
- "20 0.363928 \n",
- "21 0.193753 \n",
- "22 0.290464 \n",
- "23 0.470116 \n",
- "24 0.506118 \n",
- "25 0.000000 \n",
- "26 0.263029 \n",
- "27 0.376276 \n",
- "28 0.422526 \n",
- "29 0.401731 \n",
- "30 NaN \n",
- "31 NaN \n",
- "32 NaN \n",
- "33 0.154576 "
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
+ "ename": "KeyError",
+ "evalue": "\"['ratio_variants_per_number_of_traces'] not in index\"",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
+ "Input \u001b[0;32mIn [9]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 3\u001b[0m normalized_feature_names \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_variants_per_number_of_traces\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_most_common_variant\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 4\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mratio_top_10_variants\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_variant_entropy\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy\u001b[39m\u001b[38;5;124m'\u001b[39m, \n\u001b[1;32m 5\u001b[0m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy_linear_forgetting\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mepa_normalized_sequence_entropy_exponential_forgetting\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(normalized_feature_names)\n\u001b[0;32m----> 7\u001b[0m \u001b[43mbpic_features\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mlog\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mnormalized_feature_names\u001b[49m\u001b[43m]\u001b[49m\n",
+ "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:3767\u001b[0m, in \u001b[0;36mDataFrame.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3765\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[1;32m 3766\u001b[0m key \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[0;32m-> 3767\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 3769\u001b[0m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[1;32m 3770\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n",
+ "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:5876\u001b[0m, in \u001b[0;36mIndex._get_indexer_strict\u001b[0;34m(self, key, axis_name)\u001b[0m\n\u001b[1;32m 5873\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 5874\u001b[0m keyarr, indexer, new_indexer \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reindex_non_unique(keyarr)\n\u001b[0;32m-> 5876\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5878\u001b[0m keyarr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtake(indexer)\n\u001b[1;32m 5879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[1;32m 5880\u001b[0m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
+ "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:5938\u001b[0m, in \u001b[0;36mIndex._raise_if_missing\u001b[0;34m(self, key, indexer, axis_name)\u001b[0m\n\u001b[1;32m 5935\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 5937\u001b[0m not_found \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask\u001b[38;5;241m.\u001b[39mnonzero()[\u001b[38;5;241m0\u001b[39m]]\u001b[38;5;241m.\u001b[39munique())\n\u001b[0;32m-> 5938\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not in index\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[0;31mKeyError\u001b[0m: \"['ratio_variants_per_number_of_traces'] not in index\""
+ ]
}
],
"source": [
"bpic_stats = bpic_features.describe().transpose()\n",
"normalized_feature_names = bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() \n",
- "normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', \n",
+ "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n",
" 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
" 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
"print(normalized_feature_names)\n",
@@ -2860,40 +905,10 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "44909860",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "21\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rt10v.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rmcv.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enself.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rt10v.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rt10v.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enseef.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rmcv.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rutpt.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enve.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rmcv.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rmcv.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rutpt.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_enve.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rt10v.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rutpt.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enself.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rt10v.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enve.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rutpt.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rt10v_rutpt.json\n",
- "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rutpt.json\n",
- "None\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#Features between 0 and 1: \n",
"def write_generator_bpic_experiment(objectives, n_para_obj=2):\n",
@@ -2901,7 +916,7 @@
" experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n",
" experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n",
" for exp in experiments:\n",
- " experiment_path = os.path.join('..','data', '34_bpic_features')\n",
+ " experiment_path = os.path.join('..','data', 'BaselineED_feat')\n",
" os.makedirs(experiment_path, exist_ok=True)\n",
" experiment_path = os.path.join(experiment_path, f\"34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv\") \n",
"\n",
@@ -2914,7 +929,7 @@
" 'pipeline_step': 'event_logs_generation',\n",
" 'output_path':'output/generated',\n",
" 'generator_params': {\n",
- " \"experiment\": {\"input_path\": \"data/34_bpic_features.csv\",\n",
+ " \"experiment\": {\"input_path\": \"data/BaselineED_feat.csv\",\n",
" \"objectives\": exp},\n",
" 'config_space': {\n",
" 'mode': [5, 20],\n",
@@ -2933,15 +948,15 @@
" },\n",
" {\n",
" 'pipeline_step': 'feature_extraction',\n",
- " 'input_path': os.path.join('output', 'features', 'generated', '34_bpic_features', second_dir),\n",
+ " 'input_path': os.path.join('output', 'features', 'generated', 'BaselineED_feat', second_dir),\n",
" 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
" 'output_path': 'output/plots',\n",
- " 'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+ " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
" 'plot_type': 'boxplot'\n",
" }\n",
" ]\n",
"\n",
- " output_path = os.path.join('..', 'config_files','algorithm','34_bpic_features')\n",
+ " output_path = os.path.join('..', 'config_files','algorithm','BaselineED_feat')\n",
" os.makedirs(output_path, exist_ok=True)\n",
" output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
"\n",
@@ -2975,34 +990,10 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "d759a677",
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "7 experiments: [('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_variant_entropy',), ('ratio_top_10_variants',), ('epa_normalized_sequence_entropy',), ('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('ratio_unique_traces_per_trace',)]\n",
- "11\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_ense.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_enself.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enself.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_rmcv.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rmcv.json\n",
- "Saved experiment in ../data/grid_experiments/grid_1objectives_rutpt.csv\n",
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rutpt.json\n",
- "None\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"def write_single_objective_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n",
" first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n",
@@ -3035,7 +1026,7 @@
" 'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),\n",
" 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
" 'output_path': 'output/plots',\n",
- " 'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+ " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
" 'plot_type': 'boxplot'\n",
" }\n",
" ]\n",
@@ -3102,7 +1093,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.9.19"
+ "version": "3.9.7"
}
},
"nbformat": 4,
|