Andrea Maldonado committed on
Commit
776721c
·
1 Parent(s): b279395

Renames ratio_unique_traces_per_trace --> ratio_variants_per_number_of_traces

Browse files
config_files/config_layout.json CHANGED
@@ -33,7 +33,7 @@
33
  {
34
  "pipeline_step": "feature_extraction",
35
  "input_path": "data/test",
36
- "feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", 
"eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
37
  "output_path": "output/plots",
38
  "real_eventlog_path": "data/BaselineED_feat.csv",
39
  "plot_type": "boxplot"
 
33
  {
34
  "pipeline_step": "feature_extraction",
35
  "input_path": "data/test",
36
+ "feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", 
"eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
37
  "output_path": "output/plots",
38
  "real_eventlog_path": "data/BaselineED_feat.csv",
39
  "plot_type": "boxplot"
config_files/experiment_real_targets.json CHANGED
@@ -24,8 +24,8 @@
24
  },
25
  {
26
  "pipeline_step": "feature_extraction",
27
- "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
28
- "input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
29
  "feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
30
  "output_path": "output/plots",
31
  "real_eventlog_path": "data/BaselineED_feat.csv",
@@ -34,7 +34,7 @@
34
  {
35
  "pipeline_step": "benchmark_test",
36
  "benchmark_test": "discovery",
37
- "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
38
  "output_path":"output",
39
  "miners" : ["heu", "imf", "ilp"]
40
  }
 
24
  },
25
  {
26
  "pipeline_step": "feature_extraction",
27
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
28
+ "input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
29
  "feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
30
  "output_path": "output/plots",
31
  "real_eventlog_path": "data/BaselineED_feat.csv",
 
34
  {
35
  "pipeline_step": "benchmark_test",
36
  "benchmark_test": "discovery",
37
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
38
  "output_path":"output",
39
  "miners" : ["heu", "imf", "ilp"]
40
  }
data/test/2_bpic_features.csv CHANGED
@@ -1,3 +1,3 @@
1
- log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_
sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
2
  Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
3
  CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
 
1
+ log,n_traces,n_unique_traces,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_norma
lized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
2
  Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
3
  CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
gedi/config.py CHANGED
@@ -19,7 +19,7 @@ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
19
  ' Use a configuration from the `config_files`-folder together with the args `-a`.')
20
  return [
21
  {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
22
- FEATURE_PARAMS: {FEATURE_SET: ['ratio_unique_traces_per_trace',
23
  'ratio_most_common_variant']},
24
  OUTPUT_PATH: 'output/plots'}
25
  ]
 
19
  ' Use a configuration from the `config_files`-folder together with the args `-a`.')
20
  return [
21
  {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
22
+ FEATURE_PARAMS: {FEATURE_SET: ['ratio_variants_per_number_of_traces',
23
  'ratio_most_common_variant']},
24
  OUTPUT_PATH: 'output/plots'}
25
  ]
gedi/features.py CHANGED
@@ -16,6 +16,7 @@ def get_sortby_parameter(elem):
16
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
17
  return number
18
 
 
19
  class EventLogFile:
20
  def __init__(self, filename, folder_path):
21
  self.root_path: Path = Path(folder_path)
 
16
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
17
  return number
18
 
19
+
20
  class EventLogFile:
21
  def __init__(self, filename, folder_path):
22
  self.root_path: Path = Path(folder_path)
gedi/utils/column_mappings.py CHANGED
@@ -1,7 +1,7 @@
1
  def column_mappings():
2
 
3
  column_names_short = {
4
- 'rutpt': 'ratio_unique_traces_per_trace',
5
  'rmcv': 'ratio_most_common_variant',
6
  'tlcv': 'trace_len_coefficient_variation',
7
  'mvo': 'mean_variant_occurrence',
 
1
  def column_mappings():
2
 
3
  column_names_short = {
4
+ 'rvpnot': 'ratio_variants_per_number_of_traces',
5
  'rmcv': 'ratio_most_common_variant',
6
  'tlcv': 'trace_len_coefficient_variation',
7
  'mvo': 'mean_variant_occurrence',
utils/config_fabric.py CHANGED
@@ -174,7 +174,7 @@ def set_generator_experiments(generator_params):
174
  #TODO: This code is duplicated. Should be moved and removed.
175
  def column_mappings():
176
  column_names_short = {
177
- 'rutpt': 'ratio_unique_traces_per_trace',
178
  'rmcv': 'ratio_most_common_variant',
179
  'tlcv': 'trace_len_coefficient_variation',
180
  'mvo': 'mean_variant_occurrence',
 
174
  #TODO: This code is duplicated. Should be moved and removed.
175
  def column_mappings():
176
  column_names_short = {
177
+ 'rvpnot': 'ratio_variants_per_number_of_traces',
178
  'rmcv': 'ratio_most_common_variant',
179
  'tlcv': 'trace_len_coefficient_variation',
180
  'mvo': 'mean_variant_occurrence',