Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
776721c
1
Parent(s):
b279395
Renames ratio_unique_traces_per_trace --> ratio_variants_per_number_of_traces
Browse files
- config_files/config_layout.json +1 -1
- config_files/experiment_real_targets.json +3 -3
- data/test/2_bpic_features.csv +1 -1
- gedi/config.py +1 -1
- gedi/features.py +1 -0
- gedi/utils/column_mappings.py +1 -1
- utils/config_fabric.py +1 -1
config_files/config_layout.json
CHANGED
@@ -33,7 +33,7 @@
|
|
33 |
{
|
34 |
"pipeline_step": "feature_extraction",
|
35 |
"input_path": "data/test",
|
36 |
-
"feature_params": {"feature_set": ["n_traces", "n_unique_traces", "
|
37 |
"output_path": "output/plots",
|
38 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
39 |
"plot_type": "boxplot"
|
|
|
33 |
{
|
34 |
"pipeline_step": "feature_extraction",
|
35 |
"input_path": "data/test",
|
36 |
+
"feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", 
"eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
|
37 |
"output_path": "output/plots",
|
38 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
39 |
"plot_type": "boxplot"
|
config_files/experiment_real_targets.json
CHANGED
@@ -24,8 +24,8 @@
|
|
24 |
},
|
25 |
{
|
26 |
"pipeline_step": "feature_extraction",
|
27 |
-
"input_path": "output/BaselineED_feat/
|
28 |
-
"input_path": "output/features/BaselineED_feat/
|
29 |
"feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
|
30 |
"output_path": "output/plots",
|
31 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
@@ -34,7 +34,7 @@
|
|
34 |
{
|
35 |
"pipeline_step": "benchmark_test",
|
36 |
"benchmark_test": "discovery",
|
37 |
-
"input_path": "output/BaselineED_feat/
|
38 |
"output_path":"output",
|
39 |
"miners" : ["heu", "imf", "ilp"]
|
40 |
}
|
|
|
24 |
},
|
25 |
{
|
26 |
"pipeline_step": "feature_extraction",
|
27 |
+
"input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
|
28 |
+
"input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
|
29 |
"feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
|
30 |
"output_path": "output/plots",
|
31 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
|
|
34 |
{
|
35 |
"pipeline_step": "benchmark_test",
|
36 |
"benchmark_test": "discovery",
|
37 |
+
"input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
|
38 |
"output_path":"output",
|
39 |
"miners" : ["heu", "imf", "ilp"]
|
40 |
}
|
data/test/2_bpic_features.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
-
log,n_traces,n_unique_traces,
|
2 |
Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
|
3 |
CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
|
|
|
1 |
+
log,n_traces,n_unique_traces,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normali
zed_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
|
2 |
Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
|
3 |
CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
|
gedi/config.py
CHANGED
@@ -19,7 +19,7 @@ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
|
|
19 |
' Use a configuration from the `config_files`-folder together with the args `-a`.')
|
20 |
return [
|
21 |
{PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
|
22 |
-
FEATURE_PARAMS: {FEATURE_SET: ['
|
23 |
'ratio_most_common_variant']},
|
24 |
OUTPUT_PATH: 'output/plots'}
|
25 |
]
|
|
|
19 |
' Use a configuration from the `config_files`-folder together with the args `-a`.')
|
20 |
return [
|
21 |
{PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
|
22 |
+
FEATURE_PARAMS: {FEATURE_SET: ['ratio_variants_per_number_of_traces',
|
23 |
'ratio_most_common_variant']},
|
24 |
OUTPUT_PATH: 'output/plots'}
|
25 |
]
|
gedi/features.py
CHANGED
@@ -16,6 +16,7 @@ def get_sortby_parameter(elem):
|
|
16 |
number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
|
17 |
return number
|
18 |
|
|
|
19 |
class EventLogFile:
|
20 |
def __init__(self, filename, folder_path):
|
21 |
self.root_path: Path = Path(folder_path)
|
|
|
16 |
number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
|
17 |
return number
|
18 |
|
19 |
+
|
20 |
class EventLogFile:
|
21 |
def __init__(self, filename, folder_path):
|
22 |
self.root_path: Path = Path(folder_path)
|
gedi/utils/column_mappings.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
def column_mappings():
|
2 |
|
3 |
column_names_short = {
|
4 |
-
'
|
5 |
'rmcv': 'ratio_most_common_variant',
|
6 |
'tlcv': 'trace_len_coefficient_variation',
|
7 |
'mvo': 'mean_variant_occurrence',
|
|
|
1 |
def column_mappings():
|
2 |
|
3 |
column_names_short = {
|
4 |
+
'rvpnot': 'ratio_variants_per_number_of_traces',
|
5 |
'rmcv': 'ratio_most_common_variant',
|
6 |
'tlcv': 'trace_len_coefficient_variation',
|
7 |
'mvo': 'mean_variant_occurrence',
|
utils/config_fabric.py
CHANGED
@@ -174,7 +174,7 @@ def set_generator_experiments(generator_params):
|
|
174 |
#TODO: This code is duplicated. Should be moved and removed.
|
175 |
def column_mappings():
|
176 |
column_names_short = {
|
177 |
-
'
|
178 |
'rmcv': 'ratio_most_common_variant',
|
179 |
'tlcv': 'trace_len_coefficient_variation',
|
180 |
'mvo': 'mean_variant_occurrence',
|
|
|
174 |
#TODO: This code is duplicated. Should be moved and removed.
|
175 |
def column_mappings():
|
176 |
column_names_short = {
|
177 |
+
'rvpnot': 'ratio_variants_per_number_of_traces',
|
178 |
'rmcv': 'ratio_most_common_variant',
|
179 |
'tlcv': 'trace_len_coefficient_variation',
|
180 |
'mvo': 'mean_variant_occurrence',
|