Andrea Maldonado committed on
Commit
da8f162
·
2 Parent(s): 1dbd766 2579241

Merge branch 'bpm24' into 16-documentation-update-readme

Browse files

* bpm24: (31 commits)
Adds grid_2 experiments
Adds grid_2 experiments
Passes arg to grid experiments script
Fixes paths
Hot fix for column name change
wip experiment with real targets
Converts encoding
Updates test data
Adds grid experiments script test
no need legacy config file
Adds convert to same encoding
Updates validation data
Sorts for replicability
CI augmentation pipeline step
Updates data
CI for benchmarking
Sorts file list
splits compare output
CI pipeline step generation
Adds validation data
...

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .github/workflows/test_gedi.yml +49 -9
  2. config_files/algorithm/augmentation.json +0 -12
  3. config_files/algorithm/experiment_real_targets.json +41 -0
  4. config_files/algorithm/experiment_test.json +1 -1
  5. config_files/algorithm/fix_24.json +0 -34
  6. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json +1 -0
  7. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json +1 -0
  8. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json +1 -0
  9. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json +1 -0
  10. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json +1 -0
  11. config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json +1 -0
  12. config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json +1 -0
  13. config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json +1 -0
  14. config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json +1 -0
  15. config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json +1 -0
  16. config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json +1 -0
  17. config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json +1 -0
  18. config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json +1 -0
  19. config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json +1 -0
  20. config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json +1 -0
  21. config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json +1 -0
  22. config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json +1 -0
  23. config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json +1 -0
  24. config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json +1 -0
  25. config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json +1 -0
  26. config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json +1 -0
  27. config_files/algorithm/pipeline_steps/augmentation.json +12 -0
  28. config_files/algorithm/{benchmark.json → pipeline_steps/benchmark.json} +1 -1
  29. config_files/algorithm/{evaluation_plotter.json → pipeline_steps/evaluation_plotter.json} +2 -2
  30. config_files/algorithm/{feature_extraction.json → pipeline_steps/feature_extraction.json} +1 -1
  31. config_files/algorithm/{generation.json → pipeline_steps/generation.json} +0 -0
  32. dashboard.py +0 -295
  33. data/2_bpic_features.csv +3 -0
  34. data/GenED_bench.csv +0 -0
  35. data/GenED_feat.csv +0 -0
  36. data/bpic_features.csv +1 -1
  37. data/grid_1obj/{grid_1objectives_rutpt.csv → grid_1objectives_rvpnot.csv} +1 -1
  38. data/grid_2obj/{grid_2objectives_enve_rutpt.csv → grid_2objectives_ense_rvpnot.csv} +1 -1
  39. data/grid_2obj/{grid_2objectives_enseef_rutpt.csv → grid_2objectives_enseef_rvpnot.csv} +1 -1
  40. data/grid_2obj/{grid_2objectives_enself_rutpt.csv → grid_2objectives_enself_rvpnot.csv} +1 -1
  41. data/grid_2obj/{grid_2objectives_rt10v_rutpt.csv → grid_2objectives_enve_rvpnot.csv} +1 -1
  42. data/grid_2obj/{grid_2objectives_ense_rutpt.csv → grid_2objectives_rmcv_rvpnot.csv} +1 -1
  43. data/grid_2obj/{grid_2objectives_rmcv_rutpt.csv → grid_2objectives_rt10v_rvpnot.csv} +1 -1
  44. data/validation/2_ense_enseef_feat.csv +3 -0
  45. data/validation/genELexperiment1_04_02.json +1 -0
  46. data/validation/genELexperiment2_07_04.json +1 -0
  47. data/validation/test_benchmark.csv +3 -0
  48. data/validation/test_feat.csv +3 -0
  49. execute_grid_experiments.py +9 -5
  50. gedi/benchmark.py +4 -2
.github/workflows/test_gedi.yml CHANGED
@@ -29,10 +29,10 @@ jobs:
29
 
30
  - name: Run test
31
  run:
32
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
33
 
34
  - name: Compare output
35
- run: diff data/test_feat.csv data/test_feat.csv
36
 
37
  test_generation:
38
  runs-on: ubuntu-latest
@@ -58,10 +58,15 @@ jobs:
58
 
59
  - name: Run test
60
  run:
61
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
62
 
63
- - name: Compare output
64
- run: diff output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
 
 
 
 
 
65
 
66
  test_benchmark:
67
  runs-on: ubuntu-latest
@@ -83,10 +88,13 @@ jobs:
83
 
84
  - name: Run test
85
  run:
86
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
 
 
 
87
 
88
  - name: Compare output
89
- run: diff output/benchmark/test_benchmark.csv output/benchmark/test_benchmark.csv
90
 
91
  test_augmentation:
92
  runs-on: ubuntu-latest
@@ -108,7 +116,7 @@ jobs:
108
 
109
  - name: Run test
110
  run:
111
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
112
 
113
  test_evaluation-plotter:
114
  runs-on: ubuntu-latest
@@ -134,7 +142,7 @@ jobs:
134
 
135
  - name: Run test
136
  run:
137
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
138
 
139
  test_integration:
140
  runs-on: ubuntu-latest
@@ -161,3 +169,35 @@ jobs:
161
  - name: Run test
162
  run:
163
  python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  - name: Run test
31
  run:
32
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/feature_extraction.json
33
 
34
  - name: Compare output
35
+ run: diff data/validation/test_feat.csv data/test_feat.csv
36
 
37
  test_generation:
38
  runs-on: ubuntu-latest
 
58
 
59
  - name: Run test
60
  run:
61
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/generation.json
62
 
63
+ - name: Compare output 1
64
+ run:
65
+ diff data/validation/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
66
+
67
+ - name: Compare output 2
68
+ run:
69
+ diff data/validation/genELexperiment1_04_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment1_04_02.json
70
 
71
  test_benchmark:
72
  runs-on: ubuntu-latest
 
88
 
89
  - name: Run test
90
  run:
91
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/benchmark.json
92
+
93
+ - name: Convert output and validation to same encoding
94
+ run: iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > data/validation/test_benchmark.csv
95
 
96
  - name: Compare output
97
+ run: diff data/validation/test_benchmark.csv output/benchmark/test_benchmark.csv
98
 
99
  test_augmentation:
100
  runs-on: ubuntu-latest
 
116
 
117
  - name: Run test
118
  run:
119
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/augmentation.json
120
 
121
  test_evaluation-plotter:
122
  runs-on: ubuntu-latest
 
142
 
143
  - name: Run test
144
  run:
145
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
146
 
147
  test_integration:
148
  runs-on: ubuntu-latest
 
169
  - name: Run test
170
  run:
171
  python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
172
+
173
+ test_grid_experiments_script:
174
+ runs-on: ubuntu-latest
175
+
176
+ # Setting up a python environment for the test script to run
177
+ steps:
178
+ - name: Checkout code
179
+ uses: actions/checkout@v4
180
+
181
+ - name: Set up Python
182
+ uses: actions/setup-python@v5
183
+ with:
184
+ python-version: 3.9
185
+
186
+ - name: Install dependencies
187
+ run: |
188
+ sudo apt-get install build-essential python3 python3-dev
189
+
190
+ - name: Install feeed
191
+ run: |
192
+ python -m pip install --upgrade pip
193
+ pip install .
194
+
195
+ - name: Run test
196
+ run:
197
+ python execute_grid_experiments.py config_files/algorithm/test
198
+
199
+ - name: Convert output and validation to same encoding
200
+ run: iconv -f UTF-8 -t ASCII output/features/generated/2_bpic_features/2_ense_enseef_feat.csv > data/validation/2_ense_enseef_feat.csv
201
+
202
+ - name: Compare output
203
+ run: diff data/validation/2_ense_enseef_feat.csv output/features/generated/2_bpic_features/2_ense_enseef_feat.csv
config_files/algorithm/augmentation.json DELETED
@@ -1,12 +0,0 @@
1
- [
2
- {
3
- "pipeline_step": "instance_augmentation",
4
- "augmentation_params":
5
- {
6
- "method":"SMOTE", "no_samples":20,
7
- "feature_selection": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
8
- },
9
- "input_path": "data/bpic_features.csv",
10
- "output_path": "output"
11
- }
12
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
config_files/algorithm/experiment_real_targets.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "event_logs_generation",
4
+ "output_path": "output",
5
+ "generator_params": {
6
+ "experiment": {
7
+ "input_path": "data/BaselineED_feat.csv",
8
+ "objectives":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
9
+ "config_space": {
10
+ "mode": [5, 20],
11
+ "sequence": [0.01, 1],
12
+ "choice": [0.01, 1],
13
+ "parallel": [0.01, 1],
14
+ "loop": [0.01, 1],
15
+ "silent": [0.01, 1],
16
+ "lt_dependency": [0.01, 1],
17
+ "num_traces": [10, 10001],
18
+ "duplicate": [0],
19
+ "or": [0]
20
+ },
21
+ "n_trials": 200,
22
+ "plot_reference_feature": ""
23
+ }
24
+ },
25
+ {
26
+ "pipeline_step": "feature_extraction",
27
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
28
+ "input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
29
+ "feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
30
+ "output_path": "output/plots",
31
+ "real_eventlog_path": "data/BaselineED_feat.csv",
32
+ "plot_type": "boxplot"
33
+ },
34
+ {
35
+ "pipeline_step": "benchmark_test",
36
+ "benchmark_test": "discovery",
37
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
38
+ "output_path":"output",
39
+ "miners" : ["heu", "imf", "ilp"]
40
+ }
41
+ ]
config_files/algorithm/experiment_test.json CHANGED
@@ -47,6 +47,6 @@
47
  "benchmark_test": "discovery",
48
  "input_path":"data/test",
49
  "output_path":"output",
50
- "miners" : ["inductive", "heuristics", "imf", "ilp"]
51
  }
52
  ]
 
47
  "benchmark_test": "discovery",
48
  "input_path":"data/test",
49
  "output_path":"output",
50
+ "miners" : ["inductive", "heu", "imf", "ilp"]
51
  }
52
  ]
config_files/algorithm/fix_24.json DELETED
@@ -1,34 +0,0 @@
1
- [
2
- {
3
- "pipeline_step": "event_logs_generation",
4
- "output_path":"data/generated",
5
- "generator_params": {
6
- "objectives": {
7
- "normalized_sequence_entropy_linear_forgetting": 0.05,
8
- "ratio_top_20_variants": 0.4
9
- },
10
- "config_space": {
11
- "mode": [5, 40],
12
- "sequence": [0.01, 1],
13
- "choice": [0.01, 1],
14
- "parallel": [0.01, 1],
15
- "loop": [0.01, 1],
16
- "silent": [0.01, 1],
17
- "lt_dependency": [0.01, 1],
18
- "num_traces": [100, 1001],
19
- "duplicate": [0],
20
- "or": [0]
21
- },
22
- "n_trials": 20
23
- }
24
- },
25
- {
26
- "pipeline_step": "feature_extraction",
27
- "input_path": "data/generated",
28
- "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "entropies", "complexity"]},
29
- "feature_params": {"feature_set":["trace_length"]},
30
- "output_path": "output/plots",
31
- "real_eventlog_path": "data/log_meta_features.csv",
32
- "plot_type": "boxplot"
33
- }
34
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enseef.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enseef/2_ense_enseef", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enseef/2_ense_enseef", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enself.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enself/2_ense_enself", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enself/2_ense_enself", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enve.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enve/2_ense_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enve/2_ense_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rmcv/2_ense_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rmcv/2_ense_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rt10v/2_ense_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rt10v/2_ense_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rvpnot/2_ense_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rvpnot/2_ense_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_enself.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_linear_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_enself/2_enseef_enself", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_enself/2_enseef_enself", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_enve.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_enve/2_enseef_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_enve/2_enseef_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rmcv/2_enseef_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rmcv/2_enseef_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rt10v/2_enseef_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rt10v/2_enseef_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rvpnot/2_enseef_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rvpnot/2_enseef_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_enve.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_enve/2_enself_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_enve/2_enself_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rmcv/2_enself_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rmcv/2_enself_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rt10v/2_enself_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rt10v/2_enself_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rvpnot/2_enself_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rvpnot/2_enself_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rmcv.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rmcv/2_enve_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rmcv/2_enve_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rt10v.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rt10v/2_enve_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rt10v/2_enve_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rvpnot.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rvpnot/2_enve_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rvpnot/2_enve_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rmcv_rt10v.csv", "objectives": ["ratio_most_common_variant", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rmcv_rt10v/2_rmcv_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rmcv_rt10v/2_rmcv_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rmcv_rvpnot.csv", "objectives": ["ratio_most_common_variant", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rmcv_rvpnot/2_rmcv_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rmcv_rvpnot/2_rmcv_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rt10v_rvpnot.csv", "objectives": ["ratio_top_10_variants", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rt10v_rvpnot/2_rt10v_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rt10v_rvpnot/2_rt10v_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/algorithm/pipeline_steps/augmentation.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "instance_augmentation",
4
+ "augmentation_params":
5
+ {
6
+ "method":"SMOTE", "no_samples":20,
7
+ "feature_selection": ["n_traces", "n_unique_traces", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
8
+ },
9
+ "input_path": "data/bpic_features.csv",
10
+ "output_path": "output"
11
+ }
12
+ ]
config_files/algorithm/{benchmark.json → pipeline_steps/benchmark.json} RENAMED
@@ -4,6 +4,6 @@
4
  "benchmark_test": "discovery",
5
  "input_path":"data/test",
6
  "output_path":"output",
7
- "miners" : ["inductive", "heuristics", "imf", "ilp"]
8
  }
9
  ]
 
4
  "benchmark_test": "discovery",
5
  "input_path":"data/test",
6
  "output_path":"output",
7
+ "miners" : ["inductive", "heu", "imf", "ilp"]
8
  }
9
  ]
config_files/algorithm/{evaluation_plotter.json → pipeline_steps/evaluation_plotter.json} RENAMED
@@ -1,7 +1,7 @@
1
  [
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
- "input_path": "output/features/generated/34_bpic_features/",
5
  "input_path": "output/features/generated/grid_2obj/",
6
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
7
  "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
@@ -9,7 +9,7 @@
9
  "reference_feature": "epa_normalized_sequence_entropy",
10
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
11
  "reference_feature": "epa_normalized_variant_entropy",
12
- "targets": "data/34_bpic_features.csv",
13
  "targets": "data/grid_experiments/grid_2obj/",
14
  "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
15
  "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
 
1
  [
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
+ "input_path": "output/features/generated/BaselineED_feat/",
5
  "input_path": "output/features/generated/grid_2obj/",
6
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
7
  "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
 
9
  "reference_feature": "epa_normalized_sequence_entropy",
10
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
11
  "reference_feature": "epa_normalized_variant_entropy",
12
+ "targets": "data/BaselineED_feat.csv",
13
  "targets": "data/grid_experiments/grid_2obj/",
14
  "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
15
  "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
config_files/algorithm/{feature_extraction.json → pipeline_steps/feature_extraction.json} RENAMED
@@ -2,7 +2,7 @@
2
  {
3
  "pipeline_step": "feature_extraction",
4
  "input_path": "data/test",
5
- "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/BaselineED_feat.csv",
8
  "plot_type": "boxplot",
 
2
  {
3
  "pipeline_step": "feature_extraction",
4
  "input_path": "data/test",
5
+ "feature_params": {"feature_set":["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/BaselineED_feat.csv",
8
  "plot_type": "boxplot",
config_files/algorithm/{generation.json → pipeline_steps/generation.json} RENAMED
File without changes
dashboard.py DELETED
@@ -1,295 +0,0 @@
1
- from copy import deepcopy
2
- from meta_feature_extraction.simple_stats import simple_stats
3
- from meta_feature_extraction.trace_length import trace_length
4
- from meta_feature_extraction.trace_variant import trace_variant
5
- from meta_feature_extraction.activities import activities
6
- from meta_feature_extraction.start_activities import start_activities
7
- from meta_feature_extraction.end_activities import end_activities
8
- from meta_feature_extraction.entropies import entropies
9
- from pm4py import discover_petri_net_inductive as inductive_miner
10
- from pm4py import generate_process_tree
11
- from pm4py import save_vis_petri_net, save_vis_process_tree
12
- from pm4py.algo.filtering.log.variants import variants_filter
13
- from pm4py.algo.simulation.tree_generator import algorithm as tree_generator
14
- from pm4py.algo.simulation.playout.process_tree import algorithm as playout
15
- from pm4py.objects.conversion.log import converter as log_converter
16
- from pm4py.objects.log.exporter.xes import exporter as xes_exporter
17
- from pm4py.objects.log.importer.xes import importer as xes_importer
18
- from pm4py.objects.log.util import dataframe_utils
19
- from pm4py.sim import play_out
20
-
21
- import matplotlib.image as mpimg
22
- import os
23
- import pandas as pd
24
- import streamlit as st
25
-
26
- OUTPUT_PATH = "output"
27
- SAMPLE_EVENTS = 500
28
-
29
- @st.cache(allow_output_mutation=True)
30
- def load_from_xes(uploaded_file):
31
- bytes_data = uploaded_file.getvalue()
32
- log1 = xes_importer.deserialize(bytes_data)
33
- get_stats(log1)
34
- return log1
35
-
36
- @st.cache
37
- def load_from_csv(uploaded_file, sep):
38
- if uploaded_file is not None:
39
- df = pd.read_csv(uploaded_file, sep=sep, index_col=False)
40
- return df
41
-
42
- def get_stats(log, save=True):
43
- """Returns the statistics of an event log."""
44
- num_traces = len(log)
45
- num_events = sum([len(c) for c in log])
46
- num_utraces = len(variants_filter.get_variants(log))
47
- if save:
48
- st.session_state["num_traces"] = num_traces
49
- st.session_state["num_events"] = num_events
50
- st.session_state["num_utraces"] = num_utraces
51
- return num_utraces, num_traces, num_events
52
-
53
- #@st.cache
54
- def df_to_log(df, case_id, activity, timestamp):
55
- df.rename(columns={case_id: 'case:concept:name',
56
- activity: 'concept:name',
57
- timestamp: "time:timestamp"}, inplace=True)
58
- temp = dataframe_utils.convert_timestamp_columns_in_df(df)
59
- #temp = temp.sort_values(timestamp)
60
- log = log_converter.apply(temp)
61
- return log, 'concept:name', "time:timestamp"
62
-
63
- def read_uploaded_file(uploaded_file):
64
- extension = uploaded_file.name.split('.')[-1]
65
- log_name = uploaded_file.name.split('.')[-2]
66
-
67
- st.sidebar.write("Loaded ", extension.upper(), '-File: ', uploaded_file.name)
68
- if extension == "xes":
69
- event_log = load_from_xes(uploaded_file)
70
- log_columns = [*list(event_log[0][0].keys())]
71
- convert_button = False
72
- case_id = "case:concept:name"
73
- activity = "concept:name"
74
- timestamp = "time:timestamp"
75
- default_act_id = log_columns.index("concept:name")
76
- default_tst_id = log_columns.index("time:timestamp")
77
-
78
- event_df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME)
79
- df_path = OUTPUT_PATH+"/"+log_name+".csv"
80
- event_df.to_csv(df_path, sep =";", index=False)
81
- return event_log, event_df, case_id, activity
82
-
83
- elif extension == "csv":
84
- sep = st.sidebar.text_input("Columns separator", ";")
85
- event_df = load_from_csv(uploaded_file, sep)
86
- old_df = deepcopy(event_df)
87
- log_columns = event_df.columns
88
-
89
- case_id = st.sidebar.selectbox("Choose 'case' column:", log_columns)
90
- activity = st.sidebar.selectbox("Choose 'activity' column:", log_columns, index=0)
91
- timestamp = st.sidebar.selectbox("Choose 'timestamp' column:", log_columns, index=0)
92
-
93
- convert_button = st.sidebar.button('Confirm selection')
94
- if convert_button:
95
- temp = deepcopy(event_df)
96
- event_log, activity, timestamp = df_to_log(temp, case_id, activity, timestamp)
97
- #xes_exporter.apply(event_log, INPUT_XES)
98
- log_columns = [*list(event_log[0][0].keys())]
99
- st.session_state['log'] = event_log
100
- return event_log, event_df, case_id, activity
101
-
102
- def sample_log_traces(complete_log, sample_size):
103
- '''
104
- Samples random traces out of logs.
105
- So that number of events is slightly over SAMPLE_SIZE.
106
- :param complete_log: Log extracted from xes
107
- '''
108
-
109
- log_traces = variants_filter.get_variants(complete_log)
110
- keys = list(log_traces.keys())
111
- sample_traces = {}
112
- num_evs = 0
113
- while num_evs < sample_size:
114
- if len(keys) == 0:
115
- break
116
- random_trace = keys.pop()
117
- sample_traces[random_trace] = log_traces[random_trace]
118
- evs = sum([len(case_id) for case_id in sample_traces[random_trace]])
119
- num_evs += evs
120
- log1 = variants_filter.apply(complete_log, sample_traces)
121
- return log1
122
-
123
- def show_process_petrinet(event_log, filter_info, OUTPUT_PATH):
124
- OUTPUT_PLOT = f"{OUTPUT_PATH}_{filter_info}".replace(":","").replace(".","")+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
125
-
126
- try:
127
- fig_pt = mpimg.imread(OUTPUT_PLOT)
128
- st.write("Loaded from memory")
129
- except FileNotFoundError:
130
- net, im, fm = inductive_miner(event_log)
131
- # parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99,
132
- # pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"})
133
- #parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
134
- save_vis_petri_net(net, im, fm, OUTPUT_PLOT)
135
- st.write("Saved in: ", OUTPUT_PLOT)
136
- fig_pt = mpimg.imread(OUTPUT_PLOT)
137
- st.image(fig_pt)
138
-
139
- def show_loaded_event_log(event_log, event_df):
140
- get_stats(event_log)
141
- st.write("### Loaded event-log")
142
- col1, col2 = st.columns(2)
143
- with col2:
144
- st.dataframe(event_df)
145
- with col1:
146
- show_process_petrinet(event_log, None, OUTPUT_PATH+"running-example")
147
-
148
- def extract_meta_features(log, log_name):
149
- mtf_cols = ["log", "n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "n_events", "trace_len_min", "trace_len_max",
150
- "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1",
151
- "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean",
152
- "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1",
153
- "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7",
154
- "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist",
155
- "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants",
156
- "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence",
157
- "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median",
158
- "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness",
159
- "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean",
160
- "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3",
161
- "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min",
162
- "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance",
163
- "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace",
164
- "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3",
165
- "entropy_k_block_diff_5", "entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3",
166
- "entropy_knn_5", "entropy_knn_7"]
167
- features = [log_name]
168
- features.extend(simple_stats(log))
169
- features.extend(trace_length(log))
170
- features.extend(trace_variant(log))
171
- features.extend(activities(log))
172
- features.extend(start_activities(log))
173
- features.extend(end_activities(log))
174
- features.extend(entropies(log_name, OUTPUT_PATH))
175
-
176
- mtf = pd.DataFrame([features], columns=mtf_cols)
177
-
178
- st.dataframe(mtf)
179
- return mtf
180
-
181
- def generate_pt(mtf):
182
- OUTPUT_PLOT = f"{OUTPUT_PATH}/generated_pt".replace(":","").replace(".","")#+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
183
-
184
- st.write("### PT Gen configurations")
185
- col1, col2, col3, col4, col5, col6 = st.columns(6)
186
- with col1:
187
- param_mode = st.text_input('Mode', str(round(mtf['activities_median'].iat[0]))) #?
188
- st.write("Sum of probabilities must be one")
189
- with col2:
190
- param_min = st.text_input('Min', str(mtf['activities_min'].iat[0]))
191
- param_seq = st.text_input('Probability Sequence', 0.25)
192
- with col3:
193
- param_max = st.text_input('Max', str(mtf['activities_max'].iat[0]))
194
- param_cho = st.text_input('Probability Choice (XOR)', 0.25)
195
- with col4:
196
- param_nmo = st.text_input('Number of models', 1)
197
- param_par = st.text_input('Probability Parallel', 0.25)
198
- with col5:
199
- param_dup = st.text_input('Duplicates', 0)
200
- param_lop = st.text_input('Probability Loop', 0.25)
201
- with col6:
202
- param_sil = st.text_input('Silent', 0.2)
203
- param_or = st.text_input('Probability Or', 0.0)
204
-
205
- PT_PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: round(float(param_mode)), #most frequent number of visible activities
206
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: int(param_min), #minimum number of visible activities
207
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: int(param_max), #maximum number of visible activities
208
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SEQUENCE: float(param_seq), #probability to add a sequence operator to tree
209
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.CHOICE: float(param_cho), #probability to add a choice (XOR) operator to tree
210
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.PARALLEL: float(param_par), #probability to add a parallel operator to tree
211
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.LOOP: float(param_lop), #probability to add a loop operator to tree
212
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.OR: float(param_or), #probability to add an or operator to tree
213
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SILENT: float(param_sil), #probability to add silent activity to a choice or loop operator
214
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.DUPLICATE: int(param_dup), #probability to duplicate an activity label
215
- tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.NO_MODELS: int(param_nmo)} #number of trees to generate from model population
216
-
217
- process_tree = generate_process_tree(parameters=PT_PARAMS)
218
- save_vis_process_tree(process_tree, OUTPUT_PLOT+"_tree.png")
219
-
220
- st.write("### Playout configurations")
221
-
222
- param_ntraces = st.text_input('Number of traces', str(mtf['n_traces'].iat[0]))
223
- PO_PARAMS = {playout.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES : int(param_ntraces)}
224
-
225
- ptgen_log = play_out(process_tree, parameters=PO_PARAMS)
226
-
227
- net, im, fm = inductive_miner(ptgen_log)
228
- save_vis_petri_net(net, im, fm, OUTPUT_PLOT+".png")
229
- st.write("Saved in: ", OUTPUT_PLOT)
230
- fig_pt_net = mpimg.imread(OUTPUT_PLOT+".png")
231
- fig_pt_tree = mpimg.imread(OUTPUT_PLOT+"_tree.png")
232
-
233
- fcol1, fcol2 = st.columns(2)
234
- with fcol1:
235
- st.image(fig_pt_tree)
236
- with fcol2:
237
- st.image(fig_pt_net)
238
- extract_meta_features(ptgen_log, "gen_pt")
239
-
240
-
241
- if __name__ == '__main__':
242
- st.set_page_config(layout='wide')
243
- """
244
- # Event Log Generator
245
- """
246
- start_options = ['Event-Log', 'Meta-features']
247
- start_preference = st.sidebar.selectbox("Do you want to start with a log or with metafeatures?", start_options,0)
248
- #lets_start = st.sidebar.button("Let's start with "+start_preference+'!')
249
-
250
- if start_preference==start_options[0]:
251
- st.sidebar.write("Upload a dataset in csv or xes-format:")
252
- uploaded_file = st.sidebar.file_uploader("Pick a logfile")
253
-
254
- bar = st.progress(0)
255
-
256
- os.makedirs(OUTPUT_PATH, exist_ok=True)
257
- event_log = st.session_state['log'] if "log" in st.session_state else None
258
- if uploaded_file:
259
- event_log, event_df, case_id, activity_id = read_uploaded_file(uploaded_file)
260
- #event_log = deepcopy(event_log)
261
-
262
- use_sample = st.sidebar.checkbox('Use random sample', True)
263
- if use_sample:
264
- sample_size = st.sidebar.text_input('Sample size of approx number of events', str(SAMPLE_EVENTS))
265
- sample_size = int(sample_size)
266
-
267
- event_log = sample_log_traces(event_log, sample_size)
268
- sample_cases = [event_log[i].attributes['concept:name'] for i in range(0, len(event_log))]
269
- event_df = event_df[event_df[case_id].isin(sample_cases)]
270
-
271
- show_loaded_event_log(event_log, event_df)
272
- ext_mtf = extract_meta_features(event_log, "running-example")
273
- generate_pt(ext_mtf)
274
-
275
- elif start_preference==start_options[1]:
276
- LOG_COL = 'log'
277
- st.sidebar.write("Upload a dataset in csv-format")
278
- uploaded_file = st.sidebar.file_uploader("Pick a file containing meta-features")
279
-
280
- bar = st.progress(0)
281
-
282
- os.makedirs(OUTPUT_PATH, exist_ok=True)
283
- event_log = st.session_state[LOG_COL] if "log" in st.session_state else None
284
- if uploaded_file:
285
- sep = st.sidebar.text_input("Columns separator", ";")
286
- mtf = load_from_csv(uploaded_file, sep)
287
- st.dataframe(mtf)
288
-
289
- log_options = mtf['log'].unique()
290
- log_preference = st.selectbox("What log should we use for generating a new event-log?", log_options,1)
291
- mtf_selection = mtf[mtf[LOG_COL]==log_preference]
292
- generate_pt(mtf_selection)
293
- st.write("##### Original")
294
- st.write(mtf_selection)
295
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data/2_bpic_features.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_
sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
2
+ Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
3
+ CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
data/GenED_bench.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/GenED_feat.csv CHANGED
The diff for this file is too large to render. See raw diff
 
data/bpic_features.csv CHANGED
@@ -1,4 +1,4 @@
1
- log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
2
  BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3
  BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4
  Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
 
1
+ log,n_traces,n_unique_traces,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
2
  BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3
  BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4
  Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
data/grid_1obj/{grid_1objectives_rutpt.csv → grid_1objectives_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,ratio_unique_traces_per_trace
2
  task_1,0.0
3
  task_2,0.1
4
  task_3,0.2
 
1
+ task,ratio_variants_per_number_of_traces
2
  task_1,0.0
3
  task_2,0.1
4
  task_3,0.2
data/grid_2obj/{grid_2objectives_enve_rutpt.csv → grid_2objectives_ense_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,epa_normalized_variant_entropy,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,epa_normalized_sequence_entropy,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/grid_2obj/{grid_2objectives_enseef_rutpt.csv → grid_2objectives_enseef_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,epa_normalized_sequence_entropy_exponential_forgetting,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/grid_2obj/{grid_2objectives_enself_rutpt.csv → grid_2objectives_enself_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,epa_normalized_sequence_entropy_linear_forgetting,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,epa_normalized_sequence_entropy_linear_forgetting,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/grid_2obj/{grid_2objectives_rt10v_rutpt.csv → grid_2objectives_enve_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,ratio_top_10_variants,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,epa_normalized_variant_entropy,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/grid_2obj/{grid_2objectives_ense_rutpt.csv → grid_2objectives_rmcv_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,epa_normalized_sequence_entropy,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,ratio_most_common_variant,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/grid_2obj/{grid_2objectives_rmcv_rutpt.csv → grid_2objectives_rt10v_rvpnot.csv} RENAMED
@@ -1,4 +1,4 @@
1
- task,ratio_most_common_variant,ratio_unique_traces_per_trace
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
 
1
+ task,ratio_top_10_variants,ratio_variants_per_number_of_traces
2
  task_1,0.0,0.0
3
  task_2,0.0,0.1
4
  task_3,0.0,0.2
data/validation/2_ense_enseef_feat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_exponential_forgetting,log
2
+ 0.617035580430171,0.25759383686118104,CoSeLoG_WABO_1
3
+ 0.547597168193871,0.22387845232743803,Sepsis_Cases_Event_Log
data/validation/genELexperiment1_04_02.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "experiment1"}
data/validation/genELexperiment2_07_04.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "experiment2"}
data/validation/test_benchmark.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,fitness_inductive,precision_inductive,fscore_inductive,size_inductive,pnsize_inductive,cfc_inductive,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp
2
+ gen_el_169,0.9998052420892378,0.6662312989788649,0.7996241723917423,34,24,22,0.9383563249832565,0.5979149389882715,0.7304143193451293,22,14,13,0.9358843752091403,0.6513022517490741,0.7680805654451066,28,18,16,0.9999637006454563,0.432690150325331,0.6040181215566763,27,7,9
3
+ gen_el_168,0.9997678338833808,0.6033523537803138,0.7525477883058467,61,34,20,0.48155419290534085,0.9449078138718174,0.6379760800037585,60,35,32,0.9479094601490539,0.5169524053224155,0.669037930473001,67,38,24,0.9999513902099882,0.4283471743974073,0.5997714527549697,93,30,28
data/validation/test_feat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
2
+ gen_el_168,0.13580246913580246,0.5709876543209876,0.6920749183939835,0.6241163465815115,0.06011912975523125,0.2577500062839078,0.44135802469135804
3
+ gen_el_169,0.25813692480359146,0.6846240179573513,0.6517697077716751,0.4929433574247866,0.06332152226023505,0.21109493857555106,0.3153759820426487
execute_grid_experiments.py CHANGED
@@ -1,5 +1,6 @@
1
  import multiprocessing
2
  import os
 
3
 
4
  from datetime import datetime as dt
5
  from gedi.utils.io_helpers import sort_files
@@ -13,15 +14,18 @@ def multi_experiment_wrapper(config_file, i=0):
13
  print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
14
 
15
  if __name__ == '__main__':
16
- EXPERIMENTS_FOLDER = os.path.join('config_files','algorithm','34_bpic_features')
17
- EXPERIMENTS_FOLDER = os.path.join('config_files','algorithm','grid_1obj')
18
- EXPERIMENTS_FOLDER = os.path.join('config_files','algorithm','grid_experiments')
19
- EXPERIMENTS_FOLDER = os.path.join('config_files','algorithm','test')
 
 
 
20
  start = dt.now()
21
 
22
  experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
23
  experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
24
- experiment_list = experiment_list[:10]
25
 
26
  print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
27
  print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)}.")
 
1
  import multiprocessing
2
  import os
3
+ import sys
4
 
5
  from datetime import datetime as dt
6
  from gedi.utils.io_helpers import sort_files
 
14
  print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
15
 
16
  if __name__ == '__main__':
17
+ EXPERIMENTS_FOLDER = sys.argv[1]
18
+ """
19
+ Following args run the following experiments:
20
+ - config_files/algorithm/grid_1obj
21
+ - config_files/algorithm/grid_experiments
22
+ - config_files/algorithm/test
23
+ """
24
  start = dt.now()
25
 
26
  experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
27
  experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
28
+ #experiment_list = experiment_list[:10]
29
 
30
  print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
31
  print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)}.")
gedi/benchmark.py CHANGED
@@ -34,7 +34,7 @@ class BenchmarkTest:
34
  event_logs = [""]
35
  else:
36
  try:
37
- event_logs =[filename for filename in os.listdir(log_path) if filename.endswith(".xes")]
38
  except FileNotFoundError:
39
  print(f" FAILED: Cannot find {params[INPUT_PATH]}" )
40
  return
@@ -94,7 +94,7 @@ class BenchmarkTest:
94
  else:
95
  log_name = "gen_el_"+str(log_counter)
96
  results = {"log": event_log}
97
-
98
  for miner in miners:
99
  miner_cols = [f"fitness_{miner}", f"precision_{miner}", f"fscore_{miner}", f"size_{miner}", f"cfc_{miner}", f"pnsize_{miner}"]# f"generalization_{miner}",f"simplicity_{miner}"]
100
  start_miner = dt.now()
@@ -186,6 +186,8 @@ class BenchmarkTest:
186
  if miner == 'imf':
187
  miner = 'inductive'
188
  miner_params = f', noise_threshold={NOISE_THRESHOLD}'
 
 
189
  net, im, fm = eval(f"discover_petri_net_{miner}(log {miner_params})")
190
  bpmn_graph = convert_to_bpmn(net, im, fm)
191
  fitness = fitness_alignments(log, net, im, fm)['log_fitness']
 
34
  event_logs = [""]
35
  else:
36
  try:
37
+ event_logs =sorted([filename for filename in os.listdir(log_path) if filename.endswith(".xes")])
38
  except FileNotFoundError:
39
  print(f" FAILED: Cannot find {params[INPUT_PATH]}" )
40
  return
 
94
  else:
95
  log_name = "gen_el_"+str(log_counter)
96
  results = {"log": event_log}
97
+
98
  for miner in miners:
99
  miner_cols = [f"fitness_{miner}", f"precision_{miner}", f"fscore_{miner}", f"size_{miner}", f"cfc_{miner}", f"pnsize_{miner}"]# f"generalization_{miner}",f"simplicity_{miner}"]
100
  start_miner = dt.now()
 
186
  if miner == 'imf':
187
  miner = 'inductive'
188
  miner_params = f', noise_threshold={NOISE_THRESHOLD}'
189
+ elif miner == 'heu':
190
+ miner = 'heuristics'
191
  net, im, fm = eval(f"discover_petri_net_{miner}(log {miner_params})")
192
  bpmn_graph = convert_to_bpmn(net, im, fm)
193
  fitness = fitness_alignments(log, net, im, fm)['log_fitness']