Andrea Maldonado committed on
Commit
5734fc9
·
2 Parent(s): ae4446b 5191b40

Merge branch 'main' into name-debug

Browse files

* main:
Updates order on validation data
Tests FE also for n_variants
Erases old feeed installation
Updates feeed version requirement
Updates feeed version in setup
Adds generation test for feeed update
Replaces n_unique_traces --> n_variants
Gets rid of Hotfixes
Renames ratio_unique_traces_per_trace --> ratio_variants_per_number_of_traces
Test main

.conda.yml CHANGED
@@ -41,7 +41,7 @@ dependencies:
41
  - deprecation==2.1.0
42
  - distributed==2024.2.1
43
  - emcee==3.1.4
44
- - feeed == 1.2.0
45
  - fsspec==2024.2.0
46
  - imbalanced-learn==0.12.0
47
  - imblearn==0.0
 
41
  - deprecation==2.1.0
42
  - distributed==2024.2.1
43
  - emcee==3.1.4
44
+ - feeed >= 1.3.2
45
  - fsspec==2024.2.0
46
  - imbalanced-learn==0.12.0
47
  - imblearn==0.0
.github/workflows/test_gedi.yml CHANGED
@@ -24,7 +24,7 @@ jobs:
24
  with:
25
  python-version: 3.9
26
 
27
- - name: Install feeed
28
  run: |
29
  python -m pip install --upgrade pip
30
  pip install .
@@ -53,7 +53,7 @@ jobs:
53
  run: |
54
  sudo apt-get install build-essential python3 python3-dev
55
 
56
- - name: Install feeed
57
  run: |
58
  python -m pip install --upgrade pip
59
  pip install .
@@ -91,7 +91,7 @@ jobs:
91
  with:
92
  python-version: 3.9
93
 
94
- - name: Install feeed
95
  run: |
96
  python -m pip install --upgrade pip
97
  pip install .
@@ -121,7 +121,7 @@ jobs:
121
  with:
122
  python-version: 3.9
123
 
124
- - name: Install feeed
125
  run: |
126
  python -m pip install --upgrade pip
127
  pip install .
@@ -147,7 +147,7 @@ jobs:
147
  run: |
148
  sudo apt-get install build-essential python3 python3-dev
149
 
150
- - name: Install feeed
151
  run: |
152
  python -m pip install --upgrade pip
153
  pip install .
@@ -173,7 +173,7 @@ jobs:
173
  run: |
174
  sudo apt-get install build-essential python3 python3-dev
175
 
176
- - name: Install feeed
177
  run: |
178
  python -m pip install --upgrade pip
179
  pip install .
@@ -199,7 +199,7 @@ jobs:
199
  run: |
200
  sudo apt-get install build-essential python3 python3-dev
201
 
202
- - name: Install feeed
203
  run: |
204
  python -m pip install --upgrade pip
205
  pip install .
@@ -231,7 +231,7 @@ jobs:
231
  run: |
232
  sudo apt-get install build-essential python3 python3-dev
233
 
234
- - name: Install feeed
235
  run: |
236
  python -m pip install --upgrade pip
237
  pip install .
@@ -244,6 +244,30 @@ jobs:
244
  run:
245
  diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
246
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  test_pypible_generate_event_logs:
248
  runs-on: ubuntu-latest
249
  steps:
@@ -259,11 +283,11 @@ jobs:
259
  run: |
260
  sudo apt-get install build-essential python3 python3-dev
261
 
262
- - name: Install feeed
263
  run: |
264
  python -m pip install --upgrade pip
265
  pip install .
266
 
267
  - name: Run test
268
  run: |
269
- python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_top_20_variants': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
 
24
  with:
25
  python-version: 3.9
26
 
27
+ - name: Install locally
28
  run: |
29
  python -m pip install --upgrade pip
30
  pip install .
 
53
  run: |
54
  sudo apt-get install build-essential python3 python3-dev
55
 
56
+ - name: Install locally
57
  run: |
58
  python -m pip install --upgrade pip
59
  pip install .
 
91
  with:
92
  python-version: 3.9
93
 
94
+ - name: Install locally
95
  run: |
96
  python -m pip install --upgrade pip
97
  pip install .
 
121
  with:
122
  python-version: 3.9
123
 
124
+ - name: Install locally
125
  run: |
126
  python -m pip install --upgrade pip
127
  pip install .
 
147
  run: |
148
  sudo apt-get install build-essential python3 python3-dev
149
 
150
+ - name: Install locally
151
  run: |
152
  python -m pip install --upgrade pip
153
  pip install .
 
173
  run: |
174
  sudo apt-get install build-essential python3 python3-dev
175
 
176
+ - name: Install locally
177
  run: |
178
  python -m pip install --upgrade pip
179
  pip install .
 
199
  run: |
200
  sudo apt-get install build-essential python3 python3-dev
201
 
202
+ - name: Install locally
203
  run: |
204
  python -m pip install --upgrade pip
205
  pip install .
 
231
  run: |
232
  sudo apt-get install build-essential python3 python3-dev
233
 
234
+ - name: Install locally
235
  run: |
236
  python -m pip install --upgrade pip
237
  pip install .
 
244
  run:
245
  diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
246
 
247
+ test_pypible_generate_event_logs_feeed_update:
248
+ runs-on: ubuntu-latest
249
+ steps:
250
+ - name: Checkout code
251
+ uses: actions/checkout@v4
252
+
253
+ - name: Set up Python
254
+ uses: actions/setup-python@v5
255
+ with:
256
+ python-version: 3.9
257
+
258
+ - name: Install dependencies
259
+ run: |
260
+ sudo apt-get install build-essential python3 python3-dev
261
+
262
+ - name: Install locally
263
+ run: |
264
+ python -m pip install --upgrade pip
265
+ pip install .
266
+
267
+ - name: Run test
268
+ run: |
269
+ python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_variants_per_number_of_traces': 0.2, 'n_variants': 4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
270
+
271
  test_pypible_generate_event_logs:
272
  runs-on: ubuntu-latest
273
  steps:
 
283
  run: |
284
  sudo apt-get install build-essential python3 python3-dev
285
 
286
+ - name: Install locally
287
  run: |
288
  python -m pip install --upgrade pip
289
  pip install .
290
 
291
  - name: Run test
292
  run: |
293
+ python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_most_common_variant': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
README.md CHANGED
@@ -140,7 +140,7 @@ In case of manually defining the targets for the features in config space, the f
140
  <tr style="text-align: right;">
141
  <th></th>
142
  <th>n_traces</th>
143
- <th>n_unique_traces</th>
144
  <th>ratio_variants_per_number_of_traces</th>
145
  <th>trace_len_min</th>
146
  <th>trace_len_max</th>
 
140
  <tr style="text-align: right;">
141
  <th></th>
142
  <th>n_traces</th>
143
+ <th>n_variants</th>
144
  <th>ratio_variants_per_number_of_traces</th>
145
  <th>trace_len_min</th>
146
  <th>trace_len_max</th>
config_files/config_layout.json CHANGED
@@ -14,7 +14,7 @@
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
- "experiment": {"n_traces":832, "n_unique_traces":828, "ratio_variants_per_number_of_traces":0.99, "trace_len_min":1, "trace_len_max":132, "trace_len_mean":53.31, "trace_len_median":54, "trace_len_mode":61, "trace_len_std":19.89, "trace_len_variance":395.81, "trace_len_q1":44, "trace_len_q3":62, "trace_len_iqr":18, "trace_len_geometric_mean":48.15, "trace_len_geometric_std":1.69, "trace_len_harmonic_mean":37.58, "trace_len_skewness":0.0541, "trace_len_kurtosis":0.81, "trace_len_coefficient_variation":0.37, "trace_len_entropy":6.65, "trace_len_hist1":0.004, "trace_len_hist2":0.005, "trace_len_hist3":0.005, "trace_len_hist4":0.024, "trace_len_hist5":0.024, "trace_len_hist6":0.008, "trace_len_hist7":0.005, "trace_len_hist8":0.001, "trace_len_hist9":0.0, "trace_len_hist10":0.00, "trace_len_skewness_hist":0.05, "trace_len_kurtosis_hist":0.8, "ratio_most_common_variant":0.0, "ratio_top_1_variants":0.01, "ratio_top_5_variants":0.05, "ratio_top_10_variants":0.10, "ratio_top_20_variants":0.2, "ratio_top_50_variants":0.5, "ratio_top_75_variants":0.75, "mean_variant_occurrence":1.0, "std_variant_occurrence":0.07, "skewness_variant_occurrence":14.28, "kurtosis_variant_occurrence":202.00, "n_unique_activities":410, "activities_min":1, "activities_max":830, "activities_mean":108.18, "activities_median":12, "activities_std":187.59, "activities_variance":35189, "activities_q1":3, "activities_q3":125, "activities_iqr":122, "activities_skewness":2.13, "activities_kurtosis":3.81, "n_unique_start_activities":14, "start_activities_min":1, "start_activities_max":731, "start_activities_mean":59.43, "start_activities_median":1, "start_activities_std":186.72, "start_activities_variance":34863, "start_activities_q1":1, "start_activities_q3":8, "start_activities_iqr":7, "start_activities_skewness":3, "start_activities_kurtosis":9.0, "n_unique_end_activities":82, "end_activities_min":1, "end_activities_max":216, "end_activities_mean":10, "end_activities_median":1, "end_activities_std":35, 
"end_activities_variance":1247, "end_activities_q1":1, "end_activities_q3":3, "end_activities_iqr":2, "end_activities_skewness":5, "end_activities_kurtosis":26, "eventropy_trace":10, "eventropy_prefix":15, "eventropy_global_block":19, "eventropy_lempel_ziv":4, "eventropy_k_block_diff_1":7.1, "eventropy_k_block_diff_3":7.1, "eventropy_k_block_diff_5":7.1, "eventropy_k_block_ratio_1":7.1, "eventropy_k_block_ratio_3":7.1, "eventropy_k_block_ratio_5":7.1, "eventropy_knn_3":5.54, "eventropy_knn_5":5.04, "eventropy_knn_7":4.72, "epa_variant_entropy":240512, "epa_normalized_variant_entropy":0.68, "epa_sequence_entropy":285876, "epa_normalized_sequence_entropy":0.60, "epa_sequence_entropy_linear_forgetting":150546, "epa_normalized_sequence_entropy_linear_forgetting":0.32, "epa_sequence_entropy_exponential_forgetting":185312, "epa_normalized_sequence_entropy_exponential_forgetting":0.39},
18
  "config_space": {
19
  "mode": [5, 20],
20
  "sequence": [0.01, 1],
@@ -33,7 +33,7 @@
33
  {
34
  "pipeline_step": "feature_extraction",
35
  "input_path": "data/test",
36
- "feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", 
"eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
37
  "output_path": "output/plots",
38
  "real_eventlog_path": "data/BaselineED_feat.csv",
39
  "plot_type": "boxplot"
 
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
+ "experiment": {"n_traces":832, "n_variants":828, "ratio_variants_per_number_of_traces":0.99, "trace_len_min":1, "trace_len_max":132, "trace_len_mean":53.31, "trace_len_median":54, "trace_len_mode":61, "trace_len_std":19.89, "trace_len_variance":395.81, "trace_len_q1":44, "trace_len_q3":62, "trace_len_iqr":18, "trace_len_geometric_mean":48.15, "trace_len_geometric_std":1.69, "trace_len_harmonic_mean":37.58, "trace_len_skewness":0.0541, "trace_len_kurtosis":0.81, "trace_len_coefficient_variation":0.37, "trace_len_entropy":6.65, "trace_len_hist1":0.004, "trace_len_hist2":0.005, "trace_len_hist3":0.005, "trace_len_hist4":0.024, "trace_len_hist5":0.024, "trace_len_hist6":0.008, "trace_len_hist7":0.005, "trace_len_hist8":0.001, "trace_len_hist9":0.0, "trace_len_hist10":0.00, "trace_len_skewness_hist":0.05, "trace_len_kurtosis_hist":0.8, "ratio_most_common_variant":0.0, "ratio_top_1_variants":0.01, "ratio_top_5_variants":0.05, "ratio_top_10_variants":0.10, "ratio_top_20_variants":0.2, "ratio_top_50_variants":0.5, "ratio_top_75_variants":0.75, "mean_variant_occurrence":1.0, "std_variant_occurrence":0.07, "skewness_variant_occurrence":14.28, "kurtosis_variant_occurrence":202.00, "n_unique_activities":410, "activities_min":1, "activities_max":830, "activities_mean":108.18, "activities_median":12, "activities_std":187.59, "activities_variance":35189, "activities_q1":3, "activities_q3":125, "activities_iqr":122, "activities_skewness":2.13, "activities_kurtosis":3.81, "n_unique_start_activities":14, "start_activities_min":1, "start_activities_max":731, "start_activities_mean":59.43, "start_activities_median":1, "start_activities_std":186.72, "start_activities_variance":34863, "start_activities_q1":1, "start_activities_q3":8, "start_activities_iqr":7, "start_activities_skewness":3, "start_activities_kurtosis":9.0, "n_unique_end_activities":82, "end_activities_min":1, "end_activities_max":216, "end_activities_mean":10, "end_activities_median":1, "end_activities_std":35, 
"end_activities_variance":1247, "end_activities_q1":1, "end_activities_q3":3, "end_activities_iqr":2, "end_activities_skewness":5, "end_activities_kurtosis":26, "eventropy_trace":10, "eventropy_prefix":15, "eventropy_global_block":19, "eventropy_lempel_ziv":4, "eventropy_k_block_diff_1":7.1, "eventropy_k_block_diff_3":7.1, "eventropy_k_block_diff_5":7.1, "eventropy_k_block_ratio_1":7.1, "eventropy_k_block_ratio_3":7.1, "eventropy_k_block_ratio_5":7.1, "eventropy_knn_3":5.54, "eventropy_knn_5":5.04, "eventropy_knn_7":4.72, "epa_variant_entropy":240512, "epa_normalized_variant_entropy":0.68, "epa_sequence_entropy":285876, "epa_normalized_sequence_entropy":0.60, "epa_sequence_entropy_linear_forgetting":150546, "epa_normalized_sequence_entropy_linear_forgetting":0.32, "epa_sequence_entropy_exponential_forgetting":185312, "epa_normalized_sequence_entropy_exponential_forgetting":0.39},
18
  "config_space": {
19
  "mode": [5, 20],
20
  "sequence": [0.01, 1],
 
33
  {
34
  "pipeline_step": "feature_extraction",
35
  "input_path": "data/test",
36
+ "feature_params": {"feature_set": ["n_traces", "n_variants", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", 
"eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
37
  "output_path": "output/plots",
38
  "real_eventlog_path": "data/BaselineED_feat.csv",
39
  "plot_type": "boxplot"
config_files/experiment_real_targets.json CHANGED
@@ -24,8 +24,8 @@
24
  },
25
  {
26
  "pipeline_step": "feature_extraction",
27
- "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
28
- "input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
29
  "feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
30
  "output_path": "output/plots",
31
  "real_eventlog_path": "data/BaselineED_feat.csv",
@@ -34,7 +34,7 @@
34
  {
35
  "pipeline_step": "benchmark_test",
36
  "benchmark_test": "discovery",
37
- "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
38
  "output_path":"output",
39
  "miners" : ["heu", "imf", "ilp"]
40
  }
 
24
  },
25
  {
26
  "pipeline_step": "feature_extraction",
27
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
28
+ "input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
29
  "feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
30
  "output_path": "output/plots",
31
  "real_eventlog_path": "data/BaselineED_feat.csv",
 
34
  {
35
  "pipeline_step": "benchmark_test",
36
  "benchmark_test": "discovery",
37
+ "input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rvpnot/",
38
  "output_path":"output",
39
  "miners" : ["heu", "imf", "ilp"]
40
  }
config_files/pipeline_steps/augmentation.json CHANGED
@@ -4,7 +4,7 @@
4
  "augmentation_params":
5
  {
6
  "method":"SMOTE", "no_samples":20,
7
- "feature_selection": ["n_traces", "n_unique_traces", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
8
  },
9
  "input_path": "data/test/bpic_features.csv",
10
  "output_path": "output"
 
4
  "augmentation_params":
5
  {
6
  "method":"SMOTE", "no_samples":20,
7
+ "feature_selection": ["n_traces", "n_variants", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
8
  },
9
  "input_path": "data/test/bpic_features.csv",
10
  "output_path": "output"
config_files/pipeline_steps/feature_extraction.json CHANGED
@@ -2,7 +2,7 @@
2
  {
3
  "pipeline_step": "feature_extraction",
4
  "input_path": "data/test",
5
- "feature_params": {"feature_set":["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/BaselineED_feat.csv",
8
  "plot_type": "boxplot",
 
2
  {
3
  "pipeline_step": "feature_extraction",
4
  "input_path": "data/test",
5
+ "feature_params": {"feature_set":["n_variants", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/BaselineED_feat.csv",
8
  "plot_type": "boxplot",
data/test/2_bpic_features.csv CHANGED
@@ -1,3 +1,3 @@
1
- log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_
sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
2
  Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
3
  CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
 
1
+ log,n_traces,n_variants,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized
_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
2
  Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
3
  CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
data/test/bpic_features.csv CHANGED
@@ -1,4 +1,4 @@
1
- log,n_traces,n_unique_traces,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
2
  BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3
  BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4
  Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
 
1
+ log,n_traces,n_variants,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
2
  BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3
  BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4
  Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
data/validation/2_ense_rmcv_feat.csv CHANGED
@@ -1,4 +1,4 @@
1
- log,n_traces,n_unique_traces,trace_len_coefficient_variation,trace_len_entropy,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_hist1,trace_len_hist10,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_iqr,trace_len_kurtosis,trace_len_kurtosis_hist,trace_len_max,trace_len_mean,trace_len_median,trace_len_min,trace_len_mode,trace_len_q1,trace_len_q3,trace_len_skewness,trace_len_skewness_hist,trace_len_std,trace_len_variance,kurtosis_variant_occurrence,mean_variant_occurrence,ratio_most_common_variant,ratio_top_10_variants,ratio_top_1_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_5_variants,ratio_top_75_variants,skewness_variant_occurrence,std_variant_occurrence,activities_iqr,activities_kurtosis,activities_max,activities_mean,activities_median,activities_min,activities_q1,activities_q3,activities_skewness,activities_std,activities_variance,n_unique_activities,n_unique_start_activities,start_activities_iqr,start_activities_kurtosis,start_activities_max,start_activities_mean,start_activities_median,start_activities_min,start_activities_q1,start_activities_q3,start_activities_skewness,start_activities_std,start_activities_variance,end_activities_iqr,end_activities_kurtosis,end_activities_max,end_activities_mean,end_activities_median,end_activities_min,end_activities_q1,end_activities_q3,end_activities_skewness,end_activities_std,end_activities_variance,n_unique_end_activities,eventropy_global_block,eventropy_global_block_flattened,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,eventropy_lempel_ziv,eventropy_lempel_ziv_flattened,eventropy_prefix,eventropy_prefix_flattened,eventropy_trace,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_e
ntropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
2
- genELBPIC15f4_0604_0003,8616,4031,1.0086445672512825,8.700230419287818,8.516920996327995,2.1832133718212567,6.58111248846037,0.05713165933282198,1.682074468800883e-05,0.009932649738269211,0.0033136867035377378,0.0012279143622246447,0.0005214430853282738,0.00017661781922409254,0.0001093348404720574,3.364148937601766e-05,0.0,9.0,11.77613857723645,4.64306597180025,141,11.964136490250697,7.0,3,3,5.0,14.0,2.836323931248485,2.5294876299887217,12.067561272744191,145.6260350714354,1651.5545366193303,2.137434879682461,0.09099350046425256,0.5789229340761374,0.40401578458681525,0.6256963788300836,0.766016713091922,0.5258820798514392,0.883008356545961,36.276105773051086,15.574023282690577,2184.5,1.9085746306932307,34121,12885.375,8627.0,8584,8616.0,10800.5,1.8663249384138656,8507.416043333898,72376127.734375,8,2,2111.0,-2.0,6419,4308.0,4308.0,2197,3252.5,5363.5,0.0,2111.0,4456321.0,768.0,0.0021026107788850723,4895,1723.2,832.0,495,813.0,1581.0,1.331337855426617,1625.5283940922102,2642342.56,5,15.897,16.276,2.756,1.525,1.375,2.756,2.016,1.775,6.564,6.07,5.761,1.405,1.786,12.139,13.493,9.703,365917.06171394786,0.7166786736830569,651595.1462643282,0.5475971681938718,62016.045914910814,0.05211796208164211,266396.7627350506,0.22387845232743814,0.46785051067780875
3
- genELHD_0254_0517,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
4
- genELRTFMP_0112_0376,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
 
1
+ log,n_traces,n_variants,ratio_variants_per_number_of_traces,trace_len_coefficient_variation,trace_len_entropy,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_hist1,trace_len_hist10,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_iqr,trace_len_kurtosis,trace_len_kurtosis_hist,trace_len_max,trace_len_mean,trace_len_median,trace_len_min,trace_len_mode,trace_len_q1,trace_len_q3,trace_len_skewness,trace_len_skewness_hist,trace_len_std,trace_len_variance,kurtosis_variant_occurrence,mean_variant_occurrence,ratio_most_common_variant,ratio_top_10_variants,ratio_top_1_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_5_variants,ratio_top_75_variants,skewness_variant_occurrence,std_variant_occurrence,activities_iqr,activities_kurtosis,activities_max,activities_mean,activities_median,activities_min,activities_q1,activities_q3,activities_skewness,activities_std,activities_variance,n_unique_activities,n_unique_start_activities,start_activities_iqr,start_activities_kurtosis,start_activities_max,start_activities_mean,start_activities_median,start_activities_min,start_activities_q1,start_activities_q3,start_activities_skewness,start_activities_std,start_activities_variance,end_activities_iqr,end_activities_kurtosis,end_activities_max,end_activities_mean,end_activities_median,end_activities_min,end_activities_q1,end_activities_q3,end_activities_skewness,end_activities_std,end_activities_variance,n_unique_end_activities,eventropy_global_block,eventropy_global_block_flattened,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,eventropy_lempel_ziv,eventropy_lempel_ziv_flattened,eventropy_prefix,eventropy_prefix_flattened,eventropy_trace,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_en
tropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
+ genELBPIC15f4_0604_0003,8616,4031,0.46785051067780875,1.0086445672512825,8.700230419287818,8.516920996327995,2.1832133718212567,6.58111248846037,0.05713165933282198,1.682074468800883e-05,0.009932649738269211,0.0033136867035377378,0.0012279143622246447,0.0005214430853282738,0.00017661781922409254,0.0001093348404720574,3.364148937601766e-05,0.0,9.0,11.77613857723645,4.64306597180025,141,11.964136490250697,7.0,3,3,5.0,14.0,2.836323931248485,2.5294876299887217,12.067561272744191,145.6260350714354,1651.5545366193303,2.137434879682461,0.09099350046425256,0.5789229340761374,0.40401578458681525,0.6256963788300836,0.766016713091922,0.5258820798514392,0.883008356545961,36.276105773051086,15.574023282690577,2184.5,1.9085746306932307,34121,12885.375,8627.0,8584,8616.0,10800.5,1.8663249384138656,8507.416043333898,72376127.734375,8,2,2111.0,-2.0,6419,4308.0,4308.0,2197,3252.5,5363.5,0.0,2111.0,4456321.0,768.0,0.0021026107788850723,4895,1723.2,832.0,495,813.0,1581.0,1.331337855426617,1625.5283940922102,2642342.56,5,15.897,16.276,2.756,1.525,1.375,2.756,2.016,1.775,6.564,6.07,5.761,1.405,1.786,12.139,13.493,9.703,365917.06171394786,0.7166786736830569,651595.1462643282,0.5475971681938718,62016.045914910814,0.05211796208164211,266396.7627350506,0.22387845232743814
3
+ genELHD_0254_0517,6822,565,0.08282028730577544,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345
4
+ genELRTFMP_0112_0376,6822,565,0.08282028730577544,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345
data/validation/test_feat.csv CHANGED
@@ -1,3 +1,3 @@
1
- log,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
2
- gen_el_168,0.13580246913580246,0.5709876543209876,0.6920749183939835,0.6241163465815115,0.06011912975523125,0.2577500062839078,0.44135802469135804
3
- gen_el_169,0.25813692480359146,0.6846240179573513,0.6517697077716751,0.4929433574247866,0.06332152226023505,0.21109493857555106,0.3153759820426487
 
1
+ log,n_variants,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
2
+ gen_el_168,143,0.13580246913580246,0.5709876543209876,0.6920749183939835,0.6241163465815115,0.06011912975523125,0.2577500062839078
3
+ gen_el_169,281,0.25813692480359146,0.6846240179573513,0.6517697077716751,0.4929433574247866,0.06332152226023505,0.21109493857555106
gedi/config.py CHANGED
@@ -19,7 +19,7 @@ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
19
  ' Use a configuration from the `config_files`-folder together with the args `-a`.')
20
  return [
21
  {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
22
- FEATURE_PARAMS: {FEATURE_SET: ['ratio_unique_traces_per_trace',
23
  'ratio_most_common_variant']},
24
  OUTPUT_PATH: 'output/plots'}
25
  ]
 
19
  ' Use a configuration from the `config_files`-folder together with the args `-a`.')
20
  return [
21
  {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
22
+ FEATURE_PARAMS: {FEATURE_SET: ['ratio_variants_per_number_of_traces',
23
  'ratio_most_common_variant']},
24
  OUTPUT_PATH: 'output/plots'}
25
  ]
gedi/features.py CHANGED
@@ -36,12 +36,7 @@ class EventLogFeatures(EventLogFile):
36
  elif ft_params.get(FEATURE_PARAMS) == None:
37
  self.params = {FEATURE_SET: None}
38
  else:
39
- #TODO: Replace hotfix
40
  self.params=ft_params.get(FEATURE_PARAMS)
41
- if 'ratio_variants_per_number_of_traces' in self.params.get(FEATURE_SET):#HOTFIX
42
- self.params[FEATURE_SET] = ['ratio_unique_traces_per_trace'\
43
- if feat=='ratio_variants_per_number_of_traces'\
44
- else feat for feat in self.params.get(FEATURE_SET)]
45
 
46
  # TODO: handle parameters in main, not in features. Move to main.py
47
  if ft_params[INPUT_PATH]:
@@ -150,9 +145,6 @@ class EventLogFeatures(EventLogFile):
150
  file_path = os.path.join(self.root_path, file)
151
  print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
152
  features = extract_features(file_path, feature_set)
153
- #TODO: Replace hotfix
154
- if features.get('ratio_unique_traces_per_trace'):#HOTFIX
155
- features['ratio_variants_per_number_of_traces']=features.pop('ratio_unique_traces_per_trace')
156
 
157
  except Exception as e:
158
  print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
 
36
  elif ft_params.get(FEATURE_PARAMS) == None:
37
  self.params = {FEATURE_SET: None}
38
  else:
 
39
  self.params=ft_params.get(FEATURE_PARAMS)
 
 
 
 
40
 
41
  # TODO: handle parameters in main, not in features. Move to main.py
42
  if ft_params[INPUT_PATH]:
 
145
  file_path = os.path.join(self.root_path, file)
146
  print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
147
  features = extract_features(file_path, feature_set)
 
 
 
148
 
149
  except Exception as e:
150
  print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
gedi/generator.py CHANGED
@@ -162,9 +162,6 @@ class GenerateEventLogs():
162
  tasks = tasks.rename(columns=columns_to_rename)
163
  self.output_path = output_path
164
 
165
- if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
166
- tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
167
-
168
  if tasks is not None:
169
  self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
170
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
@@ -182,10 +179,6 @@ class GenerateEventLogs():
182
  self.configs = [self.configs]
183
  temp = self.generate_optimized_log(self.configs[0])
184
  self.log_config = [temp]
185
- #TODO: Replace hotfix
186
- if self.params[EXPERIMENT].get('ratio_unique_traces_per_trace'):#HOTFIX
187
- self.params[EXPERIMENT]['ratio_variants_per_number_of_traces']=self.params[EXPERIMENT].pop('ratio_unique_traces_per_trace')
188
-
189
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
190
  self.output_path, "genEL")+".xes"
191
  write_xes(temp['log'], save_path)
@@ -213,10 +206,6 @@ class GenerateEventLogs():
213
  log_config = self.generate_optimized_log(self.configs)
214
 
215
  identifier = 'genEL'+str(identifier)
216
- #TODO: Replace hotfix
217
- if self.objectives.get('ratio_unique_traces_per_trace'):#HOTFIX
218
- self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
219
-
220
  save_path = get_output_key_value_location(task.to_dict(),
221
  self.output_path, identifier, self.feature_keys)+".xes"
222
 
@@ -225,9 +214,6 @@ class GenerateEventLogs():
225
  print("SUCCESS: Saved generated event log in", save_path)
226
  features_to_dump = log_config['metafeatures']
227
 
228
- #TODO: Replace hotfix
229
- if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
230
- features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
231
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
232
  # calculating the manhattan distance of the generated log to the target features
233
  #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
 
162
  tasks = tasks.rename(columns=columns_to_rename)
163
  self.output_path = output_path
164
 
 
 
 
165
  if tasks is not None:
166
  self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
167
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
 
179
  self.configs = [self.configs]
180
  temp = self.generate_optimized_log(self.configs[0])
181
  self.log_config = [temp]
 
 
 
 
182
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
183
  self.output_path, "genEL")+".xes"
184
  write_xes(temp['log'], save_path)
 
206
  log_config = self.generate_optimized_log(self.configs)
207
 
208
  identifier = 'genEL'+str(identifier)
 
 
 
 
209
  save_path = get_output_key_value_location(task.to_dict(),
210
  self.output_path, identifier, self.feature_keys)+".xes"
211
 
 
214
  print("SUCCESS: Saved generated event log in", save_path)
215
  features_to_dump = log_config['metafeatures']
216
 
 
 
 
217
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
218
  # calculating the manhattan distance of the generated log to the target features
219
  #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
gedi/utils/column_mappings.py CHANGED
@@ -1,7 +1,7 @@
1
  def column_mappings():
2
 
3
  column_names_short = {
4
- 'rutpt': 'ratio_unique_traces_per_trace',
5
  'rmcv': 'ratio_most_common_variant',
6
  'tlcv': 'trace_len_coefficient_variation',
7
  'mvo': 'mean_variant_occurrence',
 
1
  def column_mappings():
2
 
3
  column_names_short = {
4
+ 'rvpnot': 'ratio_variants_per_number_of_traces',
5
  'rmcv': 'ratio_most_common_variant',
6
  'tlcv': 'trace_len_coefficient_variation',
7
  'mvo': 'mean_variant_occurrence',
gedi/utils/io_helpers.py CHANGED
@@ -90,10 +90,6 @@ def compute_similarity(v1, v2):
90
  v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
91
  v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
92
 
93
- # HOTFIX: Rename 'ratio_unique_traces_per_trace'
94
- if 'ratio_unique_traces_per_trace' in v1:
95
- v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
96
-
97
  # Filter out non-numeric values and ensure the same keys exist in both dictionaries
98
  common_keys = set(v1.keys()).intersection(set(v2.keys()))
99
  numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
 
90
  v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
91
  v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
92
 
 
 
 
 
93
  # Filter out non-numeric values and ensure the same keys exist in both dictionaries
94
  common_keys = set(v1.keys()).intersection(set(v2.keys()))
95
  numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
requirements.txt CHANGED
@@ -22,7 +22,7 @@ deprecation==2.1.0
22
  distributed==2024.4.1
23
  emcee==3.1.4
24
  executing==2.0.1
25
- feeed==1.2.0
26
  fsspec==2024.3.1
27
  fonttools==4.51.0
28
  gitdb==4.0.11
 
22
  distributed==2024.4.1
23
  emcee==3.1.4
24
  executing==2.0.1
25
+ feeed>=1.3.2
26
  fsspec==2024.3.1
27
  fonttools==4.51.0
28
  gitdb==4.0.11
setup.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  with open("README.md", "r") as fh:
5
  long_description = fh.read()
6
 
7
- version_string = os.environ.get("VERSION_PLACEHOLDER", "1.0.0")
8
  print(version_string)
9
  version = version_string
10
 
@@ -22,7 +22,6 @@ setup(
22
  include_package_data=True,
23
  install_requires=[
24
  'ConfigSpace==0.7.1',
25
- 'feeed==1.2.0',
26
  'imblearn==0.0',
27
  'Levenshtein==0.23.0',
28
  'matplotlib==3.8.4',
@@ -44,7 +43,7 @@ setup(
44
  'deprecation==2.1.0',
45
  'distributed==2024.2.1',
46
  'emcee==3.1.4',
47
- 'feeed == 1.2.0',
48
  'fsspec==2024.2.0',
49
  'imbalanced-learn==0.12.0',
50
  'imblearn==0.0',
 
4
  with open("README.md", "r") as fh:
5
  long_description = fh.read()
6
 
7
+ version_string = os.environ.get("VERSION_PLACEHOLDER", "1.0.1")
8
  print(version_string)
9
  version = version_string
10
 
 
22
  include_package_data=True,
23
  install_requires=[
24
  'ConfigSpace==0.7.1',
 
25
  'imblearn==0.0',
26
  'Levenshtein==0.23.0',
27
  'matplotlib==3.8.4',
 
43
  'deprecation==2.1.0',
44
  'distributed==2024.2.1',
45
  'emcee==3.1.4',
46
+ 'feeed>=1.3.2',
47
  'fsspec==2024.2.0',
48
  'imbalanced-learn==0.12.0',
49
  'imblearn==0.0',
utils/config_fabric.py CHANGED
@@ -174,7 +174,7 @@ def set_generator_experiments(generator_params):
174
  #TODO: This code is duplicated. Should be moved and removed.
175
  def column_mappings():
176
  column_names_short = {
177
- 'rutpt': 'ratio_unique_traces_per_trace',
178
  'rmcv': 'ratio_most_common_variant',
179
  'tlcv': 'trace_len_coefficient_variation',
180
  'mvo': 'mean_variant_occurrence',
@@ -285,9 +285,6 @@ def set_generator_experiments(generator_params):
285
  f.write(uploaded_file.getbuffer())
286
 
287
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
288
- if 'ratio_variants_per_number_of_traces' in sel_features: #Hotfix
289
- sel_features[sel_features.index('ratio_variants_per_number_of_traces')] = 'ratio_unique_traces_per_trace'
290
-
291
  xes_features = extract_features(f"{uploaded_file.name}", sel_features)
292
  del xes_features['log']
293
  # removing the temporary file
@@ -296,7 +293,7 @@ def set_generator_experiments(generator_params):
296
  os.remove(f"{uploaded_file.name}")
297
  xes_features = {key: float(value) for key, value in xes_features.items()}
298
  experiments = [xes_features]
299
-
300
  if uploaded_file.name.endswith('.csv'):
301
  df, sel_features = handle_csv_file(uploaded_file,grid_option)
302
  if df is not None and sel_features is not None:
 
174
  #TODO: This code is duplicated. Should be moved and removed.
175
  def column_mappings():
176
  column_names_short = {
177
+ 'rvpnot': 'ratio_variants_per_number_of_traces',
178
  'rmcv': 'ratio_most_common_variant',
179
  'tlcv': 'trace_len_coefficient_variation',
180
  'mvo': 'mean_variant_occurrence',
 
285
  f.write(uploaded_file.getbuffer())
286
 
287
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
 
 
 
288
  xes_features = extract_features(f"{uploaded_file.name}", sel_features)
289
  del xes_features['log']
290
  # removing the temporary file
 
293
  os.remove(f"{uploaded_file.name}")
294
  xes_features = {key: float(value) for key, value in xes_features.items()}
295
  experiments = [xes_features]
296
+
297
  if uploaded_file.name.endswith('.csv'):
298
  df, sel_features = handle_csv_file(uploaded_file,grid_option)
299
  if df is not None and sel_features is not None: