Spaces:
Running
Running
Merge branch 'bpm24' into 16-documentation-update-readme
Browse files* bpm24: (31 commits)
Adds grid_2 experiments
Adds grid_2 experiments
Passes arg to grid experiments script
Fixes paths
Hot fix for column name change
wip experiment with real targets
Converts encoding
Updates test data
Adds grid experiments script test
no need legacy config file
Adds convert to same encoding
Updates validation data
Sorts for replicability
CI augmentation pipeline step
Updates data
CI for benchmarking
Sorts file list
splits compare output
CI pipeline step generation
Adds validation data
...
This view is limited to 50 files because it contains too many changes.
See raw diff
- .github/workflows/test_gedi.yml +49 -9
- config_files/algorithm/augmentation.json +0 -12
- config_files/algorithm/experiment_real_targets.json +41 -0
- config_files/algorithm/experiment_test.json +1 -1
- config_files/algorithm/fix_24.json +0 -34
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json +1 -0
- config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json +1 -0
- config_files/algorithm/pipeline_steps/augmentation.json +12 -0
- config_files/algorithm/{benchmark.json → pipeline_steps/benchmark.json} +1 -1
- config_files/algorithm/{evaluation_plotter.json → pipeline_steps/evaluation_plotter.json} +2 -2
- config_files/algorithm/{feature_extraction.json → pipeline_steps/feature_extraction.json} +1 -1
- config_files/algorithm/{generation.json → pipeline_steps/generation.json} +0 -0
- dashboard.py +0 -295
- data/2_bpic_features.csv +3 -0
- data/GenED_bench.csv +0 -0
- data/GenED_feat.csv +0 -0
- data/bpic_features.csv +1 -1
- data/grid_1obj/{grid_1objectives_rutpt.csv → grid_1objectives_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_enve_rutpt.csv → grid_2objectives_ense_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_enseef_rutpt.csv → grid_2objectives_enseef_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_enself_rutpt.csv → grid_2objectives_enself_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_rt10v_rutpt.csv → grid_2objectives_enve_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_ense_rutpt.csv → grid_2objectives_rmcv_rvpnot.csv} +1 -1
- data/grid_2obj/{grid_2objectives_rmcv_rutpt.csv → grid_2objectives_rt10v_rvpnot.csv} +1 -1
- data/validation/2_ense_enseef_feat.csv +3 -0
- data/validation/genELexperiment1_04_02.json +1 -0
- data/validation/genELexperiment2_07_04.json +1 -0
- data/validation/test_benchmark.csv +3 -0
- data/validation/test_feat.csv +3 -0
- execute_grid_experiments.py +9 -5
- gedi/benchmark.py +4 -2
.github/workflows/test_gedi.yml
CHANGED
@@ -29,10 +29,10 @@ jobs:
|
|
29 |
|
30 |
- name: Run test
|
31 |
run:
|
32 |
-
python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
|
33 |
|
34 |
- name: Compare output
|
35 |
-
run: diff data/test_feat.csv data/test_feat.csv
|
36 |
|
37 |
test_generation:
|
38 |
runs-on: ubuntu-latest
|
@@ -58,10 +58,15 @@ jobs:
|
|
58 |
|
59 |
- name: Run test
|
60 |
run:
|
61 |
-
python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
|
62 |
|
63 |
-
- name: Compare output
|
64 |
-
run:
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
test_benchmark:
|
67 |
runs-on: ubuntu-latest
|
@@ -83,10 +88,13 @@ jobs:
|
|
83 |
|
84 |
- name: Run test
|
85 |
run:
|
86 |
-
python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
|
|
|
|
|
|
|
87 |
|
88 |
- name: Compare output
|
89 |
-
run: diff
|
90 |
|
91 |
test_augmentation:
|
92 |
runs-on: ubuntu-latest
|
@@ -108,7 +116,7 @@ jobs:
|
|
108 |
|
109 |
- name: Run test
|
110 |
run:
|
111 |
-
python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
|
112 |
|
113 |
test_evaluation-plotter:
|
114 |
runs-on: ubuntu-latest
|
@@ -134,7 +142,7 @@ jobs:
|
|
134 |
|
135 |
- name: Run test
|
136 |
run:
|
137 |
-
python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
|
138 |
|
139 |
test_integration:
|
140 |
runs-on: ubuntu-latest
|
@@ -161,3 +169,35 @@ jobs:
|
|
161 |
- name: Run test
|
162 |
run:
|
163 |
python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
- name: Run test
|
31 |
run:
|
32 |
+
python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/feature_extraction.json
|
33 |
|
34 |
- name: Compare output
|
35 |
+
run: diff data/validation/test_feat.csv data/test_feat.csv
|
36 |
|
37 |
test_generation:
|
38 |
runs-on: ubuntu-latest
|
|
|
58 |
|
59 |
- name: Run test
|
60 |
run:
|
61 |
+
python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/generation.json
|
62 |
|
63 |
+
- name: Compare output 1
|
64 |
+
run:
|
65 |
+
diff data/validation/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
|
66 |
+
|
67 |
+
- name: Compare output 2
|
68 |
+
run:
|
69 |
+
diff data/validation/genELexperiment1_04_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment1_04_02.json
|
70 |
|
71 |
test_benchmark:
|
72 |
runs-on: ubuntu-latest
|
|
|
88 |
|
89 |
- name: Run test
|
90 |
run:
|
91 |
+
python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/benchmark.json
|
92 |
+
|
93 |
+
- name: Convert output and validation to same encoding
|
94 |
+
run: iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > data/validation/test_benchmark.csv
|
95 |
|
96 |
- name: Compare output
|
97 |
+
run: diff data/validation/test_benchmark.csv output/benchmark/test_benchmark.csv
|
98 |
|
99 |
test_augmentation:
|
100 |
runs-on: ubuntu-latest
|
|
|
116 |
|
117 |
- name: Run test
|
118 |
run:
|
119 |
+
python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/augmentation.json
|
120 |
|
121 |
test_evaluation-plotter:
|
122 |
runs-on: ubuntu-latest
|
|
|
142 |
|
143 |
- name: Run test
|
144 |
run:
|
145 |
+
python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
|
146 |
|
147 |
test_integration:
|
148 |
runs-on: ubuntu-latest
|
|
|
169 |
- name: Run test
|
170 |
run:
|
171 |
python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
|
172 |
+
|
173 |
+
test_grid_experiments_script:
|
174 |
+
runs-on: ubuntu-latest
|
175 |
+
|
176 |
+
# Setting up a python environment for the test script to run
|
177 |
+
steps:
|
178 |
+
- name: Checkout code
|
179 |
+
uses: actions/checkout@v4
|
180 |
+
|
181 |
+
- name: Set up Python
|
182 |
+
uses: actions/setup-python@v5
|
183 |
+
with:
|
184 |
+
python-version: 3.9
|
185 |
+
|
186 |
+
- name: Install dependencies
|
187 |
+
run: |
|
188 |
+
sudo apt-get install build-essential python3 python3-dev
|
189 |
+
|
190 |
+
- name: Install feeed
|
191 |
+
run: |
|
192 |
+
python -m pip install --upgrade pip
|
193 |
+
pip install .
|
194 |
+
|
195 |
+
- name: Run test
|
196 |
+
run:
|
197 |
+
python execute_grid_experiments.py config_files/algorithm/test
|
198 |
+
|
199 |
+
- name: Convert output and validation to same encoding
|
200 |
+
run: iconv -f UTF-8 -t ASCII output/features/generated/2_bpic_features/2_ense_enseef_feat.csv > data/validation/2_ense_enseef_feat.csv
|
201 |
+
|
202 |
+
- name: Compare output
|
203 |
+
run: diff data/validation/2_ense_enseef_feat.csv output/features/generated/2_bpic_features/2_ense_enseef_feat.csv
|
config_files/algorithm/augmentation.json
DELETED
@@ -1,12 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"pipeline_step": "instance_augmentation",
|
4 |
-
"augmentation_params":
|
5 |
-
{
|
6 |
-
"method":"SMOTE", "no_samples":20,
|
7 |
-
"feature_selection": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
|
8 |
-
},
|
9 |
-
"input_path": "data/bpic_features.csv",
|
10 |
-
"output_path": "output"
|
11 |
-
}
|
12 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config_files/algorithm/experiment_real_targets.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"pipeline_step": "event_logs_generation",
|
4 |
+
"output_path": "output",
|
5 |
+
"generator_params": {
|
6 |
+
"experiment": {
|
7 |
+
"input_path": "data/BaselineED_feat.csv",
|
8 |
+
"objectives":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
|
9 |
+
"config_space": {
|
10 |
+
"mode": [5, 20],
|
11 |
+
"sequence": [0.01, 1],
|
12 |
+
"choice": [0.01, 1],
|
13 |
+
"parallel": [0.01, 1],
|
14 |
+
"loop": [0.01, 1],
|
15 |
+
"silent": [0.01, 1],
|
16 |
+
"lt_dependency": [0.01, 1],
|
17 |
+
"num_traces": [10, 10001],
|
18 |
+
"duplicate": [0],
|
19 |
+
"or": [0]
|
20 |
+
},
|
21 |
+
"n_trials": 200,
|
22 |
+
"plot_reference_feature": ""
|
23 |
+
}
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"pipeline_step": "feature_extraction",
|
27 |
+
"input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
|
28 |
+
"input_path": "output/features/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
|
29 |
+
"feature_params": {"feature_set":["ratio_variants_per_number_of_traces","ratio_most_common_variant","ratio_top_10_variants","epa_normalized_variant_entropy","epa_normalized_sequence_entropy","epa_normalized_sequence_entropy_linear_forgetting","epa_normalized_sequence_entropy_exponential_forgetting"]},
|
30 |
+
"output_path": "output/plots",
|
31 |
+
"real_eventlog_path": "data/BaselineED_feat.csv",
|
32 |
+
"plot_type": "boxplot"
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"pipeline_step": "benchmark_test",
|
36 |
+
"benchmark_test": "discovery",
|
37 |
+
"input_path": "output/BaselineED_feat/7_ense_enseef_enself_enve_rmcv_rt10v_rutpt/",
|
38 |
+
"output_path":"output",
|
39 |
+
"miners" : ["heu", "imf", "ilp"]
|
40 |
+
}
|
41 |
+
]
|
config_files/algorithm/experiment_test.json
CHANGED
@@ -47,6 +47,6 @@
|
|
47 |
"benchmark_test": "discovery",
|
48 |
"input_path":"data/test",
|
49 |
"output_path":"output",
|
50 |
-
"miners" : ["inductive", "
|
51 |
}
|
52 |
]
|
|
|
47 |
"benchmark_test": "discovery",
|
48 |
"input_path":"data/test",
|
49 |
"output_path":"output",
|
50 |
+
"miners" : ["inductive", "heu", "imf", "ilp"]
|
51 |
}
|
52 |
]
|
config_files/algorithm/fix_24.json
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
[
|
2 |
-
{
|
3 |
-
"pipeline_step": "event_logs_generation",
|
4 |
-
"output_path":"data/generated",
|
5 |
-
"generator_params": {
|
6 |
-
"objectives": {
|
7 |
-
"normalized_sequence_entropy_linear_forgetting": 0.05,
|
8 |
-
"ratio_top_20_variants": 0.4
|
9 |
-
},
|
10 |
-
"config_space": {
|
11 |
-
"mode": [5, 40],
|
12 |
-
"sequence": [0.01, 1],
|
13 |
-
"choice": [0.01, 1],
|
14 |
-
"parallel": [0.01, 1],
|
15 |
-
"loop": [0.01, 1],
|
16 |
-
"silent": [0.01, 1],
|
17 |
-
"lt_dependency": [0.01, 1],
|
18 |
-
"num_traces": [100, 1001],
|
19 |
-
"duplicate": [0],
|
20 |
-
"or": [0]
|
21 |
-
},
|
22 |
-
"n_trials": 20
|
23 |
-
}
|
24 |
-
},
|
25 |
-
{
|
26 |
-
"pipeline_step": "feature_extraction",
|
27 |
-
"input_path": "data/generated",
|
28 |
-
"feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "entropies", "complexity"]},
|
29 |
-
"feature_params": {"feature_set":["trace_length"]},
|
30 |
-
"output_path": "output/plots",
|
31 |
-
"real_eventlog_path": "data/log_meta_features.csv",
|
32 |
-
"plot_type": "boxplot"
|
33 |
-
}
|
34 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enseef.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enseef/2_ense_enseef", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enseef/2_ense_enseef", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enself.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enself/2_ense_enself", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enself/2_ense_enself", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_enve.csv", "objectives": ["epa_normalized_sequence_entropy", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_enve/2_ense_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_enve/2_ense_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rmcv/2_ense_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rmcv/2_ense_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rt10v/2_ense_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rt10v/2_ense_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_ense_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_ense_rvpnot/2_ense_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_ense_rvpnot/2_ense_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_enself.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_linear_forgetting"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_enself/2_enseef_enself", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_enself/2_enseef_enself", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_enve.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_enve/2_enseef_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_enve/2_enseef_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rmcv/2_enseef_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rmcv/2_enseef_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rt10v/2_enseef_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rt10v/2_enseef_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enseef_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy_exponential_forgetting", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enseef_rvpnot/2_enseef_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enseef_rvpnot/2_enseef_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_enve.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_variant_entropy"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_enve/2_enself_enve", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_enve/2_enself_enve", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rmcv.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rmcv/2_enself_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rmcv/2_enself_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rt10v.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rt10v/2_enself_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rt10v/2_enself_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enself_rvpnot.csv", "objectives": ["epa_normalized_sequence_entropy_linear_forgetting", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enself_rvpnot/2_enself_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enself_rvpnot/2_enself_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rmcv.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_most_common_variant"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rmcv/2_enve_rmcv", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rmcv/2_enve_rmcv", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rt10v.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rt10v/2_enve_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rt10v/2_enve_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_rvpnot.csv", "objectives": ["epa_normalized_variant_entropy", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_enve_rvpnot/2_enve_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_enve_rvpnot/2_enve_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rmcv_rt10v.csv", "objectives": ["ratio_most_common_variant", "ratio_top_10_variants"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rmcv_rt10v/2_rmcv_rt10v", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rmcv_rt10v/2_rmcv_rt10v", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rmcv_rvpnot.csv", "objectives": ["ratio_most_common_variant", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rmcv_rvpnot/2_rmcv_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rmcv_rvpnot/2_rmcv_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rvpnot.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
[{"pipeline_step": "event_logs_generation", "output_path": "output/generated/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_rt10v_rvpnot.csv", "objectives": ["ratio_top_10_variants", "ratio_variants_per_number_of_traces"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/generated/grid_2obj/grid_2objectives_rt10v_rvpnot/2_rt10v_rvpnot", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/generated/grid_2obj/grid_2objectives_rt10v_rvpnot/2_rt10v_rvpnot", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
|
config_files/algorithm/pipeline_steps/augmentation.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"pipeline_step": "instance_augmentation",
|
4 |
+
"augmentation_params":
|
5 |
+
{
|
6 |
+
"method":"SMOTE", "no_samples":20,
|
7 |
+
"feature_selection": ["n_traces", "n_unique_traces", "ratio_variants_per_number_of_traces", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]
|
8 |
+
},
|
9 |
+
"input_path": "data/bpic_features.csv",
|
10 |
+
"output_path": "output"
|
11 |
+
}
|
12 |
+
]
|
config_files/algorithm/{benchmark.json → pipeline_steps/benchmark.json}
RENAMED
@@ -4,6 +4,6 @@
|
|
4 |
"benchmark_test": "discovery",
|
5 |
"input_path":"data/test",
|
6 |
"output_path":"output",
|
7 |
-
"miners" : ["inductive", "
|
8 |
}
|
9 |
]
|
|
|
4 |
"benchmark_test": "discovery",
|
5 |
"input_path":"data/test",
|
6 |
"output_path":"output",
|
7 |
+
"miners" : ["inductive", "heu", "imf", "ilp"]
|
8 |
}
|
9 |
]
|
config_files/algorithm/{evaluation_plotter.json → pipeline_steps/evaluation_plotter.json}
RENAMED
@@ -1,7 +1,7 @@
|
|
1 |
[
|
2 |
{
|
3 |
"pipeline_step": "evaluation_plotter",
|
4 |
-
"input_path": "output/features/generated/
|
5 |
"input_path": "output/features/generated/grid_2obj/",
|
6 |
"input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
|
7 |
"input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
|
@@ -9,7 +9,7 @@
|
|
9 |
"reference_feature": "epa_normalized_sequence_entropy",
|
10 |
"reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
|
11 |
"reference_feature": "epa_normalized_variant_entropy",
|
12 |
-
"targets": "data/
|
13 |
"targets": "data/grid_experiments/grid_2obj/",
|
14 |
"targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
|
15 |
"targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
|
|
|
1 |
[
|
2 |
{
|
3 |
"pipeline_step": "evaluation_plotter",
|
4 |
+
"input_path": "output/features/generated/BaselineED_feat/",
|
5 |
"input_path": "output/features/generated/grid_2obj/",
|
6 |
"input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
|
7 |
"input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
|
|
|
9 |
"reference_feature": "epa_normalized_sequence_entropy",
|
10 |
"reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
|
11 |
"reference_feature": "epa_normalized_variant_entropy",
|
12 |
+
"targets": "data/BaselineED_feat.csv",
|
13 |
"targets": "data/grid_experiments/grid_2obj/",
|
14 |
"targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
|
15 |
"targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
|
config_files/algorithm/{feature_extraction.json → pipeline_steps/feature_extraction.json}
RENAMED
@@ -2,7 +2,7 @@
|
|
2 |
{
|
3 |
"pipeline_step": "feature_extraction",
|
4 |
"input_path": "data/test",
|
5 |
-
"feature_params": {"feature_set":["
|
6 |
"output_path": "output/plots",
|
7 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
8 |
"plot_type": "boxplot",
|
|
|
2 |
{
|
3 |
"pipeline_step": "feature_extraction",
|
4 |
"input_path": "data/test",
|
5 |
+
"feature_params": {"feature_set":["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
|
6 |
"output_path": "output/plots",
|
7 |
"real_eventlog_path": "data/BaselineED_feat.csv",
|
8 |
"plot_type": "boxplot",
|
config_files/algorithm/{generation.json → pipeline_steps/generation.json}
RENAMED
File without changes
|
dashboard.py
DELETED
@@ -1,295 +0,0 @@
|
|
1 |
-
from copy import deepcopy
|
2 |
-
from meta_feature_extraction.simple_stats import simple_stats
|
3 |
-
from meta_feature_extraction.trace_length import trace_length
|
4 |
-
from meta_feature_extraction.trace_variant import trace_variant
|
5 |
-
from meta_feature_extraction.activities import activities
|
6 |
-
from meta_feature_extraction.start_activities import start_activities
|
7 |
-
from meta_feature_extraction.end_activities import end_activities
|
8 |
-
from meta_feature_extraction.entropies import entropies
|
9 |
-
from pm4py import discover_petri_net_inductive as inductive_miner
|
10 |
-
from pm4py import generate_process_tree
|
11 |
-
from pm4py import save_vis_petri_net, save_vis_process_tree
|
12 |
-
from pm4py.algo.filtering.log.variants import variants_filter
|
13 |
-
from pm4py.algo.simulation.tree_generator import algorithm as tree_generator
|
14 |
-
from pm4py.algo.simulation.playout.process_tree import algorithm as playout
|
15 |
-
from pm4py.objects.conversion.log import converter as log_converter
|
16 |
-
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
|
17 |
-
from pm4py.objects.log.importer.xes import importer as xes_importer
|
18 |
-
from pm4py.objects.log.util import dataframe_utils
|
19 |
-
from pm4py.sim import play_out
|
20 |
-
|
21 |
-
import matplotlib.image as mpimg
|
22 |
-
import os
|
23 |
-
import pandas as pd
|
24 |
-
import streamlit as st
|
25 |
-
|
26 |
-
OUTPUT_PATH = "output"
|
27 |
-
SAMPLE_EVENTS = 500
|
28 |
-
|
29 |
-
@st.cache(allow_output_mutation=True)
|
30 |
-
def load_from_xes(uploaded_file):
|
31 |
-
bytes_data = uploaded_file.getvalue()
|
32 |
-
log1 = xes_importer.deserialize(bytes_data)
|
33 |
-
get_stats(log1)
|
34 |
-
return log1
|
35 |
-
|
36 |
-
@st.cache
|
37 |
-
def load_from_csv(uploaded_file, sep):
|
38 |
-
if uploaded_file is not None:
|
39 |
-
df = pd.read_csv(uploaded_file, sep=sep, index_col=False)
|
40 |
-
return df
|
41 |
-
|
42 |
-
def get_stats(log, save=True):
|
43 |
-
"""Returns the statistics of an event log."""
|
44 |
-
num_traces = len(log)
|
45 |
-
num_events = sum([len(c) for c in log])
|
46 |
-
num_utraces = len(variants_filter.get_variants(log))
|
47 |
-
if save:
|
48 |
-
st.session_state["num_traces"] = num_traces
|
49 |
-
st.session_state["num_events"] = num_events
|
50 |
-
st.session_state["num_utraces"] = num_utraces
|
51 |
-
return num_utraces, num_traces, num_events
|
52 |
-
|
53 |
-
#@st.cache
|
54 |
-
def df_to_log(df, case_id, activity, timestamp):
|
55 |
-
df.rename(columns={case_id: 'case:concept:name',
|
56 |
-
activity: 'concept:name',
|
57 |
-
timestamp: "time:timestamp"}, inplace=True)
|
58 |
-
temp = dataframe_utils.convert_timestamp_columns_in_df(df)
|
59 |
-
#temp = temp.sort_values(timestamp)
|
60 |
-
log = log_converter.apply(temp)
|
61 |
-
return log, 'concept:name', "time:timestamp"
|
62 |
-
|
63 |
-
def read_uploaded_file(uploaded_file):
|
64 |
-
extension = uploaded_file.name.split('.')[-1]
|
65 |
-
log_name = uploaded_file.name.split('.')[-2]
|
66 |
-
|
67 |
-
st.sidebar.write("Loaded ", extension.upper(), '-File: ', uploaded_file.name)
|
68 |
-
if extension == "xes":
|
69 |
-
event_log = load_from_xes(uploaded_file)
|
70 |
-
log_columns = [*list(event_log[0][0].keys())]
|
71 |
-
convert_button = False
|
72 |
-
case_id = "case:concept:name"
|
73 |
-
activity = "concept:name"
|
74 |
-
timestamp = "time:timestamp"
|
75 |
-
default_act_id = log_columns.index("concept:name")
|
76 |
-
default_tst_id = log_columns.index("time:timestamp")
|
77 |
-
|
78 |
-
event_df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME)
|
79 |
-
df_path = OUTPUT_PATH+"/"+log_name+".csv"
|
80 |
-
event_df.to_csv(df_path, sep =";", index=False)
|
81 |
-
return event_log, event_df, case_id, activity
|
82 |
-
|
83 |
-
elif extension == "csv":
|
84 |
-
sep = st.sidebar.text_input("Columns separator", ";")
|
85 |
-
event_df = load_from_csv(uploaded_file, sep)
|
86 |
-
old_df = deepcopy(event_df)
|
87 |
-
log_columns = event_df.columns
|
88 |
-
|
89 |
-
case_id = st.sidebar.selectbox("Choose 'case' column:", log_columns)
|
90 |
-
activity = st.sidebar.selectbox("Choose 'activity' column:", log_columns, index=0)
|
91 |
-
timestamp = st.sidebar.selectbox("Choose 'timestamp' column:", log_columns, index=0)
|
92 |
-
|
93 |
-
convert_button = st.sidebar.button('Confirm selection')
|
94 |
-
if convert_button:
|
95 |
-
temp = deepcopy(event_df)
|
96 |
-
event_log, activity, timestamp = df_to_log(temp, case_id, activity, timestamp)
|
97 |
-
#xes_exporter.apply(event_log, INPUT_XES)
|
98 |
-
log_columns = [*list(event_log[0][0].keys())]
|
99 |
-
st.session_state['log'] = event_log
|
100 |
-
return event_log, event_df, case_id, activity
|
101 |
-
|
102 |
-
def sample_log_traces(complete_log, sample_size):
|
103 |
-
'''
|
104 |
-
Samples random traces out of logs.
|
105 |
-
So that number of events is slightly over SAMPLE_SIZE.
|
106 |
-
:param complete_log: Log extracted from xes
|
107 |
-
'''
|
108 |
-
|
109 |
-
log_traces = variants_filter.get_variants(complete_log)
|
110 |
-
keys = list(log_traces.keys())
|
111 |
-
sample_traces = {}
|
112 |
-
num_evs = 0
|
113 |
-
while num_evs < sample_size:
|
114 |
-
if len(keys) == 0:
|
115 |
-
break
|
116 |
-
random_trace = keys.pop()
|
117 |
-
sample_traces[random_trace] = log_traces[random_trace]
|
118 |
-
evs = sum([len(case_id) for case_id in sample_traces[random_trace]])
|
119 |
-
num_evs += evs
|
120 |
-
log1 = variants_filter.apply(complete_log, sample_traces)
|
121 |
-
return log1
|
122 |
-
|
123 |
-
def show_process_petrinet(event_log, filter_info, OUTPUT_PATH):
|
124 |
-
OUTPUT_PLOT = f"{OUTPUT_PATH}_{filter_info}".replace(":","").replace(".","")+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
|
125 |
-
|
126 |
-
try:
|
127 |
-
fig_pt = mpimg.imread(OUTPUT_PLOT)
|
128 |
-
st.write("Loaded from memory")
|
129 |
-
except FileNotFoundError:
|
130 |
-
net, im, fm = inductive_miner(event_log)
|
131 |
-
# parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99,
|
132 |
-
# pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"})
|
133 |
-
#parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
|
134 |
-
save_vis_petri_net(net, im, fm, OUTPUT_PLOT)
|
135 |
-
st.write("Saved in: ", OUTPUT_PLOT)
|
136 |
-
fig_pt = mpimg.imread(OUTPUT_PLOT)
|
137 |
-
st.image(fig_pt)
|
138 |
-
|
139 |
-
def show_loaded_event_log(event_log, event_df):
|
140 |
-
get_stats(event_log)
|
141 |
-
st.write("### Loaded event-log")
|
142 |
-
col1, col2 = st.columns(2)
|
143 |
-
with col2:
|
144 |
-
st.dataframe(event_df)
|
145 |
-
with col1:
|
146 |
-
show_process_petrinet(event_log, None, OUTPUT_PATH+"running-example")
|
147 |
-
|
148 |
-
def extract_meta_features(log, log_name):
|
149 |
-
mtf_cols = ["log", "n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "n_events", "trace_len_min", "trace_len_max",
|
150 |
-
"trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1",
|
151 |
-
"trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean",
|
152 |
-
"trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1",
|
153 |
-
"trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7",
|
154 |
-
"trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist",
|
155 |
-
"ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants",
|
156 |
-
"ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence",
|
157 |
-
"kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median",
|
158 |
-
"activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness",
|
159 |
-
"activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean",
|
160 |
-
"start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3",
|
161 |
-
"start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min",
|
162 |
-
"end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance",
|
163 |
-
"end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace",
|
164 |
-
"entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3",
|
165 |
-
"entropy_k_block_diff_5", "entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3",
|
166 |
-
"entropy_knn_5", "entropy_knn_7"]
|
167 |
-
features = [log_name]
|
168 |
-
features.extend(simple_stats(log))
|
169 |
-
features.extend(trace_length(log))
|
170 |
-
features.extend(trace_variant(log))
|
171 |
-
features.extend(activities(log))
|
172 |
-
features.extend(start_activities(log))
|
173 |
-
features.extend(end_activities(log))
|
174 |
-
features.extend(entropies(log_name, OUTPUT_PATH))
|
175 |
-
|
176 |
-
mtf = pd.DataFrame([features], columns=mtf_cols)
|
177 |
-
|
178 |
-
st.dataframe(mtf)
|
179 |
-
return mtf
|
180 |
-
|
181 |
-
def generate_pt(mtf):
|
182 |
-
OUTPUT_PLOT = f"{OUTPUT_PATH}/generated_pt".replace(":","").replace(".","")#+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
|
183 |
-
|
184 |
-
st.write("### PT Gen configurations")
|
185 |
-
col1, col2, col3, col4, col5, col6 = st.columns(6)
|
186 |
-
with col1:
|
187 |
-
param_mode = st.text_input('Mode', str(round(mtf['activities_median'].iat[0]))) #?
|
188 |
-
st.write("Sum of probabilities must be one")
|
189 |
-
with col2:
|
190 |
-
param_min = st.text_input('Min', str(mtf['activities_min'].iat[0]))
|
191 |
-
param_seq = st.text_input('Probability Sequence', 0.25)
|
192 |
-
with col3:
|
193 |
-
param_max = st.text_input('Max', str(mtf['activities_max'].iat[0]))
|
194 |
-
param_cho = st.text_input('Probability Choice (XOR)', 0.25)
|
195 |
-
with col4:
|
196 |
-
param_nmo = st.text_input('Number of models', 1)
|
197 |
-
param_par = st.text_input('Probability Parallel', 0.25)
|
198 |
-
with col5:
|
199 |
-
param_dup = st.text_input('Duplicates', 0)
|
200 |
-
param_lop = st.text_input('Probability Loop', 0.25)
|
201 |
-
with col6:
|
202 |
-
param_sil = st.text_input('Silent', 0.2)
|
203 |
-
param_or = st.text_input('Probability Or', 0.0)
|
204 |
-
|
205 |
-
PT_PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: round(float(param_mode)), #most frequent number of visible activities
|
206 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: int(param_min), #minimum number of visible activities
|
207 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: int(param_max), #maximum number of visible activities
|
208 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SEQUENCE: float(param_seq), #probability to add a sequence operator to tree
|
209 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.CHOICE: float(param_cho), #probability to add a choice (XOR) operator to tree
|
210 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.PARALLEL: float(param_par), #probability to add a parallel operator to tree
|
211 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.LOOP: float(param_lop), #probability to add a loop operator to tree
|
212 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.OR: float(param_or), #probability to add an or operator to tree
|
213 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SILENT: float(param_sil), #probability to add silent activity to a choice or loop operator
|
214 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.DUPLICATE: int(param_dup), #probability to duplicate an activity label
|
215 |
-
tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.NO_MODELS: int(param_nmo)} #number of trees to generate from model population
|
216 |
-
|
217 |
-
process_tree = generate_process_tree(parameters=PT_PARAMS)
|
218 |
-
save_vis_process_tree(process_tree, OUTPUT_PLOT+"_tree.png")
|
219 |
-
|
220 |
-
st.write("### Playout configurations")
|
221 |
-
|
222 |
-
param_ntraces = st.text_input('Number of traces', str(mtf['n_traces'].iat[0]))
|
223 |
-
PO_PARAMS = {playout.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES : int(param_ntraces)}
|
224 |
-
|
225 |
-
ptgen_log = play_out(process_tree, parameters=PO_PARAMS)
|
226 |
-
|
227 |
-
net, im, fm = inductive_miner(ptgen_log)
|
228 |
-
save_vis_petri_net(net, im, fm, OUTPUT_PLOT+".png")
|
229 |
-
st.write("Saved in: ", OUTPUT_PLOT)
|
230 |
-
fig_pt_net = mpimg.imread(OUTPUT_PLOT+".png")
|
231 |
-
fig_pt_tree = mpimg.imread(OUTPUT_PLOT+"_tree.png")
|
232 |
-
|
233 |
-
fcol1, fcol2 = st.columns(2)
|
234 |
-
with fcol1:
|
235 |
-
st.image(fig_pt_tree)
|
236 |
-
with fcol2:
|
237 |
-
st.image(fig_pt_net)
|
238 |
-
extract_meta_features(ptgen_log, "gen_pt")
|
239 |
-
|
240 |
-
|
241 |
-
if __name__ == '__main__':
|
242 |
-
st.set_page_config(layout='wide')
|
243 |
-
"""
|
244 |
-
# Event Log Generator
|
245 |
-
"""
|
246 |
-
start_options = ['Event-Log', 'Meta-features']
|
247 |
-
start_preference = st.sidebar.selectbox("Do you want to start with a log or with metafeatures?", start_options,0)
|
248 |
-
#lets_start = st.sidebar.button("Let's start with "+start_preference+'!')
|
249 |
-
|
250 |
-
if start_preference==start_options[0]:
|
251 |
-
st.sidebar.write("Upload a dataset in csv or xes-format:")
|
252 |
-
uploaded_file = st.sidebar.file_uploader("Pick a logfile")
|
253 |
-
|
254 |
-
bar = st.progress(0)
|
255 |
-
|
256 |
-
os.makedirs(OUTPUT_PATH, exist_ok=True)
|
257 |
-
event_log = st.session_state['log'] if "log" in st.session_state else None
|
258 |
-
if uploaded_file:
|
259 |
-
event_log, event_df, case_id, activity_id = read_uploaded_file(uploaded_file)
|
260 |
-
#event_log = deepcopy(event_log)
|
261 |
-
|
262 |
-
use_sample = st.sidebar.checkbox('Use random sample', True)
|
263 |
-
if use_sample:
|
264 |
-
sample_size = st.sidebar.text_input('Sample size of approx number of events', str(SAMPLE_EVENTS))
|
265 |
-
sample_size = int(sample_size)
|
266 |
-
|
267 |
-
event_log = sample_log_traces(event_log, sample_size)
|
268 |
-
sample_cases = [event_log[i].attributes['concept:name'] for i in range(0, len(event_log))]
|
269 |
-
event_df = event_df[event_df[case_id].isin(sample_cases)]
|
270 |
-
|
271 |
-
show_loaded_event_log(event_log, event_df)
|
272 |
-
ext_mtf = extract_meta_features(event_log, "running-example")
|
273 |
-
generate_pt(ext_mtf)
|
274 |
-
|
275 |
-
elif start_preference==start_options[1]:
|
276 |
-
LOG_COL = 'log'
|
277 |
-
st.sidebar.write("Upload a dataset in csv-format")
|
278 |
-
uploaded_file = st.sidebar.file_uploader("Pick a file containing meta-features")
|
279 |
-
|
280 |
-
bar = st.progress(0)
|
281 |
-
|
282 |
-
os.makedirs(OUTPUT_PATH, exist_ok=True)
|
283 |
-
event_log = st.session_state[LOG_COL] if "log" in st.session_state else None
|
284 |
-
if uploaded_file:
|
285 |
-
sep = st.sidebar.text_input("Columns separator", ";")
|
286 |
-
mtf = load_from_csv(uploaded_file, sep)
|
287 |
-
st.dataframe(mtf)
|
288 |
-
|
289 |
-
log_options = mtf['log'].unique()
|
290 |
-
log_preference = st.selectbox("What log should we use for generating a new event-log?", log_options,1)
|
291 |
-
mtf_selection = mtf[mtf[LOG_COL]==log_preference]
|
292 |
-
generate_pt(mtf_selection)
|
293 |
-
st.write("##### Original")
|
294 |
-
st.write(mtf_selection)
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
data/2_bpic_features.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,eventropy_trace,eventropy_prefix,eventropy_global_block,eventropy_lempel_ziv,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_se
quence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,eventropy_global_block_flattened,eventropy_lempel_ziv_flattened,eventropy_prefix_flattened
|
2 |
+
Sepsis_Cases_Event_Log,1050,846,0.805714285714285,3,185,14.48952380952381,13,8,11.470474925273926,131.57179501133788,9,16,7,12.281860759040903,1.7464004837799152,10.47731701485374,7.250526815880918,87.0376906898399,0.791639192292468,6.769403523350811,0.04861329147043401,0.005285190999476001,0.000575614861329,0.000209314495028,0.000104657247514,0.0,5.2328623757195225e-05,0.0,0.0,0.000104657247514,2.612850778156251,4.931206347805768,0.033333333333333,0.12,0.215238095238095,0.274285714285714,0.355238095238095,0.5971428571428571,0.7980952380952381,1.241134751773049,1.759408518249193,13.637101374069475,217.44268017168216,16,6,3383,950.875,788.0,1008.5815457239935,1017236.734375,101.75,1085.25,983.5,1.391238560701821,1.05777753209275,6,6,995,175.0,12.0,366.73787187399483,134496.66666666666,7.75,17.0,9.25,1.7883562472303312,1.199106773708694,14,2,393,75.0,32.5,112.91400014423114,12749.57142857143,14.0,53.5,39.5,2.004413358907822,2.500757934341361,9.334,10.227,14.501,1.7269999999999999,3.238,1.712,1.104,3.238,2.262,1.871,4.956,4.49,4.191,40624.49329803771,0.6957588422064961,76528.6794749776,0.5223430410751391,32139.284589305265,0.219365233602993,43880.53919110408,0.299504635939686,,,
|
3 |
+
CoSeLoG_WABO_1,937,916,0.9775880469583771,2,95,41.56243329775881,43,40,16.678023092416094,278.1564542711645,36,51,15,36.71275216938179,1.784073253119976,28.84499612652788,-0.16821637154603802,0.17918482321640303,0.40127638757174006,6.750635463329985,0.006311609919555001,0.009524793151329002,0.006311609919555001,0.014229811454998001,0.039820520765196,0.016869211966812,0.008147714623426,0.0037869659517330003,0.002065617791854,0.00045902617596700005,1.7771796608234571,2.353958246469541,0.009605122732123,0.032017075773746004,0.07043756670224101,0.11953041622198501,0.21771611526147203,0.511205976520811,0.7556029882604051,1.022925764192139,0.33126487599778903,19.52280427642022,422.82376078444236,381,1,937,102.21522309711285,15.0,193.12603388747905,37297.6649651077,3.0,81.0,78.0,2.463005335171609,5.5066536611772605,11,1,899,85.18181818181819,2.0,257.3832721066592,66246.14876033057,1.0,7.5,6.5,2.844783898567343,6.0957042298129664,101,1,292,9.277227722772277,2.0,31.163929012921322,971.1904715223994,1.0,5.0,4.0,7.672745189703872,64.72182800579148,9.806000000000001,13.867,18.357,3.2640000000000002,6.888,1.299,0.582,6.888,3.542,2.403,5.413,4.929,4.629,195166.2442745276,0.6466967918841,247624.8365497508,0.601566424410453,120536.03113478613,0.292823733970692,154887.76808660102,0.37627599125765404,18.361,3.276,13.885
|
data/GenED_bench.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/GenED_feat.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
data/bpic_features.csv
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
log,n_traces,n_unique_traces,
|
2 |
BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
3 |
BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
4 |
Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
|
|
1 |
+
log,n_traces,n_unique_traces,ratio_variants_per_number_of_traces,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
|
2 |
BPIC15_2,832,828,0.9951923076923076,1,132,53.31009615384615,54.0,61,19.89497651105348,395.8100903753698,44.0,62.0,18.0,48.15011097917017,1.6953108255055442,37.583741492631816,0.0541383907866727,0.8049916722455452,0.3731934088739797,6.6467154289258925,0.0038534938344098,0.0048627422196124,0.0046792425132119,0.0239467116852613,0.0237632119788608,0.0082574867880211,0.0047709923664122,0.0013762477980035,0.0006422489724016,0.0001834997064004,0.0541383907866727,0.8049916722455452,0.0024038461538461,0.0144230769230769,0.0540865384615384,0.1033653846153846,0.203125,0.5024038461538461,0.7512019230769231,1.0048309178743962,0.0693367154319194,14.283026792978164,202.00485436893203,410,1,830,108.18048780487806,12.0,187.5881623228515,35189.31864366448,3.0,125.5,122.5,2.1294119001489484,3.808278466770415,14,1,731,59.42857142857143,1.0,186.71740078284623,34863.387755102034,1.0,8.25,7.25,3.300411469802443,8.960767075527839,82,1,216,10.146341463414634,1.0,35.31879964786925,1247.4176085663291,1.0,3.0,2.0,5.098791193232185,25.861991394282988,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2242485009,0.6279728735030676,285876.9226982823,0.6023712370019746,150546.57168151825,0.3172166670686898,185312.93742252485,0.3904728730604407,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
3 |
BPI_Challenge_2018,43809,28457,0.6495697231162546,24,2973,57.39154055102833,49.0,49,34.87213051501663,1216.065486656354,44.0,59.0,15.0,53.775007740790905,1.3673968195217023,51.6515023255421,26.12645867504185,1720.3996647748236,0.6076179551934296,10.59875768208314,0.0033846328873849,5.263453617722996e-06,9.28844756068764e-07,0.0,0.0,0.0,0.0,0.0,7.740372967239698e-08,7.740372967239698e-08,26.12645867504185,1720.3996647748236,0.0269807573786208,0.2903741240384396,0.3730055468054509,0.4153712707434545,0.4803350909630441,0.6752037252619325,0.837590449451026,1.53948061988263,12.487438103768865,64.62568045475237,5083.4558063165005,41,17,466141,61323.56097560976,7530.0,120522.24741658216,14525612122.343842,902.0,45907.0,45005.0,2.444006846537922,4.7732537682944125,4,2,38623,10952.25,2592.0,16111.407548302535,259577453.1875,36.5,13507.75,13471.25,1.098736017040351,-0.714799753613248,21,1,34830,2086.1428571428573,13.0,7431.744980540056,55230833.45578231,2.0,193.0,191.0,4.062386890920656,14.95282428002514,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563842.153239768,0.7120788464629594,21146257.119093828,0.5706879719331716,14140225.903138256,0.3816115919659581,15576076.832943872,0.4203618469408319,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
4 |
Receipt_WABO_CoSeLoG,1434,116,0.0808926080892608,1,25,5.981171548117155,6.0,6,2.166128830112964,4.692114108646557,6.0,6.0,0.0,5.414708441482159,1.7049649652198722,4.356444755372117,1.276525010246869,12.296005610487518,0.3621579506100023,7.197192878385,0.0360297536029753,0.008135750813575,0.341120409112041,0.0235355648535564,0.0037773128777312,0.0017433751743375,0.0002905625290562,0.0014528126452812,0.0,0.0005811250581125,1.276525010246869,12.296005610487518,0.4972105997210599,0.4972105997210599,0.796373779637378,0.8870292887029289,0.9302649930264992,0.9595536959553695,0.9797768479776848,12.362068965517242,68.36027740401485,9.380686726353323,92.2819193173858,27,1,1434,317.6666666666667,27.0,553.3898230870318,306240.2962962963,8.0,50.0,42.0,1.342950616318748,-0.1780942423969453,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.42857142857144,6.0,225.87155461384123,51017.95918367348,1.25,33.25,32.0,2.471765166310402,4.8465409223704325,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.325855313024,0.6893625408247437,18296.27229411094,0.235532333261429,7814.867608807029,0.1006026786464005,10728.696951225804,0.1381131076951861,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
|
data/grid_1obj/{grid_1objectives_rutpt.csv → grid_1objectives_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,
|
2 |
task_1,0.0
|
3 |
task_2,0.1
|
4 |
task_3,0.2
|
|
|
1 |
+
task,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0
|
3 |
task_2,0.1
|
4 |
task_3,0.2
|
data/grid_2obj/{grid_2objectives_enve_rutpt.csv → grid_2objectives_ense_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,epa_normalized_sequence_entropy,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/grid_2obj/{grid_2objectives_enseef_rutpt.csv → grid_2objectives_enseef_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,epa_normalized_sequence_entropy_exponential_forgetting,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/grid_2obj/{grid_2objectives_enself_rutpt.csv → grid_2objectives_enself_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,epa_normalized_sequence_entropy_linear_forgetting,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,epa_normalized_sequence_entropy_linear_forgetting,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/grid_2obj/{grid_2objectives_rt10v_rutpt.csv → grid_2objectives_enve_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,epa_normalized_variant_entropy,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/grid_2obj/{grid_2objectives_ense_rutpt.csv → grid_2objectives_rmcv_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,ratio_most_common_variant,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/grid_2obj/{grid_2objectives_rmcv_rutpt.csv → grid_2objectives_rt10v_rvpnot.csv}
RENAMED
@@ -1,4 +1,4 @@
|
|
1 |
-
task,
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
|
|
1 |
+
task,ratio_top_10_variants,ratio_variants_per_number_of_traces
|
2 |
task_1,0.0,0.0
|
3 |
task_2,0.0,0.1
|
4 |
task_3,0.0,0.2
|
data/validation/2_ense_enseef_feat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_exponential_forgetting,log
|
2 |
+
0.617035580430171,0.25759383686118104,CoSeLoG_WABO_1
|
3 |
+
0.547597168193871,0.22387845232743803,Sepsis_Cases_Event_Log
|
data/validation/genELexperiment1_04_02.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "experiment1"}
|
data/validation/genELexperiment2_07_04.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "experiment2"}
|
data/validation/test_benchmark.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
log,fitness_inductive,precision_inductive,fscore_inductive,size_inductive,pnsize_inductive,cfc_inductive,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp
|
2 |
+
gen_el_169,0.9998052420892378,0.6662312989788649,0.7996241723917423,34,24,22,0.9383563249832565,0.5979149389882715,0.7304143193451293,22,14,13,0.9358843752091403,0.6513022517490741,0.7680805654451066,28,18,16,0.9999637006454563,0.432690150325331,0.6040181215566763,27,7,9
|
3 |
+
gen_el_168,0.9997678338833808,0.6033523537803138,0.7525477883058467,61,34,20,0.48155419290534085,0.9449078138718174,0.6379760800037585,60,35,32,0.9479094601490539,0.5169524053224155,0.669037930473001,67,38,24,0.9999513902099882,0.4283471743974073,0.5997714527549697,93,30,28
|
data/validation/test_feat.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
log,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
|
2 |
+
gen_el_168,0.13580246913580246,0.5709876543209876,0.6920749183939835,0.6241163465815115,0.06011912975523125,0.2577500062839078,0.44135802469135804
|
3 |
+
gen_el_169,0.25813692480359146,0.6846240179573513,0.6517697077716751,0.4929433574247866,0.06332152226023505,0.21109493857555106,0.3153759820426487
|
execute_grid_experiments.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import multiprocessing
|
2 |
import os
|
|
|
3 |
|
4 |
from datetime import datetime as dt
|
5 |
from gedi.utils.io_helpers import sort_files
|
@@ -13,15 +14,18 @@ def multi_experiment_wrapper(config_file, i=0):
|
|
13 |
print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
|
14 |
|
15 |
if __name__ == '__main__':
|
16 |
-
EXPERIMENTS_FOLDER =
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
20 |
start = dt.now()
|
21 |
|
22 |
experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
|
23 |
experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
|
24 |
-
experiment_list = experiment_list[:10]
|
25 |
|
26 |
print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
|
27 |
print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)}.")
|
|
|
1 |
import multiprocessing
|
2 |
import os
|
3 |
+
import sys
|
4 |
|
5 |
from datetime import datetime as dt
|
6 |
from gedi.utils.io_helpers import sort_files
|
|
|
14 |
print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
|
15 |
|
16 |
if __name__ == '__main__':
|
17 |
+
EXPERIMENTS_FOLDER = sys.argv[1]
|
18 |
+
"""
|
19 |
+
Following args run the following experiments:
|
20 |
+
- config_files/algorithm/grid_1obj
|
21 |
+
- config_files/algorithm/grid_experiments
|
22 |
+
- config_files/algorithm/test
|
23 |
+
"""
|
24 |
start = dt.now()
|
25 |
|
26 |
experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
|
27 |
experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
|
28 |
+
#experiment_list = experiment_list[:10]
|
29 |
|
30 |
print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
|
31 |
print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)}.")
|
gedi/benchmark.py
CHANGED
@@ -34,7 +34,7 @@ class BenchmarkTest:
|
|
34 |
event_logs = [""]
|
35 |
else:
|
36 |
try:
|
37 |
-
event_logs =[filename for filename in os.listdir(log_path) if filename.endswith(".xes")]
|
38 |
except FileNotFoundError:
|
39 |
print(f" FAILED: Cannot find {params[INPUT_PATH]}" )
|
40 |
return
|
@@ -94,7 +94,7 @@ class BenchmarkTest:
|
|
94 |
else:
|
95 |
log_name = "gen_el_"+str(log_counter)
|
96 |
results = {"log": event_log}
|
97 |
-
|
98 |
for miner in miners:
|
99 |
miner_cols = [f"fitness_{miner}", f"precision_{miner}", f"fscore_{miner}", f"size_{miner}", f"cfc_{miner}", f"pnsize_{miner}"]# f"generalization_{miner}",f"simplicity_{miner}"]
|
100 |
start_miner = dt.now()
|
@@ -186,6 +186,8 @@ class BenchmarkTest:
|
|
186 |
if miner == 'imf':
|
187 |
miner = 'inductive'
|
188 |
miner_params = f', noise_threshold={NOISE_THRESHOLD}'
|
|
|
|
|
189 |
net, im, fm = eval(f"discover_petri_net_{miner}(log {miner_params})")
|
190 |
bpmn_graph = convert_to_bpmn(net, im, fm)
|
191 |
fitness = fitness_alignments(log, net, im, fm)['log_fitness']
|
|
|
34 |
event_logs = [""]
|
35 |
else:
|
36 |
try:
|
37 |
+
event_logs =sorted([filename for filename in os.listdir(log_path) if filename.endswith(".xes")])
|
38 |
except FileNotFoundError:
|
39 |
print(f" FAILED: Cannot find {params[INPUT_PATH]}" )
|
40 |
return
|
|
|
94 |
else:
|
95 |
log_name = "gen_el_"+str(log_counter)
|
96 |
results = {"log": event_log}
|
97 |
+
|
98 |
for miner in miners:
|
99 |
miner_cols = [f"fitness_{miner}", f"precision_{miner}", f"fscore_{miner}", f"size_{miner}", f"cfc_{miner}", f"pnsize_{miner}"]# f"generalization_{miner}",f"simplicity_{miner}"]
|
100 |
start_miner = dt.now()
|
|
|
186 |
if miner == 'imf':
|
187 |
miner = 'inductive'
|
188 |
miner_params = f', noise_threshold={NOISE_THRESHOLD}'
|
189 |
+
elif miner == 'heu':
|
190 |
+
miner = 'heuristics'
|
191 |
net, im, fm = eval(f"discover_petri_net_{miner}(log {miner_params})")
|
192 |
bpmn_graph = convert_to_bpmn(net, im, fm)
|
193 |
fitness = fitness_alignments(log, net, im, fm)['log_fitness']
|