Andrea Maldonado committed on
Commit
bdf9096
·
1 Parent(s): 4a814d8

Moving private repo to public

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. LICENSE +1 -1
  2. README.md +24 -2
  3. config.py +88 -0
  4. config_files/algorithm/augmentation.json +12 -0
  5. config_files/algorithm/benchmark.json +10 -0
  6. config_files/algorithm/evaluation_plotter.json +17 -0
  7. config_files/algorithm/experiment_test.json +51 -0
  8. config_files/algorithm/feature_extraction.json +10 -0
  9. config_files/algorithm/fix_24.json +34 -0
  10. config_files/algorithm/generation.json +30 -0
  11. config_files/algorithm/test/generator_2bpic_2objectives_ense_enseef.json +15 -0
  12. config_files/algorithm/test/generator_grid_1objectives_rt10v.json +16 -0
  13. config_files/algorithm/test/generator_grid_2objectives_ense_enself.json +19 -0
  14. config_files/options/baseline.json +9 -0
  15. config_files/options/run_params.json +9 -0
  16. dashboard.py +295 -0
  17. execute_grid_experiments.py +40 -0
  18. main.py +73 -0
  19. notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
  20. notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
  21. notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
  22. notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
  23. notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
  24. notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
  25. notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
  26. notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
  27. notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
  28. notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
  29. notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
  30. notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +376 -0
  31. notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +6 -0
  32. notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +6 -0
  33. notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
  34. notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +818 -0
  35. notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
  36. notebooks/benchmarking_process_discovery.ipynb +0 -0
  37. notebooks/bpic_generability_pdm.ipynb +0 -0
  38. notebooks/experiment_generator.ipynb +0 -0
  39. notebooks/feature_distributions.ipynb +0 -0
  40. notebooks/feature_exploration.ipynb +0 -0
  41. notebooks/feature_performance_similarity.ipynb +0 -0
  42. notebooks/feature_selection.ipynb +0 -0
  43. notebooks/gedi_representativeness.ipynb +0 -0
  44. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/configspace.json +90 -0
  45. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/intensifier.json +112 -0
  46. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/optimization.json +6 -0
  47. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/runhistory.json +153 -0
  48. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/scenario.json +180 -0
  49. smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/configspace.json +90 -0
  50. smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/intensifier.json +27 -0
LICENSE CHANGED
@@ -1,6 +1,6 @@
1
  MIT License
2
 
3
- Copyright (c) 2024 lmu-dbs
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
 
1
  MIT License
2
 
3
+ Copyright (c) 2023 lmu-dbs
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
README.md CHANGED
@@ -1,2 +1,24 @@
1
- # gedi
2
- Generating Event Data with Intentional Features for Benchmarking Process Mining
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GEDI
2
+
3
+ ### Requirements
4
+ - [Meta-feature Extractor](https://github.com/gbrltv/process_meta_learning/tree/main/meta_feature_extraction)
5
+ - [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
6
+ - Graphviz on your OS e.g.
7
+ For MacOS:
8
+ ```console
9
+ brew install graphviz
10
+ ```
11
+
12
+ ## Installation
13
+ - For smac:
14
+ ```console
15
+ conda install pyrfr swig
16
+ ```
17
+ - `conda env create -f .conda.yml`
18
+ - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
19
+
20
+ ### Startup
21
+ ```console
22
+ conda activate tag
23
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
24
+ ```
config.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import warnings
4
+
5
+ from tag.utils.io_helpers import sort_files
6
+ from tqdm import tqdm
7
+ from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
+
9
+ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
10
+ """
11
+ Loads the list of model configurations given from a json file or the default list of dictionary from the code.
12
+ @param alg_json_file: str
13
+ Path to the json data with the running configuration
14
+ @return: list[dict]
15
+ list of model configurations
16
+ """
17
+ if alg_json_file is not None:
18
+ return json.load(open(alg_json_file))
19
+ else:
20
+ warnings.warn('The default model parameter list is used instead of a .json-file.\n'
21
+ ' Use a configuration from the `config_files`-folder together with the args `-a`.')
22
+ return [
23
+ {ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM},
24
+ ]
25
+ def get_run_params(alg_params_json: str) -> dict:
26
+ """
27
+ Loads the running configuration given from a json file or the default dictionary from the code.
28
+ @param alg_params_json: str
29
+ Path to the json data with the running configuration
30
+ @return: dict
31
+ Running Configuration
32
+ """
33
+ if alg_params_json is not None:
34
+ return json.load(open(alg_params_json))
35
+ else:
36
+ warnings.warn('The default run option is used instead of a .json-file.\n'
37
+ ' Use a configuration from the `config_files`-folder together with the args `-o`.')
38
+ return {
39
+ RUN_OPTION: COMPARE,
40
+ PLOT_TYPE: COLOR_MAP, # 'heat_map', 'color_map', '3d_map', 'explained_var_plot'
41
+ PLOT_TICS: True,
42
+ N_COMPONENTS: 2,
43
+ INPUT_NAME: 'runningExample',
44
+ SAVE_RESULTS: True,
45
+ LOAD_RESULTS: True
46
+ }
47
+
48
+ def get_files_and_kwargs(params: dict):
49
+ """
50
+ This method returns the filename list of the trajectory and generates the kwargs for the DataTrajectory.
51
+ The method is individually created for the available data set.
52
+ Add new trajectory options, if different data set are used.
53
+ @param params: dict
54
+ running configuration
55
+ @return: tuple
56
+ list of filenames of the trajectories AND
57
+ kwargs with the important arguments for the classes
58
+ """
59
+ try:
60
+ input_name = params[INPUT_NAME]
61
+ except KeyError as e:
62
+ raise KeyError(f'Run option parameter is missing the key: `{e}`. This parameter is mandatory.')
63
+
64
+ #TODO: generate parent directories if they don't exist
65
+ if input_name == 'test':
66
+ filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
67
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
68
+ elif input_name == 'realLogs':
69
+ filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
71
+ elif input_name == 'gen5':
72
+ filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:5]
73
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
74
+ elif input_name == 'gen20':
75
+ filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:20]
76
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
77
+ elif input_name == 'runningExample':
78
+ filename_list = ['running-example.xes']
79
+ kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'data/'}
80
+ elif input_name == 'metaFeatures':
81
+ filename_list = ['log_features.csv']
82
+ kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'results/'}
83
+ else:
84
+ raise ValueError(f'No data trajectory was found with the name `{input_name}`.')
85
+
86
+ #filename_list.pop(file_element)
87
+ kwargs[PARAMS] = params
88
+ return filename_list, kwargs
config_files/algorithm/augmentation.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "instance_augmentation",
4
+ "augmentation_params":
5
+ {
6
+ "method":"SMOTE", "no_samples":20,
7
+ "feature_selection": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "variant_entropy", "normalized_variant_entropy", "sequence_entropy", "normalized_sequence_entropy", "sequence_entropy_linear_forgetting", "normalized_sequence_entropy_linear_forgetting", "sequence_entropy_exponential_forgetting", "normalized_sequence_entropy_exponential_forgetting"]
8
+ },
9
+ "input_path": "data/bpic_features.csv",
10
+ "output_path": "output"
11
+ }
12
+ ]
config_files/algorithm/benchmark.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "benchmark_test",
4
+ "benchmark_test": "discovery",
5
+ "input_path":"data/test_2",
6
+ "input_path":"data/test_2/gen_el_168.xes",
7
+ "output_path":"output",
8
+ "miners" : ["inductive", "heuristics", "imf", "ilp"]
9
+ }
10
+ ]
config_files/algorithm/evaluation_plotter.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "evaluation_plotter",
4
+ "input_path": "output/features/generated/34_bpic_features/",
5
+ "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
6
+ "input_path": "output/features/generated/grid_2obj/",
7
+ "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
8
+ "output_path": "output/plots",
9
+ "reference_feature": "epa_normalized_sequence_entropy",
10
+ "reference_feature": "epa_normalized_variant_entropy",
11
+ "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
12
+ "targets": "data/34_bpic_features.csv",
13
+ "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
14
+ "targets": "data/grid_experiments/grid_2obj/",
15
+ "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
16
+ }
17
+ ]
config_files/algorithm/experiment_test.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "instance_augmentation",
4
+ "augmentation_params":{"method":"SMOTE", "no_samples":2,
5
+ "feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
6
+ "input_path": "data/bpic_features.csv",
7
+ "output_path": "output"
8
+ },
9
+ {
10
+ "pipeline_step": "event_logs_generation",
11
+ "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
+ "output_path": "data/test_2",
13
+ "generator_params": {
14
+ "experiment": "data/grid_objectives.csv",
15
+ "experiment": {"input_path": "data/2_bpic_features.csv",
16
+ "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
+ "experiment": [
18
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
19
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
20
+ ],
21
+ "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
22
+ "config_space": {
23
+ "mode": [5, 20],
24
+ "sequence": [0.01, 1],
25
+ "choice": [0.01, 1],
26
+ "parallel": [0.01, 1],
27
+ "loop": [0.01, 1],
28
+ "silent": [0.01, 1],
29
+ "lt_dependency": [0.01, 1],
30
+ "num_traces": [100, 10001],
31
+ "duplicate": [0],
32
+ "or": [0]
33
+ },
34
+ "n_trials": 2
35
+ }
36
+ },
37
+ {
38
+ "pipeline_step": "feature_extraction",
39
+ "input_path": "data/test_2",
40
+ "feature_params": {"feature_set":["trace_length"]},
41
+ "output_path": "output/plots",
42
+ "real_eventlog_path": "data/bpic_features.csv",
43
+ "plot_type": "boxplot"
44
+ },
45
+ {
46
+ "pipeline_step": "benchmark_test",
47
+ "benchmark_test": "discovery",
48
+ "input_path":"data/test_2",
49
+ "miners" : ["inductive", "heuristics", "imf", "ilp", "sm"]
50
+ }
51
+ ]
config_files/algorithm/feature_extraction.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "feature_extraction",
4
+ "input_path": "output/features/generated/34_bpic_features/2_rt10v_rutpt",
5
+ "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
+ "output_path": "output/plots",
7
+ "real_eventlog_path": "data/34_bpic_features.csv",
8
+ "plot_type": "boxplot"
9
+ }
10
+ ]
config_files/algorithm/fix_24.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "event_logs_generation",
4
+ "output_path":"data/generated",
5
+ "generator_params": {
6
+ "objectives": {
7
+ "normalized_sequence_entropy_linear_forgetting": 0.05,
8
+ "ratio_top_20_variants": 0.4
9
+ },
10
+ "config_space": {
11
+ "mode": [5, 40],
12
+ "sequence": [0.01, 1],
13
+ "choice": [0.01, 1],
14
+ "parallel": [0.01, 1],
15
+ "loop": [0.01, 1],
16
+ "silent": [0.01, 1],
17
+ "lt_dependency": [0.01, 1],
18
+ "num_traces": [100, 1001],
19
+ "duplicate": [0],
20
+ "or": [0]
21
+ },
22
+ "n_trials": 20
23
+ }
24
+ },
25
+ {
26
+ "pipeline_step": "feature_extraction",
27
+ "input_path": "data/generated",
28
+ "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "entropies", "complexity"]},
29
+ "feature_params": {"feature_set":["trace_length"]},
30
+ "output_path": "output/plots",
31
+ "real_eventlog_path": "data/log_meta_features.csv",
32
+ "plot_type": "boxplot"
33
+ }
34
+ ]
config_files/algorithm/generation.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "event_logs_generation",
4
+ "output_path": "output",
5
+ "generator_params": {
6
+ "experiment": {
7
+ "input_path": "data/grid_objectives_debug.csv",
8
+ "objectives": ["epa_normalized_variant_entropy"],
9
+ "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
10
+ "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
11
+ },
12
+ "config_space": {
13
+ "mode": [5, 20],
14
+ "sequence": [0.01, 1],
15
+ "choice": [0.01, 1],
16
+ "parallel": [0.01, 1],
17
+ "loop": [0.01, 1],
18
+ "silent": [0.01, 1],
19
+ "lt_dependency": [0.01, 1],
20
+ "num_traces": [10, 10001],
21
+ "duplicate": [0],
22
+ "or": [0]
23
+ },
24
+ "n_trials": 50,
25
+ "plot_reference_feature": "epa_normalized_sequence_entropy",
26
+ "plot_reference_feature": "epa_normalized_sequence_entropy_linear_forgetting",
27
+ "plot_reference_feature": ""
28
+ }
29
+ }
30
+ ]
config_files/algorithm/test/generator_2bpic_2objectives_ense_enseef.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/2_bpic_features.csv",
5
+ "objectives": ["epa_normalized_sequence_entropy",
6
+ "epa_normalized_sequence_entropy_exponential_forgetting"]},
7
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
8
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
9
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001],
10
+ "duplicate": [0], "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/2_bpic_features/2_ense_enseef",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant", "activities",
14
+ "start_activities", "end_activities", "eventropies", "epa_based"]}, "output_path": "output/plots",
15
+ "real_eventlog_path": "data/2_bpic_features.csv", "plot_type": "boxplot"}]
config_files/algorithm/test/generator_grid_1objectives_rt10v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated/grid_1obj",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/grid_experiments/grid_1objectives_rt10v.csv",
5
+ "objectives": ["ratio_top_10_variants"]},
6
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
7
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
8
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
9
+ "num_traces": [10, 10001], "duplicate": [0],
10
+ "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/grid_1obj/grid_1objectives_rt10v/1_rt10v",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
14
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
15
+ "output_path": "output/plots", "real_eventlog_path": "data/2_bpic_features.csv",
16
+ "plot_type": "boxplot"}]
config_files/algorithm/test/generator_grid_2objectives_ense_enself.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated/grid_2obj",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/2_grid_test.csv",
5
+ "objectives": ["epa_normalized_sequence_entropy",
6
+ "epa_normalized_sequence_entropy_linear_forgetting"]},
7
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
8
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
9
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
10
+ "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 3}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/2_grid_test/2_ense_enself",
13
+ "feature_params": {"feature_set": ["epa_normalized_sequence_entropy",
14
+ "epa_normalized_sequence_entropy_linear_forgetting"]},
15
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
16
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
17
+ "output_path": "output/plots",
18
+ "real_eventlog_path": "data/2_bpic_features.csv",
19
+ "plot_type": "boxplot"}]
config_files/options/baseline.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_option": "baseline",
3
+ "plot_type": "color_map",
4
+ "plot_tics": true,
5
+ "n_components": 2,
6
+ "input_name": "test",
7
+ "save_results": false,
8
+ "load_results": false
9
+ }
config_files/options/run_params.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_option": "compare",
3
+ "plot_type": "color_map",
4
+ "plot_tics": true,
5
+ "n_components": 2,
6
+ "input_name": "gen20",
7
+ "save_results": false,
8
+ "load_results": true
9
+ }
dashboard.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+ from meta_feature_extraction.simple_stats import simple_stats
3
+ from meta_feature_extraction.trace_length import trace_length
4
+ from meta_feature_extraction.trace_variant import trace_variant
5
+ from meta_feature_extraction.activities import activities
6
+ from meta_feature_extraction.start_activities import start_activities
7
+ from meta_feature_extraction.end_activities import end_activities
8
+ from meta_feature_extraction.entropies import entropies
9
+ from pm4py import discover_petri_net_inductive as inductive_miner
10
+ from pm4py import generate_process_tree
11
+ from pm4py import save_vis_petri_net, save_vis_process_tree
12
+ from pm4py.algo.filtering.log.variants import variants_filter
13
+ from pm4py.algo.simulation.tree_generator import algorithm as tree_generator
14
+ from pm4py.algo.simulation.playout.process_tree import algorithm as playout
15
+ from pm4py.objects.conversion.log import converter as log_converter
16
+ from pm4py.objects.log.exporter.xes import exporter as xes_exporter
17
+ from pm4py.objects.log.importer.xes import importer as xes_importer
18
+ from pm4py.objects.log.util import dataframe_utils
19
+ from pm4py.sim import play_out
20
+
21
+ import matplotlib.image as mpimg
22
+ import os
23
+ import pandas as pd
24
+ import streamlit as st
25
+
26
+ OUTPUT_PATH = "output"
27
+ SAMPLE_EVENTS = 500
28
+
29
+ @st.cache(allow_output_mutation=True)
30
+ def load_from_xes(uploaded_file):
31
+ bytes_data = uploaded_file.getvalue()
32
+ log1 = xes_importer.deserialize(bytes_data)
33
+ get_stats(log1)
34
+ return log1
35
+
36
+ @st.cache
37
+ def load_from_csv(uploaded_file, sep):
38
+ if uploaded_file is not None:
39
+ df = pd.read_csv(uploaded_file, sep=sep, index_col=False)
40
+ return df
41
+
42
+ def get_stats(log, save=True):
43
+ """Returns the statistics of an event log."""
44
+ num_traces = len(log)
45
+ num_events = sum([len(c) for c in log])
46
+ num_utraces = len(variants_filter.get_variants(log))
47
+ if save:
48
+ st.session_state["num_traces"] = num_traces
49
+ st.session_state["num_events"] = num_events
50
+ st.session_state["num_utraces"] = num_utraces
51
+ return num_utraces, num_traces, num_events
52
+
53
+ #@st.cache
54
+ def df_to_log(df, case_id, activity, timestamp):
55
+ df.rename(columns={case_id: 'case:concept:name',
56
+ activity: 'concept:name',
57
+ timestamp: "time:timestamp"}, inplace=True)
58
+ temp = dataframe_utils.convert_timestamp_columns_in_df(df)
59
+ #temp = temp.sort_values(timestamp)
60
+ log = log_converter.apply(temp)
61
+ return log, 'concept:name', "time:timestamp"
62
+
63
+ def read_uploaded_file(uploaded_file):
64
+ extension = uploaded_file.name.split('.')[-1]
65
+ log_name = uploaded_file.name.split('.')[-2]
66
+
67
+ st.sidebar.write("Loaded ", extension.upper(), '-File: ', uploaded_file.name)
68
+ if extension == "xes":
69
+ event_log = load_from_xes(uploaded_file)
70
+ log_columns = [*list(event_log[0][0].keys())]
71
+ convert_button = False
72
+ case_id = "case:concept:name"
73
+ activity = "concept:name"
74
+ timestamp = "time:timestamp"
75
+ default_act_id = log_columns.index("concept:name")
76
+ default_tst_id = log_columns.index("time:timestamp")
77
+
78
+ event_df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME)
79
+ df_path = OUTPUT_PATH+"/"+log_name+".csv"
80
+ event_df.to_csv(df_path, sep =";", index=False)
81
+ return event_log, event_df, case_id, activity
82
+
83
+ elif extension == "csv":
84
+ sep = st.sidebar.text_input("Columns separator", ";")
85
+ event_df = load_from_csv(uploaded_file, sep)
86
+ old_df = deepcopy(event_df)
87
+ log_columns = event_df.columns
88
+
89
+ case_id = st.sidebar.selectbox("Choose 'case' column:", log_columns)
90
+ activity = st.sidebar.selectbox("Choose 'activity' column:", log_columns, index=0)
91
+ timestamp = st.sidebar.selectbox("Choose 'timestamp' column:", log_columns, index=0)
92
+
93
+ convert_button = st.sidebar.button('Confirm selection')
94
+ if convert_button:
95
+ temp = deepcopy(event_df)
96
+ event_log, activity, timestamp = df_to_log(temp, case_id, activity, timestamp)
97
+ #xes_exporter.apply(event_log, INPUT_XES)
98
+ log_columns = [*list(event_log[0][0].keys())]
99
+ st.session_state['log'] = event_log
100
+ return event_log, event_df, case_id, activity
101
+
102
+ def sample_log_traces(complete_log, sample_size):
103
+ '''
104
+ Samples random traces out of logs.
105
+ So that number of events is slightly over SAMPLE_SIZE.
106
+ :param complete_log: Log extracted from xes
107
+ '''
108
+
109
+ log_traces = variants_filter.get_variants(complete_log)
110
+ keys = list(log_traces.keys())
111
+ sample_traces = {}
112
+ num_evs = 0
113
+ while num_evs < sample_size:
114
+ if len(keys) == 0:
115
+ break
116
+ random_trace = keys.pop()
117
+ sample_traces[random_trace] = log_traces[random_trace]
118
+ evs = sum([len(case_id) for case_id in sample_traces[random_trace]])
119
+ num_evs += evs
120
+ log1 = variants_filter.apply(complete_log, sample_traces)
121
+ return log1
122
+
123
+ def show_process_petrinet(event_log, filter_info, OUTPUT_PATH):
124
+ OUTPUT_PLOT = f"{OUTPUT_PATH}_{filter_info}".replace(":","").replace(".","")+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
125
+
126
+ try:
127
+ fig_pt = mpimg.imread(OUTPUT_PLOT)
128
+ st.write("Loaded from memory")
129
+ except FileNotFoundError:
130
+ net, im, fm = inductive_miner(event_log)
131
+ # parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99,
132
+ # pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"})
133
+ #parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
134
+ save_vis_petri_net(net, im, fm, OUTPUT_PLOT)
135
+ st.write("Saved in: ", OUTPUT_PLOT)
136
+ fig_pt = mpimg.imread(OUTPUT_PLOT)
137
+ st.image(fig_pt)
138
+
139
+ def show_loaded_event_log(event_log, event_df):
140
+ get_stats(event_log)
141
+ st.write("### Loaded event-log")
142
+ col1, col2 = st.columns(2)
143
+ with col2:
144
+ st.dataframe(event_df)
145
+ with col1:
146
+ show_process_petrinet(event_log, None, OUTPUT_PATH+"running-example")
147
+
148
+ def extract_meta_features(log, log_name):
149
+ mtf_cols = ["log", "n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "n_events", "trace_len_min", "trace_len_max",
150
+ "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1",
151
+ "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean",
152
+ "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1",
153
+ "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7",
154
+ "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist",
155
+ "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants",
156
+ "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence",
157
+ "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median",
158
+ "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness",
159
+ "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean",
160
+ "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3",
161
+ "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min",
162
+ "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance",
163
+ "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace",
164
+ "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3",
165
+ "entropy_k_block_diff_5", "entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3",
166
+ "entropy_knn_5", "entropy_knn_7"]
167
+ features = [log_name]
168
+ features.extend(simple_stats(log))
169
+ features.extend(trace_length(log))
170
+ features.extend(trace_variant(log))
171
+ features.extend(activities(log))
172
+ features.extend(start_activities(log))
173
+ features.extend(end_activities(log))
174
+ features.extend(entropies(log_name, OUTPUT_PATH))
175
+
176
+ mtf = pd.DataFrame([features], columns=mtf_cols)
177
+
178
+ st.dataframe(mtf)
179
+ return mtf
180
+
181
def generate_pt(mtf):
    """Interactive process-tree generation seeded from extracted meta-features.

    Renders Streamlit inputs (pre-filled from the meta-feature frame ``mtf``),
    generates a process tree with pm4py's PTAndLogGenerator, plays out an event
    log from it, mines a Petri net with the inductive miner, saves/visualizes
    both models and finally re-extracts meta-features from the generated log.

    @param mtf: single-row pandas DataFrame of meta-features; the columns
        'activities_median', 'activities_min', 'activities_max' and 'n_traces'
        are read to seed the widget defaults.
    """
    # Strip ':' and '.' so the timestamped OUTPUT_PATH yields a valid file stem.
    OUTPUT_PLOT = f"{OUTPUT_PATH}/generated_pt".replace(":","").replace(".","")#+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE

    st.write("### PT Gen configurations")
    # Six side-by-side columns: tree-size parameters on top, operator
    # probabilities below; the four operator probabilities should sum to 1.
    col1, col2, col3, col4, col5, col6 = st.columns(6)
    with col1:
        # NOTE(review): median activity count used as the generator's MODE — confirm intent.
        param_mode = st.text_input('Mode', str(round(mtf['activities_median'].iat[0]))) #?
        st.write("Sum of probabilities must be one")
    with col2:
        param_min = st.text_input('Min', str(mtf['activities_min'].iat[0]))
        param_seq = st.text_input('Probability Sequence', 0.25)
    with col3:
        param_max = st.text_input('Max', str(mtf['activities_max'].iat[0]))
        param_cho = st.text_input('Probability Choice (XOR)', 0.25)
    with col4:
        param_nmo = st.text_input('Number of models', 1)
        param_par = st.text_input('Probability Parallel', 0.25)
    with col5:
        param_dup = st.text_input('Duplicates', 0)
        param_lop = st.text_input('Probability Loop', 0.25)
    with col6:
        param_sil = st.text_input('Silent', 0.2)
        param_or = st.text_input('Probability Or', 0.0)

    # Text inputs return strings; cast each to the type the generator expects.
    PT_PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: round(float(param_mode)), #most frequent number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: int(param_min), #minimum number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: int(param_max), #maximum number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SEQUENCE: float(param_seq), #probability to add a sequence operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.CHOICE: float(param_cho), #probability to add a choice (XOR) operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.PARALLEL: float(param_par), #probability to add a parallel operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.LOOP: float(param_lop), #probability to add a loop operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.OR: float(param_or), #probability to add an or operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SILENT: float(param_sil), #probability to add silent activity to a choice or loop operator
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.DUPLICATE: int(param_dup), #probability to duplicate an activity label
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.NO_MODELS: int(param_nmo)} #number of trees to generate from model population

    process_tree = generate_process_tree(parameters=PT_PARAMS)
    save_vis_process_tree(process_tree, OUTPUT_PLOT+"_tree.png")

    st.write("### Playout configurations")

    # Play out an event log from the generated tree (basic playout, n traces).
    param_ntraces = st.text_input('Number of traces', str(mtf['n_traces'].iat[0]))
    PO_PARAMS = {playout.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES : int(param_ntraces)}

    ptgen_log = play_out(process_tree, parameters=PO_PARAMS)

    # Mine a Petri net from the played-out log and persist both visualizations.
    net, im, fm = inductive_miner(ptgen_log)
    save_vis_petri_net(net, im, fm, OUTPUT_PLOT+".png")
    st.write("Saved in: ", OUTPUT_PLOT)
    fig_pt_net = mpimg.imread(OUTPUT_PLOT+".png")
    fig_pt_tree = mpimg.imread(OUTPUT_PLOT+"_tree.png")

    # Show tree and Petri net side by side.
    fcol1, fcol2 = st.columns(2)
    with fcol1:
        st.image(fig_pt_tree)
    with fcol2:
        st.image(fig_pt_net)
    # Re-extract meta-features of the generated log for comparison with the input.
    extract_meta_features(ptgen_log, "gen_pt")
239
+
240
+
241
if __name__ == '__main__':
    st.set_page_config(layout='wide')
    # Bare string expression: Streamlit "magic" renders it as a markdown title.
    """
    # Event Log Generator
    """
    # Two entry modes: start from an uploaded event log, or from a CSV of
    # pre-computed meta-features.
    start_options = ['Event-Log', 'Meta-features']
    start_preference = st.sidebar.selectbox("Do you want to start with a log or with metafeatures?", start_options,0)
    #lets_start = st.sidebar.button("Let's start with "+start_preference+'!')

    if start_preference==start_options[0]:
        # --- Mode 1: upload an event log (csv/xes) ---
        st.sidebar.write("Upload a dataset in csv or xes-format:")
        uploaded_file = st.sidebar.file_uploader("Pick a logfile")

        bar = st.progress(0)

        os.makedirs(OUTPUT_PATH, exist_ok=True)
        # Reuse a previously loaded log from the session, if any.
        event_log = st.session_state['log'] if "log" in st.session_state else None
        if uploaded_file:
            event_log, event_df, case_id, activity_id = read_uploaded_file(uploaded_file)
            #event_log = deepcopy(event_log)

            # Optionally down-sample traces until roughly `sample_size` events remain.
            use_sample = st.sidebar.checkbox('Use random sample', True)
            if use_sample:
                sample_size = st.sidebar.text_input('Sample size of approx number of events', str(SAMPLE_EVENTS))
                sample_size = int(sample_size)

                event_log = sample_log_traces(event_log, sample_size)
                # Keep only the dataframe rows belonging to the sampled cases.
                sample_cases = [event_log[i].attributes['concept:name'] for i in range(0, len(event_log))]
                event_df = event_df[event_df[case_id].isin(sample_cases)]

            # Display the (possibly sampled) log, extract meta-features, then
            # drive the process-tree generator from them.
            show_loaded_event_log(event_log, event_df)
            ext_mtf = extract_meta_features(event_log, "running-example")
            generate_pt(ext_mtf)

    elif start_preference==start_options[1]:
        # --- Mode 2: upload a CSV of meta-features ---
        LOG_COL = 'log'
        st.sidebar.write("Upload a dataset in csv-format")
        uploaded_file = st.sidebar.file_uploader("Pick a file containing meta-features")

        bar = st.progress(0)

        os.makedirs(OUTPUT_PATH, exist_ok=True)
        event_log = st.session_state[LOG_COL] if "log" in st.session_state else None
        if uploaded_file:
            sep = st.sidebar.text_input("Columns separator", ";")
            mtf = load_from_csv(uploaded_file, sep)
            st.dataframe(mtf)

            # Pick one log's meta-feature row and generate a tree from it.
            log_options = mtf['log'].unique()
            log_preference = st.selectbox("What log should we use for generating a new event-log?", log_options,1)
            mtf_selection = mtf[mtf[LOG_COL]==log_preference]
            generate_pt(mtf_selection)
            st.write("##### Original")
            st.write(mtf_selection)
295
+
execute_grid_experiments.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import multiprocessing
2
+ import os
3
+
4
+ from datetime import datetime as dt
5
+ from tag.utils.io_helpers import sort_files
6
+ from tqdm import tqdm
7
+
8
#TODO: Pass i properly
def multi_experiment_wrapper(config_file, i=0):
    """Run one experiment by shelling out to main.py with the given config.

    @param config_file: path to an algorithm configuration (json) file
    @param i: experiment index used only for log banners; ``p.map`` passes a
        single argument, so this currently always stays at its default of 0
        (see the TODO above).
    """
    import shlex  # local import: only needed to quote the path safely

    print(f"=========================STARTING EXPERIMENT #{i+1}=======================")
    print(f"INFO: Executing with {config_file}")
    # Quote the directory-derived path so filenames containing spaces or shell
    # metacharacters cannot break (or inject into) the command line.
    os.system(f"python -W ignore main.py -o config_files/options/baseline.json -a {shlex.quote(config_file)}")
    print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
14
+
15
if __name__ == '__main__':
    # NOTE(review): the original reassigned EXPERIMENTS_FOLDER four times
    # ('34_bpic_features', 'grid_1obj', 'grid_experiments', 'test'); only the
    # last assignment ever took effect, so the dead ones were removed.
    EXPERIMENTS_FOLDER = os.path.join('config_files', 'algorithm', 'test')
    start = dt.now()

    # Sort the config files, join them onto the folder, and cap the run at 10
    # experiments (cap kept from the original — drop the slice for a full run).
    experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
    experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
    experiment_list = experiment_list[:10]

    print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
    print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)} experiments.")
    try:
        # Never spawn more workers than there are experiments (or CPU cores).
        num_cores = min(multiprocessing.cpu_count(), len(experiment_list))
        with multiprocessing.Pool(num_cores) as p:
            try:
                print(f"INFO: Multi Experiments starting at {start.strftime('%H:%M:%S')} using {num_cores} cores for {len(experiment_list)} experiments...")
                result = p.map(multi_experiment_wrapper, experiment_list)
            except Exception as e:
                # Report worker failures without aborting the whole script.
                print(e)
    except Exception as e:
        print("ERROR:", e)

    #for i, config_file in enumerate(experiment_list[:2]):
main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import config
2
+ import pandas as pd
3
+ from datetime import datetime as dt
4
+ from tag.generator import GenerateEventLogs
5
+ from tag.features import EventLogFeatures
6
+ from tag.analyser import FeatureAnalyser
7
+ from tag.augmentation import InstanceAugmentator
8
+ from tag.benchmark import BenchmarkTest
9
+ from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
+ from utils.default_argparse import ArgParser
11
+ from utils.param_keys import *
12
+ from utils.param_keys.run_options import *
13
+
14
def run(kwargs: dict, model_params_list: list, filename_list: list):
    """
    This function chooses the running option for the program.

    Bug fix: the parameter was previously declared as ``model_paramas_list``
    (typo) while the body referenced ``model_params_list``, so every call
    raised NameError. The only visible caller passes positionally, so renaming
    the parameter to the spelling the body uses is the fix.

    @param kwargs: dict
        contains the running parameters and the event-log file information
    @param model_params_list: list
        contains a list of model parameters, which are used to analyse these different models.
    @param filename_list: list
        contains the list of the filenames to load multiple event-logs
    @return:
    """
    params = kwargs[PARAMS]
    run_option = params[RUN_OPTION]
    # Defaults so later pipeline steps can run even when earlier ones are skipped.
    ft = EventLogFeatures(None)
    augmented_ft = InstanceAugmentator()
    gen = pd.DataFrame(columns=['log'])

    if run_option == BASELINE:
        # Execute each configured pipeline step in order; steps share state via
        # `gen` (generated logs) and `ft` (extracted features).
        for model_params in model_params_list:
            if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
                augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
                AugmentationPlotter(augmented_ft, model_params)
            elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
                gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
                #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
                GenerationPlotter(gen, model_params, output_path="output/plots")
            elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
                benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
                # BenchmarkPlotter(benchmark.features, output_path="output/plots")
            elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
                ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
                FeaturesPlotter(ft.feat, model_params)
            elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
                GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])

    elif run_option == COMPARE:
        # Comparison plots require exactly a 2-D projection.
        if params[N_COMPONENTS] != 2:
            raise ValueError(f'The parameter `{N_COMPONENTS}` has to be 2, but it\'s {params[N_COMPONENTS]}.')
        ft = EventLogFeatures(**kwargs)
        FeatureAnalyser(ft, params).compare(model_params_list)
    else:
        # NOTE(review): InvalidRunningOptionError is not defined in this module
        # as shown — presumably provided by the star imports; confirm.
        raise InvalidRunningOptionError(f'The run_option: `{run_option}` in the (json) configuration '
                                        f'does not exists or it is not a loading option.\n')
57
+
58
+
59
if __name__=='__main__':
    # Entry point: time the whole pipeline run end to end.
    start_tag = dt.now()
    print(f'INFO: TAG starting {start_tag}')

    # Parse CLI arguments and resolve run options / file lists from the config.
    args = ArgParser().parse('GEDI main')
    run_params = config.get_run_params(args.run_params_json)
    filename_list, kwargs = config.get_files_and_kwargs(run_params)

    if args.result_load_files is None:
        # Normal path: build the per-step model parameters and run the pipeline.
        model_params_list = config.get_model_params_list(args.alg_params_json)
        run(kwargs, model_params_list, filename_list)
    else:
        # NOTE(review): `load` is not defined or imported in this module as
        # shown — this branch would raise NameError; confirm its origin.
        load(args.result_load_files, kwargs)

    print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "id": "e5aa7223",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import numpy as np"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 10,
17
+ "id": "dfd1a302",
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"]) "
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 28,
27
+ "id": "218946b7",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "k=0\n",
32
+ "for i in np.arange(0.2, 1.1,0.2):\n",
33
+ " for j in np.arange(0,0.55,0.1):\n",
34
+ " k+=1\n",
35
+ " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
36
+ " \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
37
+ " df = pd.concat([\n",
38
+ " df, \n",
39
+ " pd.DataFrame([new_entry], columns=new_entry.index)]\n",
40
+ " ).reset_index(drop=True)\n",
41
+ " "
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 31,
47
+ "id": "b1e3bb5a",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 32,
57
+ "id": "5de45389",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/html": [
63
+ "<div>\n",
64
+ "<style scoped>\n",
65
+ " .dataframe tbody tr th:only-of-type {\n",
66
+ " vertical-align: middle;\n",
67
+ " }\n",
68
+ "\n",
69
+ " .dataframe tbody tr th {\n",
70
+ " vertical-align: top;\n",
71
+ " }\n",
72
+ "\n",
73
+ " .dataframe thead th {\n",
74
+ " text-align: right;\n",
75
+ " }\n",
76
+ "</style>\n",
77
+ "<table border=\"1\" class=\"dataframe\">\n",
78
+ " <thead>\n",
79
+ " <tr style=\"text-align: right;\">\n",
80
+ " <th></th>\n",
81
+ " <th>log</th>\n",
82
+ " <th>ratio_top_20_variants</th>\n",
83
+ " <th>normalized_sequence_entropy_linear_forgetting</th>\n",
84
+ " </tr>\n",
85
+ " </thead>\n",
86
+ " <tbody>\n",
87
+ " <tr>\n",
88
+ " <th>0</th>\n",
89
+ " <td>objective_1</td>\n",
90
+ " <td>0.2</td>\n",
91
+ " <td>0.0</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>1</th>\n",
95
+ " <td>objective_2</td>\n",
96
+ " <td>0.2</td>\n",
97
+ " <td>0.1</td>\n",
98
+ " </tr>\n",
99
+ " <tr>\n",
100
+ " <th>2</th>\n",
101
+ " <td>objective_3</td>\n",
102
+ " <td>0.2</td>\n",
103
+ " <td>0.2</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>3</th>\n",
107
+ " <td>objective_4</td>\n",
108
+ " <td>0.2</td>\n",
109
+ " <td>0.3</td>\n",
110
+ " </tr>\n",
111
+ " <tr>\n",
112
+ " <th>4</th>\n",
113
+ " <td>objective_5</td>\n",
114
+ " <td>0.2</td>\n",
115
+ " <td>0.4</td>\n",
116
+ " </tr>\n",
117
+ " <tr>\n",
118
+ " <th>5</th>\n",
119
+ " <td>objective_6</td>\n",
120
+ " <td>0.2</td>\n",
121
+ " <td>0.5</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>6</th>\n",
125
+ " <td>objective_7</td>\n",
126
+ " <td>0.4</td>\n",
127
+ " <td>0.0</td>\n",
128
+ " </tr>\n",
129
+ " <tr>\n",
130
+ " <th>7</th>\n",
131
+ " <td>objective_8</td>\n",
132
+ " <td>0.4</td>\n",
133
+ " <td>0.1</td>\n",
134
+ " </tr>\n",
135
+ " <tr>\n",
136
+ " <th>8</th>\n",
137
+ " <td>objective_9</td>\n",
138
+ " <td>0.4</td>\n",
139
+ " <td>0.2</td>\n",
140
+ " </tr>\n",
141
+ " <tr>\n",
142
+ " <th>9</th>\n",
143
+ " <td>objective_10</td>\n",
144
+ " <td>0.4</td>\n",
145
+ " <td>0.3</td>\n",
146
+ " </tr>\n",
147
+ " <tr>\n",
148
+ " <th>10</th>\n",
149
+ " <td>objective_11</td>\n",
150
+ " <td>0.4</td>\n",
151
+ " <td>0.4</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>11</th>\n",
155
+ " <td>objective_12</td>\n",
156
+ " <td>0.4</td>\n",
157
+ " <td>0.5</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>12</th>\n",
161
+ " <td>objective_13</td>\n",
162
+ " <td>0.6</td>\n",
163
+ " <td>0.0</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>13</th>\n",
167
+ " <td>objective_14</td>\n",
168
+ " <td>0.6</td>\n",
169
+ " <td>0.1</td>\n",
170
+ " </tr>\n",
171
+ " <tr>\n",
172
+ " <th>14</th>\n",
173
+ " <td>objective_15</td>\n",
174
+ " <td>0.6</td>\n",
175
+ " <td>0.2</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>15</th>\n",
179
+ " <td>objective_16</td>\n",
180
+ " <td>0.6</td>\n",
181
+ " <td>0.3</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>16</th>\n",
185
+ " <td>objective_17</td>\n",
186
+ " <td>0.6</td>\n",
187
+ " <td>0.4</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>17</th>\n",
191
+ " <td>objective_18</td>\n",
192
+ " <td>0.6</td>\n",
193
+ " <td>0.5</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>18</th>\n",
197
+ " <td>objective_19</td>\n",
198
+ " <td>0.8</td>\n",
199
+ " <td>0.0</td>\n",
200
+ " </tr>\n",
201
+ " <tr>\n",
202
+ " <th>19</th>\n",
203
+ " <td>objective_20</td>\n",
204
+ " <td>0.8</td>\n",
205
+ " <td>0.1</td>\n",
206
+ " </tr>\n",
207
+ " <tr>\n",
208
+ " <th>20</th>\n",
209
+ " <td>objective_21</td>\n",
210
+ " <td>0.8</td>\n",
211
+ " <td>0.2</td>\n",
212
+ " </tr>\n",
213
+ " <tr>\n",
214
+ " <th>21</th>\n",
215
+ " <td>objective_22</td>\n",
216
+ " <td>0.8</td>\n",
217
+ " <td>0.3</td>\n",
218
+ " </tr>\n",
219
+ " <tr>\n",
220
+ " <th>22</th>\n",
221
+ " <td>objective_23</td>\n",
222
+ " <td>0.8</td>\n",
223
+ " <td>0.4</td>\n",
224
+ " </tr>\n",
225
+ " <tr>\n",
226
+ " <th>23</th>\n",
227
+ " <td>objective_24</td>\n",
228
+ " <td>0.8</td>\n",
229
+ " <td>0.5</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>24</th>\n",
233
+ " <td>objective_25</td>\n",
234
+ " <td>1.0</td>\n",
235
+ " <td>0.0</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>25</th>\n",
239
+ " <td>objective_26</td>\n",
240
+ " <td>1.0</td>\n",
241
+ " <td>0.1</td>\n",
242
+ " </tr>\n",
243
+ " <tr>\n",
244
+ " <th>26</th>\n",
245
+ " <td>objective_27</td>\n",
246
+ " <td>1.0</td>\n",
247
+ " <td>0.2</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>27</th>\n",
251
+ " <td>objective_28</td>\n",
252
+ " <td>1.0</td>\n",
253
+ " <td>0.3</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>28</th>\n",
257
+ " <td>objective_29</td>\n",
258
+ " <td>1.0</td>\n",
259
+ " <td>0.4</td>\n",
260
+ " </tr>\n",
261
+ " <tr>\n",
262
+ " <th>29</th>\n",
263
+ " <td>objective_30</td>\n",
264
+ " <td>1.0</td>\n",
265
+ " <td>0.5</td>\n",
266
+ " </tr>\n",
267
+ " </tbody>\n",
268
+ "</table>\n",
269
+ "</div>"
270
+ ],
271
+ "text/plain": [
272
+ " log ratio_top_20_variants \n",
273
+ "0 objective_1 0.2 \\\n",
274
+ "1 objective_2 0.2 \n",
275
+ "2 objective_3 0.2 \n",
276
+ "3 objective_4 0.2 \n",
277
+ "4 objective_5 0.2 \n",
278
+ "5 objective_6 0.2 \n",
279
+ "6 objective_7 0.4 \n",
280
+ "7 objective_8 0.4 \n",
281
+ "8 objective_9 0.4 \n",
282
+ "9 objective_10 0.4 \n",
283
+ "10 objective_11 0.4 \n",
284
+ "11 objective_12 0.4 \n",
285
+ "12 objective_13 0.6 \n",
286
+ "13 objective_14 0.6 \n",
287
+ "14 objective_15 0.6 \n",
288
+ "15 objective_16 0.6 \n",
289
+ "16 objective_17 0.6 \n",
290
+ "17 objective_18 0.6 \n",
291
+ "18 objective_19 0.8 \n",
292
+ "19 objective_20 0.8 \n",
293
+ "20 objective_21 0.8 \n",
294
+ "21 objective_22 0.8 \n",
295
+ "22 objective_23 0.8 \n",
296
+ "23 objective_24 0.8 \n",
297
+ "24 objective_25 1.0 \n",
298
+ "25 objective_26 1.0 \n",
299
+ "26 objective_27 1.0 \n",
300
+ "27 objective_28 1.0 \n",
301
+ "28 objective_29 1.0 \n",
302
+ "29 objective_30 1.0 \n",
303
+ "\n",
304
+ " normalized_sequence_entropy_linear_forgetting \n",
305
+ "0 0.0 \n",
306
+ "1 0.1 \n",
307
+ "2 0.2 \n",
308
+ "3 0.3 \n",
309
+ "4 0.4 \n",
310
+ "5 0.5 \n",
311
+ "6 0.0 \n",
312
+ "7 0.1 \n",
313
+ "8 0.2 \n",
314
+ "9 0.3 \n",
315
+ "10 0.4 \n",
316
+ "11 0.5 \n",
317
+ "12 0.0 \n",
318
+ "13 0.1 \n",
319
+ "14 0.2 \n",
320
+ "15 0.3 \n",
321
+ "16 0.4 \n",
322
+ "17 0.5 \n",
323
+ "18 0.0 \n",
324
+ "19 0.1 \n",
325
+ "20 0.2 \n",
326
+ "21 0.3 \n",
327
+ "22 0.4 \n",
328
+ "23 0.5 \n",
329
+ "24 0.0 \n",
330
+ "25 0.1 \n",
331
+ "26 0.2 \n",
332
+ "27 0.3 \n",
333
+ "28 0.4 \n",
334
+ "29 0.5 "
335
+ ]
336
+ },
337
+ "execution_count": 32,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "df"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "id": "d726a5ae",
350
+ "metadata": {},
351
+ "outputs": [],
352
+ "source": []
353
+ }
354
+ ],
355
+ "metadata": {
356
+ "kernelspec": {
357
+ "display_name": "Python 3 (ipykernel)",
358
+ "language": "python",
359
+ "name": "python3"
360
+ },
361
+ "language_info": {
362
+ "codemirror_mode": {
363
+ "name": "ipython",
364
+ "version": 3
365
+ },
366
+ "file_extension": ".py",
367
+ "mimetype": "text/x-python",
368
+ "name": "python",
369
+ "nbconvert_exporter": "python",
370
+ "pygments_lexer": "ipython3",
371
+ "version": "3.9.7"
372
+ }
373
+ },
374
+ "nbformat": 4,
375
+ "nbformat_minor": 5
376
+ }
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb ADDED
@@ -0,0 +1,818 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "4827785f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<div>\n",
13
+ "<style scoped>\n",
14
+ " .dataframe tbody tr th:only-of-type {\n",
15
+ " vertical-align: middle;\n",
16
+ " }\n",
17
+ "\n",
18
+ " .dataframe tbody tr th {\n",
19
+ " vertical-align: top;\n",
20
+ " }\n",
21
+ "\n",
22
+ " .dataframe thead th {\n",
23
+ " text-align: right;\n",
24
+ " }\n",
25
+ "</style>\n",
26
+ "<table border=\"1\" class=\"dataframe\">\n",
27
+ " <thead>\n",
28
+ " <tr style=\"text-align: right;\">\n",
29
+ " <th></th>\n",
30
+ " <th>Name</th>\n",
31
+ " <th>Short description</th>\n",
32
+ " <th>data link</th>\n",
33
+ " <th>challenge link</th>\n",
34
+ " <th>Citations (Stand Februar 2023)</th>\n",
35
+ " <th>Publications</th>\n",
36
+ " <th>Process Discovery/ Declarative</th>\n",
37
+ " <th>Conformance Checking / Alignment / Replay</th>\n",
38
+ " <th>Online / Streaming / Realtime</th>\n",
39
+ " <th>Performance (Analysis) / Temporal / Time</th>\n",
40
+ " <th>Predict(ive)/ Monitoring/ Prescriptive</th>\n",
41
+ " <th>Trace clustering / Clustering</th>\n",
42
+ " <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
43
+ " <th>Further keywords:</th>\n",
44
+ " </tr>\n",
45
+ " </thead>\n",
46
+ " <tbody>\n",
47
+ " <tr>\n",
48
+ " <th>0</th>\n",
49
+ " <td>Sepsis Cases - Event Log</td>\n",
50
+ " <td>This real-life event log contains events of se...</td>\n",
51
+ " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
52
+ " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
53
+ " <td>61</td>\n",
54
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
55
+ " <td>17</td>\n",
56
+ " <td>7</td>\n",
57
+ " <td>4</td>\n",
58
+ " <td>1</td>\n",
59
+ " <td>8</td>\n",
60
+ " <td>2</td>\n",
61
+ " <td>2</td>\n",
62
+ " <td>(machine) learning, (online process) monitorin...</td>\n",
63
+ " </tr>\n",
64
+ " <tr>\n",
65
+ " <th>1</th>\n",
66
+ " <td>BPI 2017 - Offer Log</td>\n",
67
+ " <td>Contains data from a financial institute inclu...</td>\n",
68
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
69
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
70
+ " <td>4</td>\n",
71
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
72
+ " <td>1</td>\n",
73
+ " <td>0</td>\n",
74
+ " <td>0</td>\n",
75
+ " <td>1</td>\n",
76
+ " <td>1</td>\n",
77
+ " <td>0</td>\n",
78
+ " <td>0</td>\n",
79
+ " <td>(machine) learning, cloud computing</td>\n",
80
+ " </tr>\n",
81
+ " <tr>\n",
82
+ " <th>2</th>\n",
83
+ " <td>Road Traffic Fine Management Process (not BPI)</td>\n",
84
+ " <td>A real-life event log taken from an informatio...</td>\n",
85
+ " <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>95</td>\n",
88
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
89
+ " <td>32</td>\n",
90
+ " <td>9</td>\n",
91
+ " <td>4</td>\n",
92
+ " <td>8</td>\n",
93
+ " <td>15</td>\n",
94
+ " <td>1</td>\n",
95
+ " <td>2</td>\n",
96
+ " <td>alarm-based prescriptive process monitoring, b...</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>3</th>\n",
100
+ " <td>BPI 2011</td>\n",
101
+ " <td>Contains data from from a Dutch Academic Hospi...</td>\n",
102
+ " <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
103
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
104
+ " <td>57</td>\n",
105
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
106
+ " <td>13</td>\n",
107
+ " <td>1</td>\n",
108
+ " <td>3</td>\n",
109
+ " <td>4</td>\n",
110
+ " <td>12</td>\n",
111
+ " <td>4</td>\n",
112
+ " <td>1</td>\n",
113
+ " <td>(compliance) monitoring, (machine) learning, d...</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>4</th>\n",
117
+ " <td>BPI 2012</td>\n",
118
+ " <td>Contains the event log of an application proce...</td>\n",
119
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
120
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
121
+ " <td>151</td>\n",
122
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
123
+ " <td>40</td>\n",
124
+ " <td>15</td>\n",
125
+ " <td>4</td>\n",
126
+ " <td>13</td>\n",
127
+ " <td>46</td>\n",
128
+ " <td>0</td>\n",
129
+ " <td>1</td>\n",
130
+ " <td>(in)frequent patterns in process models, (mach...</td>\n",
131
+ " </tr>\n",
132
+ " <tr>\n",
133
+ " <th>5</th>\n",
134
+ " <td>BPI 2013 - Open Problems</td>\n",
135
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
136
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
137
+ " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
138
+ " <td>6</td>\n",
139
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
140
+ " <td>1</td>\n",
141
+ " <td>0</td>\n",
142
+ " <td>0</td>\n",
143
+ " <td>0</td>\n",
144
+ " <td>1</td>\n",
145
+ " <td>0</td>\n",
146
+ " <td>0</td>\n",
147
+ " <td>(in)frequent patterns in process models, (mach...</td>\n",
148
+ " </tr>\n",
149
+ " <tr>\n",
150
+ " <th>6</th>\n",
151
+ " <td>BPI 2013 - Closed Problems</td>\n",
152
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
153
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
154
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
155
+ " <td>12</td>\n",
156
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
157
+ " <td>3</td>\n",
158
+ " <td>2</td>\n",
159
+ " <td>1</td>\n",
160
+ " <td>2</td>\n",
161
+ " <td>0</td>\n",
162
+ " <td>0</td>\n",
163
+ " <td>3</td>\n",
164
+ " <td>(in)frequent patterns in process models</td>\n",
165
+ " </tr>\n",
166
+ " <tr>\n",
167
+ " <th>7</th>\n",
168
+ " <td>BPI 2013 - Incidents</td>\n",
169
+ " <td>The log contains events from an incident and p...</td>\n",
170
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
171
+ " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
172
+ " <td>36</td>\n",
173
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
174
+ " <td>14</td>\n",
175
+ " <td>5</td>\n",
176
+ " <td>1</td>\n",
177
+ " <td>1</td>\n",
178
+ " <td>7</td>\n",
179
+ " <td>0</td>\n",
180
+ " <td>2</td>\n",
181
+ " <td>(machine) learning, rule mining</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>8</th>\n",
185
+ " <td>BPI 2014 - Incident Records</td>\n",
186
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
187
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
188
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
189
+ " <td>5</td>\n",
190
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
191
+ " <td>1</td>\n",
192
+ " <td>0</td>\n",
193
+ " <td>0</td>\n",
194
+ " <td>0</td>\n",
195
+ " <td>0</td>\n",
196
+ " <td>0</td>\n",
197
+ " <td>0</td>\n",
198
+ " <td>privacy preservation, security</td>\n",
199
+ " </tr>\n",
200
+ " <tr>\n",
201
+ " <th>9</th>\n",
202
+ " <td>BPI 2014 - Interaction Records</td>\n",
203
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
204
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
205
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
206
+ " <td>1</td>\n",
207
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
208
+ " <td>0</td>\n",
209
+ " <td>0</td>\n",
210
+ " <td>0</td>\n",
211
+ " <td>0</td>\n",
212
+ " <td>0</td>\n",
213
+ " <td>0</td>\n",
214
+ " <td>0</td>\n",
215
+ " <td>(machine) learning, hidden Markov models</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>10</th>\n",
219
+ " <td>BPI 2015 - Log 3</td>\n",
220
+ " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
221
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
222
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
223
+ " <td>1</td>\n",
224
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
225
+ " <td>0</td>\n",
226
+ " <td>0</td>\n",
227
+ " <td>0</td>\n",
228
+ " <td>0</td>\n",
229
+ " <td>1</td>\n",
230
+ " <td>0</td>\n",
231
+ " <td>0</td>\n",
232
+ " <td>specification-driven predictive business proce...</td>\n",
233
+ " </tr>\n",
234
+ " <tr>\n",
235
+ " <th>11</th>\n",
236
+ " <td>BPI 2015 - Log 1</td>\n",
237
+ " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
238
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
239
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
240
+ " <td>8</td>\n",
241
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
242
+ " <td>1</td>\n",
243
+ " <td>1</td>\n",
244
+ " <td>0</td>\n",
245
+ " <td>0</td>\n",
246
+ " <td>3</td>\n",
247
+ " <td>0</td>\n",
248
+ " <td>3</td>\n",
249
+ " <td>(machine) learning</td>\n",
250
+ " </tr>\n",
251
+ " <tr>\n",
252
+ " <th>12</th>\n",
253
+ " <td>BPI 2016 - Clicks Logged In</td>\n",
254
+ " <td>Contains clicks of users that are logged in fr...</td>\n",
255
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
256
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
257
+ " <td>1</td>\n",
258
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
259
+ " <td>1</td>\n",
260
+ " <td>0</td>\n",
261
+ " <td>1</td>\n",
262
+ " <td>0</td>\n",
263
+ " <td>0</td>\n",
264
+ " <td>0</td>\n",
265
+ " <td>0</td>\n",
266
+ " <td>automation</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>13</th>\n",
270
+ " <td>BPI 2017 - Application Log</td>\n",
271
+ " <td>Contains data from a financial institute inclu...</td>\n",
272
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
273
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
274
+ " <td>73</td>\n",
275
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
276
+ " <td>11</td>\n",
277
+ " <td>5</td>\n",
278
+ " <td>2</td>\n",
279
+ " <td>14</td>\n",
280
+ " <td>23</td>\n",
281
+ " <td>1</td>\n",
282
+ " <td>1</td>\n",
283
+ " <td>(machine) learning, alarm-based prescriptive p...</td>\n",
284
+ " </tr>\n",
285
+ " <tr>\n",
286
+ " <th>14</th>\n",
287
+ " <td>BPI 2018</td>\n",
288
+ " <td>The process covers the handling of application...</td>\n",
289
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
290
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
291
+ " <td>26</td>\n",
292
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
293
+ " <td>7</td>\n",
294
+ " <td>1</td>\n",
295
+ " <td>2</td>\n",
296
+ " <td>0</td>\n",
297
+ " <td>8</td>\n",
298
+ " <td>0</td>\n",
299
+ " <td>2</td>\n",
300
+ " <td>(machine) learning, automation</td>\n",
301
+ " </tr>\n",
302
+ " <tr>\n",
303
+ " <th>15</th>\n",
304
+ " <td>BPI 2020 - Travel Permits</td>\n",
305
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
306
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
307
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
308
+ " <td>2</td>\n",
309
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
310
+ " <td>0</td>\n",
311
+ " <td>0</td>\n",
312
+ " <td>0</td>\n",
313
+ " <td>1</td>\n",
314
+ " <td>0</td>\n",
315
+ " <td>0</td>\n",
316
+ " <td>0</td>\n",
317
+ " <td>stage-based process performance analysis</td>\n",
318
+ " </tr>\n",
319
+ " <tr>\n",
320
+ " <th>16</th>\n",
321
+ " <td>BPI 2019</td>\n",
322
+ " <td>Contains the purchase order handling process o...</td>\n",
323
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
324
+ " <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
325
+ " <td>35</td>\n",
326
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
327
+ " <td>3</td>\n",
328
+ " <td>1</td>\n",
329
+ " <td>6</td>\n",
330
+ " <td>6</td>\n",
331
+ " <td>9</td>\n",
332
+ " <td>4</td>\n",
333
+ " <td>1</td>\n",
334
+ " <td>(online process) monitoring, remaining time pr...</td>\n",
335
+ " </tr>\n",
336
+ " <tr>\n",
337
+ " <th>17</th>\n",
338
+ " <td>BPI 2020 - International Declarations</td>\n",
339
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
340
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
341
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
342
+ " <td>2</td>\n",
343
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
344
+ " <td>0</td>\n",
345
+ " <td>0</td>\n",
346
+ " <td>0</td>\n",
347
+ " <td>1</td>\n",
348
+ " <td>2</td>\n",
349
+ " <td>0</td>\n",
350
+ " <td>0</td>\n",
351
+ " <td>(machine) learning, remaining time prediction</td>\n",
352
+ " </tr>\n",
353
+ " <tr>\n",
354
+ " <th>18</th>\n",
355
+ " <td>BPI 2020 - Domestic Declarations</td>\n",
356
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
357
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
358
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
359
+ " <td>7</td>\n",
360
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
361
+ " <td>0</td>\n",
362
+ " <td>2</td>\n",
363
+ " <td>2</td>\n",
364
+ " <td>2</td>\n",
365
+ " <td>3</td>\n",
366
+ " <td>0</td>\n",
367
+ " <td>0</td>\n",
368
+ " <td>(machine) learning, remaining time prediction</td>\n",
369
+ " </tr>\n",
370
+ " <tr>\n",
371
+ " <th>19</th>\n",
372
+ " <td>BPI 2020 - Prepaid Travel Cost</td>\n",
373
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
374
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
375
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
376
+ " <td>2</td>\n",
377
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
378
+ " <td>0</td>\n",
379
+ " <td>0</td>\n",
380
+ " <td>0</td>\n",
381
+ " <td>0</td>\n",
382
+ " <td>0</td>\n",
383
+ " <td>0</td>\n",
384
+ " <td>0</td>\n",
385
+ " <td>multi-perspective</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>20</th>\n",
389
+ " <td>Helpdesk</td>\n",
390
+ " <td>Ticketing management process of the Help desk ...</td>\n",
391
+ " <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
392
+ " <td>NaN</td>\n",
393
+ " <td>20</td>\n",
394
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
395
+ " <td>4</td>\n",
396
+ " <td>1</td>\n",
397
+ " <td>3</td>\n",
398
+ " <td>1</td>\n",
399
+ " <td>8</td>\n",
400
+ " <td>0</td>\n",
401
+ " <td>0</td>\n",
402
+ " <td>(machine) learning, drift detection</td>\n",
403
+ " </tr>\n",
404
+ " <tr>\n",
405
+ " <th>21</th>\n",
406
+ " <td>Receipt phase of an environmental permit appli...</td>\n",
407
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
408
+ " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
409
+ " <td>NaN</td>\n",
410
+ " <td>15</td>\n",
411
+ " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
412
+ " <td>-1</td>\n",
413
+ " <td>-1</td>\n",
414
+ " <td>-1</td>\n",
415
+ " <td>-1</td>\n",
416
+ " <td>-1</td>\n",
417
+ " <td>-1</td>\n",
418
+ " <td>-1</td>\n",
419
+ " <td>NaN</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <th>22</th>\n",
423
+ " <td>Environmental permit application process (‘WAB...</td>\n",
424
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
425
+ " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
426
+ " <td>NaN</td>\n",
427
+ " <td>2</td>\n",
428
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
429
+ " <td>0</td>\n",
430
+ " <td>0</td>\n",
431
+ " <td>0</td>\n",
432
+ " <td>0</td>\n",
433
+ " <td>1</td>\n",
434
+ " <td>0</td>\n",
435
+ " <td>0</td>\n",
436
+ " <td>predictions with a-priori knowledge</td>\n",
437
+ " </tr>\n",
438
+ " <tr>\n",
439
+ " <th>23</th>\n",
440
+ " <td>Environmental permit application process (‘WAB...</td>\n",
441
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
442
+ " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
443
+ " <td>NaN</td>\n",
444
+ " <td>2</td>\n",
445
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
446
+ " <td>1</td>\n",
447
+ " <td>0</td>\n",
448
+ " <td>0</td>\n",
449
+ " <td>0</td>\n",
450
+ " <td>0</td>\n",
451
+ " <td>0</td>\n",
452
+ " <td>0</td>\n",
453
+ " <td>multidimensional process mining, process cubes</td>\n",
454
+ " </tr>\n",
455
+ " <tr>\n",
456
+ " <th>24</th>\n",
457
+ " <td>NaN</td>\n",
458
+ " <td>NaN</td>\n",
459
+ " <td>NaN</td>\n",
460
+ " <td>NaN</td>\n",
461
+ " <td>NaN</td>\n",
462
+ " <td>NaN</td>\n",
463
+ " <td>NaN</td>\n",
464
+ " <td>NaN</td>\n",
465
+ " <td>NaN</td>\n",
466
+ " <td>NaN</td>\n",
467
+ " <td>NaN</td>\n",
468
+ " <td>NaN</td>\n",
469
+ " <td>NaN</td>\n",
470
+ " <td>NaN</td>\n",
471
+ " </tr>\n",
472
+ " </tbody>\n",
473
+ "</table>\n",
474
+ "</div>"
475
+ ],
476
+ "text/plain": [
477
+ " Name \\\n",
478
+ "0 Sepsis Cases - Event Log \n",
479
+ "1 BPI 2017 - Offer Log \n",
480
+ "2 Road Traffic Fine Management Process (not BPI) \n",
481
+ "3 BPI 2011 \n",
482
+ "4 BPI 2012 \n",
483
+ "5 BPI 2013 - Open Problems \n",
484
+ "6 BPI 2013 - Closed Problems \n",
485
+ "7 BPI 2013 - Incidents \n",
486
+ "8 BPI 2014 - Incident Records \n",
487
+ "9 BPI 2014 - Interaction Records \n",
488
+ "10 BPI 2015 - Log 3 \n",
489
+ "11 BPI 2015 - Log 1 \n",
490
+ "12 BPI 2016 - Clicks Logged In \n",
491
+ "13 BPI 2017 - Application Log \n",
492
+ "14 BPI 2018 \n",
493
+ "15 BPI 2020 - Travel Permits \n",
494
+ "16 BPI 2019 \n",
495
+ "17 BPI 2020 - International Declarations \n",
496
+ "18 BPI 2020 - Domestic Declarations \n",
497
+ "19 BPI 2020 - Prepaid Travel Cost \n",
498
+ "20 Helpdesk \n",
499
+ "21 Receipt phase of an environmental permit appli... \n",
500
+ "22 Environmental permit application process (‘WAB... \n",
501
+ "23 Environmental permit application process (‘WAB... \n",
502
+ "24 NaN \n",
503
+ "\n",
504
+ " Short description \\\n",
505
+ "0 This real-life event log contains events of se... \n",
506
+ "1 Contains data from a financial institute inclu... \n",
507
+ "2 A real-life event log taken from an informatio... \n",
508
+ "3 Contains data from from a Dutch Academic Hospi... \n",
509
+ "4 Contains the event log of an application proce... \n",
510
+ "5 Rabobank Group ICT implemented ITIL processes ... \n",
511
+ "6 Rabobank Group ICT implemented ITIL processes ... \n",
512
+ "7 The log contains events from an incident and p... \n",
513
+ "8 Rabobank Group ICT implemented ITIL processes ... \n",
514
+ "9 Rabobank Group ICT implemented ITIL processes ... \n",
515
+ "10 Provided by 5 Dutch municipalities. The data c... \n",
516
+ "11 Provided by 5 Dutch municipalities. The data c... \n",
517
+ "12 Contains clicks of users that are logged in fr... \n",
518
+ "13 Contains data from a financial institute inclu... \n",
519
+ "14 The process covers the handling of application... \n",
520
+ "15 Contains 2 years of data from the reimbursemen... \n",
521
+ "16 Contains the purchase order handling process o... \n",
522
+ "17 Contains 2 years of data from the reimbursemen... \n",
523
+ "18 Contains 2 years of data from the reimbursemen... \n",
524
+ "19 Contains 2 years of data from the reimbursemen... \n",
525
+ "20 Ticketing management process of the Help desk ... \n",
526
+ "21 Data originates from the CoSeLoG project where... \n",
527
+ "22 Data originates from the CoSeLoG project where... \n",
528
+ "23 Data originates from the CoSeLoG project where... \n",
529
+ "24 NaN \n",
530
+ "\n",
531
+ " data link \\\n",
532
+ "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
533
+ "1 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
534
+ "2 https://data.4tu.nl/articles/dataset/Road_Traf... \n",
535
+ "3 https://data.4tu.nl/articles/dataset/Real-life... \n",
536
+ "4 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
537
+ "5 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
538
+ "6 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
539
+ "7 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
540
+ "8 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
541
+ "9 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
542
+ "10 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
543
+ "11 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
544
+ "12 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
545
+ "13 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
546
+ "14 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
547
+ "15 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
548
+ "16 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
549
+ "17 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
550
+ "18 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
551
+ "19 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
552
+ "20 https://data.4tu.nl/articles/dataset/Dataset_b... \n",
553
+ "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
554
+ "22 https://data.4tu.nl/articles/dataset/Environme... \n",
555
+ "23 https://data.4tu.nl/articles/dataset/Environme... \n",
556
+ "24 NaN \n",
557
+ "\n",
558
+ " challenge link \\\n",
559
+ "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
560
+ "1 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
561
+ "2 NaN \n",
562
+ "3 https://www.win.tue.nl/bpi/doku.php?id=2011:ch... \n",
563
+ "4 https://www.win.tue.nl/bpi/doku.php?id=2012:ch... \n",
564
+ "5 https://www.win.tue.nl/bpi/2013/challenge.html \n",
565
+ "6 https://www.win.tue.nl/bpi/doku.php?id=2013:ch... \n",
566
+ "7 https://www.win.tue.nl/bpi/2013/challenge.html \n",
567
+ "8 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
568
+ "9 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
569
+ "10 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
570
+ "11 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
571
+ "12 https://www.win.tue.nl/bpi/doku.php?id=2016:ch... \n",
572
+ "13 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
573
+ "14 https://www.win.tue.nl/bpi/doku.php?id=2018:ch... \n",
574
+ "15 https://icpmconference.org/2020/bpi-challenge/ \n",
575
+ "16 https://icpmconference.org/2019/icpm-2019/cont... \n",
576
+ "17 https://icpmconference.org/2020/bpi-challenge/ \n",
577
+ "18 https://icpmconference.org/2020/bpi-challenge/ \n",
578
+ "19 https://icpmconference.org/2020/bpi-challenge/ \n",
579
+ "20 NaN \n",
580
+ "21 NaN \n",
581
+ "22 NaN \n",
582
+ "23 NaN \n",
583
+ "24 NaN \n",
584
+ "\n",
585
+ " Citations (Stand Februar 2023) \\\n",
586
+ "0 61 \n",
587
+ "1 4 \n",
588
+ "2 95 \n",
589
+ "3 57 \n",
590
+ "4 151 \n",
591
+ "5 6 \n",
592
+ "6 12 \n",
593
+ "7 36 \n",
594
+ "8 5 \n",
595
+ "9 1 \n",
596
+ "10 1 \n",
597
+ "11 8 \n",
598
+ "12 1 \n",
599
+ "13 73 \n",
600
+ "14 26 \n",
601
+ "15 2 \n",
602
+ "16 35 \n",
603
+ "17 2 \n",
604
+ "18 7 \n",
605
+ "19 2 \n",
606
+ "20 20 \n",
607
+ "21 15 \n",
608
+ "22 2 \n",
609
+ "23 2 \n",
610
+ "24 NaN \n",
611
+ "\n",
612
+ " Publications \\\n",
613
+ "0 https://app.dimensions.ai/discover/publication... \n",
614
+ "1 https://app.dimensions.ai/discover/publication... \n",
615
+ "2 https://app.dimensions.ai/discover/publication... \n",
616
+ "3 https://app.dimensions.ai/discover/publication... \n",
617
+ "4 https://app.dimensions.ai/discover/publication... \n",
618
+ "5 https://app.dimensions.ai/discover/publication... \n",
619
+ "6 https://app.dimensions.ai/discover/publication... \n",
620
+ "7 https://app.dimensions.ai/discover/publication... \n",
621
+ "8 https://app.dimensions.ai/discover/publication... \n",
622
+ "9 https://app.dimensions.ai/discover/publication... \n",
623
+ "10 https://app.dimensions.ai/discover/publication... \n",
624
+ "11 https://app.dimensions.ai/discover/publication... \n",
625
+ "12 https://app.dimensions.ai/discover/publication... \n",
626
+ "13 https://app.dimensions.ai/discover/publication... \n",
627
+ "14 https://app.dimensions.ai/discover/publication... \n",
628
+ "15 https://app.dimensions.ai/discover/publication... \n",
629
+ "16 https://app.dimensions.ai/discover/publication... \n",
630
+ "17 https://app.dimensions.ai/discover/publication... \n",
631
+ "18 https://app.dimensions.ai/discover/publication... \n",
632
+ "19 https://app.dimensions.ai/discover/publication... \n",
633
+ "20 https://app.dimensions.ai/discover/publication... \n",
634
+ "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
635
+ "22 https://app.dimensions.ai/discover/publication... \n",
636
+ "23 https://app.dimensions.ai/discover/publication... \n",
637
+ "24 NaN \n",
638
+ "\n",
639
+ " Process Discovery/ Declarative Conformance Checking / Alignment / Replay \\\n",
640
+ "0 17 7 \n",
641
+ "1 1 0 \n",
642
+ "2 32 9 \n",
643
+ "3 13 1 \n",
644
+ "4 40 15 \n",
645
+ "5 1 0 \n",
646
+ "6 3 2 \n",
647
+ "7 14 5 \n",
648
+ "8 1 0 \n",
649
+ "9 0 0 \n",
650
+ "10 0 0 \n",
651
+ "11 1 1 \n",
652
+ "12 1 0 \n",
653
+ "13 11 5 \n",
654
+ "14 7 1 \n",
655
+ "15 0 0 \n",
656
+ "16 3 1 \n",
657
+ "17 0 0 \n",
658
+ "18 0 2 \n",
659
+ "19 0 0 \n",
660
+ "20 4 1 \n",
661
+ "21 -1 -1 \n",
662
+ "22 0 0 \n",
663
+ "23 1 0 \n",
664
+ "24 NaN NaN \n",
665
+ "\n",
666
+ " Online / Streaming / Realtime Performance (Analysis) / Temporal / Time \\\n",
667
+ "0 4 1 \n",
668
+ "1 0 1 \n",
669
+ "2 4 8 \n",
670
+ "3 3 4 \n",
671
+ "4 4 13 \n",
672
+ "5 0 0 \n",
673
+ "6 1 2 \n",
674
+ "7 1 1 \n",
675
+ "8 0 0 \n",
676
+ "9 0 0 \n",
677
+ "10 0 0 \n",
678
+ "11 0 0 \n",
679
+ "12 1 0 \n",
680
+ "13 2 14 \n",
681
+ "14 2 0 \n",
682
+ "15 0 1 \n",
683
+ "16 6 6 \n",
684
+ "17 0 1 \n",
685
+ "18 2 2 \n",
686
+ "19 0 0 \n",
687
+ "20 3 1 \n",
688
+ "21 -1 -1 \n",
689
+ "22 0 0 \n",
690
+ "23 0 0 \n",
691
+ "24 NaN NaN \n",
692
+ "\n",
693
+ " Predict(ive)/ Monitoring/ Prescriptive Trace clustering / Clustering \\\n",
694
+ "0 8 2 \n",
695
+ "1 1 0 \n",
696
+ "2 15 1 \n",
697
+ "3 12 4 \n",
698
+ "4 46 0 \n",
699
+ "5 1 0 \n",
700
+ "6 0 0 \n",
701
+ "7 7 0 \n",
702
+ "8 0 0 \n",
703
+ "9 0 0 \n",
704
+ "10 1 0 \n",
705
+ "11 3 0 \n",
706
+ "12 0 0 \n",
707
+ "13 23 1 \n",
708
+ "14 8 0 \n",
709
+ "15 0 0 \n",
710
+ "16 9 4 \n",
711
+ "17 2 0 \n",
712
+ "18 3 0 \n",
713
+ "19 0 0 \n",
714
+ "20 8 0 \n",
715
+ "21 -1 -1 \n",
716
+ "22 1 0 \n",
717
+ "23 0 0 \n",
718
+ "24 NaN NaN \n",
719
+ "\n",
720
+ " Preprocessing / Event Abstraction / Event Data Correlation \\\n",
721
+ "0 2 \n",
722
+ "1 0 \n",
723
+ "2 2 \n",
724
+ "3 1 \n",
725
+ "4 1 \n",
726
+ "5 0 \n",
727
+ "6 3 \n",
728
+ "7 2 \n",
729
+ "8 0 \n",
730
+ "9 0 \n",
731
+ "10 0 \n",
732
+ "11 3 \n",
733
+ "12 0 \n",
734
+ "13 1 \n",
735
+ "14 2 \n",
736
+ "15 0 \n",
737
+ "16 1 \n",
738
+ "17 0 \n",
739
+ "18 0 \n",
740
+ "19 0 \n",
741
+ "20 0 \n",
742
+ "21 -1 \n",
743
+ "22 0 \n",
744
+ "23 0 \n",
745
+ "24 NaN \n",
746
+ "\n",
747
+ " Further keywords: \n",
748
+ "0 (machine) learning, (online process) monitorin... \n",
749
+ "1 (machine) learning, cloud computing \n",
750
+ "2 alarm-based prescriptive process monitoring, b... \n",
751
+ "3 (compliance) monitoring, (machine) learning, d... \n",
752
+ "4 (in)frequent patterns in process models, (mach... \n",
753
+ "5 (in)frequent patterns in process models, (mach... \n",
754
+ "6 (in)frequent patterns in process models \n",
755
+ "7 (machine) learning, rule mining \n",
756
+ "8 privacy preservation, security \n",
757
+ "9 (machine) learning, hidden Markov models \n",
758
+ "10 specification-driven predictive business proce... \n",
759
+ "11 (machine) learning \n",
760
+ "12 automation \n",
761
+ "13 (machine) learning, alarm-based prescriptive p... \n",
762
+ "14 (machine) learning, automation \n",
763
+ "15 stage-based process performance analysis \n",
764
+ "16 (online process) monitoring, remaining time pr... \n",
765
+ "17 (machine) learning, remaining time prediction \n",
766
+ "18 (machine) learning, remaining time prediction \n",
767
+ "19 multi-perspective \n",
768
+ "20 (machine) learning, drift detection \n",
769
+ "21 NaN \n",
770
+ "22 predictions with a-priori knowledge \n",
771
+ "23 multidimensional process mining, process cubes \n",
772
+ "24 NaN "
773
+ ]
774
+ },
775
+ "execution_count": 4,
776
+ "metadata": {},
777
+ "output_type": "execute_result"
778
+ }
779
+ ],
780
+ "source": [
781
+ "#import pm4py\n",
782
+ "import pandas as pd\n",
783
+ "INPUT_PATH = \"../data/mappings.csv\"\n",
784
+ "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
785
+ "df"
786
+ ]
787
+ },
788
+ {
789
+ "cell_type": "code",
790
+ "execution_count": null,
791
+ "id": "04a97f37",
792
+ "metadata": {},
793
+ "outputs": [],
794
+ "source": []
795
+ }
796
+ ],
797
+ "metadata": {
798
+ "kernelspec": {
799
+ "display_name": "Python 3 (ipykernel)",
800
+ "language": "python",
801
+ "name": "python3"
802
+ },
803
+ "language_info": {
804
+ "codemirror_mode": {
805
+ "name": "ipython",
806
+ "version": 3
807
+ },
808
+ "file_extension": ".py",
809
+ "mimetype": "text/x-python",
810
+ "name": "python",
811
+ "nbconvert_exporter": "python",
812
+ "pygments_lexer": "ipython3",
813
+ "version": "3.10.7"
814
+ }
815
+ },
816
+ "nbformat": 4,
817
+ "nbformat_minor": 5
818
+ }
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/benchmarking_process_discovery.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/bpic_generability_pdm.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/experiment_generator.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_distributions.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_performance_similarity.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_selection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_representativeness.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/configspace.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hyperparameters": [
3
+ {
4
+ "name": "choice",
5
+ "type": "uniform_float",
6
+ "log": false,
7
+ "lower": 0.01,
8
+ "upper": 1.0,
9
+ "default": 0.505,
10
+ "q": null
11
+ },
12
+ {
13
+ "name": "duplicate",
14
+ "type": "constant",
15
+ "value": 0
16
+ },
17
+ {
18
+ "name": "loop",
19
+ "type": "uniform_float",
20
+ "log": false,
21
+ "lower": 0.01,
22
+ "upper": 1.0,
23
+ "default": 0.505,
24
+ "q": null
25
+ },
26
+ {
27
+ "name": "lt_dependency",
28
+ "type": "uniform_float",
29
+ "log": false,
30
+ "lower": 0.01,
31
+ "upper": 1.0,
32
+ "default": 0.505,
33
+ "q": null
34
+ },
35
+ {
36
+ "name": "mode",
37
+ "type": "uniform_int",
38
+ "log": false,
39
+ "lower": 5,
40
+ "upper": 40,
41
+ "default": 22,
42
+ "q": null
43
+ },
44
+ {
45
+ "name": "num_traces",
46
+ "type": "uniform_int",
47
+ "log": false,
48
+ "lower": 100,
49
+ "upper": 1001,
50
+ "default": 550,
51
+ "q": null
52
+ },
53
+ {
54
+ "name": "or",
55
+ "type": "constant",
56
+ "value": 0
57
+ },
58
+ {
59
+ "name": "parallel",
60
+ "type": "uniform_float",
61
+ "log": false,
62
+ "lower": 0.01,
63
+ "upper": 1.0,
64
+ "default": 0.505,
65
+ "q": null
66
+ },
67
+ {
68
+ "name": "sequence",
69
+ "type": "uniform_float",
70
+ "log": false,
71
+ "lower": 0.01,
72
+ "upper": 1.0,
73
+ "default": 0.505,
74
+ "q": null
75
+ },
76
+ {
77
+ "name": "silent",
78
+ "type": "uniform_float",
79
+ "log": false,
80
+ "lower": 0.01,
81
+ "upper": 1.0,
82
+ "default": 0.505,
83
+ "q": null
84
+ }
85
+ ],
86
+ "conditions": [],
87
+ "forbiddens": [],
88
+ "python_module_version": "0.6.1",
89
+ "json_format_version": 0.4
90
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/intensifier.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "incumbent_ids": [
3
+ 2,
4
+ 4,
5
+ 5
6
+ ],
7
+ "rejected_config_ids": [
8
+ 1,
9
+ 3
10
+ ],
11
+ "incumbents_changed": 5,
12
+ "trajectory": [
13
+ {
14
+ "config_ids": [
15
+ 1
16
+ ],
17
+ "costs": [
18
+ [
19
+ 0.24521995849967804,
20
+ 0.03375422897872052
21
+ ]
22
+ ],
23
+ "trial": 1,
24
+ "walltime": 1.7022318840026855
25
+ },
26
+ {
27
+ "config_ids": [
28
+ 1,
29
+ 2
30
+ ],
31
+ "costs": [
32
+ [
33
+ 0.24521995849967804,
34
+ 0.03375422897872052
35
+ ],
36
+ [
37
+ 0.18052799504097683,
38
+ 0.036256498795491074
39
+ ]
40
+ ],
41
+ "trial": 2,
42
+ "walltime": 1.8762779235839844
43
+ },
44
+ {
45
+ "config_ids": [
46
+ 2,
47
+ 3
48
+ ],
49
+ "costs": [
50
+ [
51
+ 0.18052799504097683,
52
+ 0.036256498795491074
53
+ ],
54
+ [
55
+ 0.23787143561833685,
56
+ 0.007951827349262967
57
+ ]
58
+ ],
59
+ "trial": 3,
60
+ "walltime": 2.9373037815093994
61
+ },
62
+ {
63
+ "config_ids": [
64
+ 2,
65
+ 4
66
+ ],
67
+ "costs": [
68
+ [
69
+ 0.18052799504097683,
70
+ 0.036256498795491074
71
+ ],
72
+ [
73
+ 0.22916682187677923,
74
+ 0.005954652831009627
75
+ ]
76
+ ],
77
+ "trial": 4,
78
+ "walltime": 4.301177978515625
79
+ },
80
+ {
81
+ "config_ids": [
82
+ 2,
83
+ 4,
84
+ 5
85
+ ],
86
+ "costs": [
87
+ [
88
+ 0.18052799504097683,
89
+ 0.036256498795491074
90
+ ],
91
+ [
92
+ 0.22916682187677923,
93
+ 0.005954652831009627
94
+ ],
95
+ [
96
+ 0.22057271745765183,
97
+ 0.021149784678614475
98
+ ]
99
+ ],
100
+ "trial": 5,
101
+ "walltime": 4.334147930145264
102
+ }
103
+ ],
104
+ "state": {
105
+ "queue": [
106
+ [
107
+ 5,
108
+ 1
109
+ ]
110
+ ]
111
+ }
112
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/optimization.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "used_walltime": 4.335055828094482,
3
+ "used_target_function_walltime": 4.198468208312988,
4
+ "last_update": 1706650671.728907,
5
+ "finished": true
6
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/runhistory.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stats": {
3
+ "submitted": 5,
4
+ "finished": 5,
5
+ "running": 0
6
+ },
7
+ "data": [
8
+ [
9
+ 1,
10
+ null,
11
+ 209652396,
12
+ null,
13
+ [
14
+ 0.24521995849967804,
15
+ 0.03375422897872052
16
+ ],
17
+ 1.7000741958618164,
18
+ 1,
19
+ 1706650667.394413,
20
+ 1706650669.0954418,
21
+ {}
22
+ ],
23
+ [
24
+ 2,
25
+ null,
26
+ 209652396,
27
+ null,
28
+ [
29
+ 0.18052799504097683,
30
+ 0.036256498795491074
31
+ ],
32
+ 0.05061626434326172,
33
+ 1,
34
+ 1706650669.2182012,
35
+ 1706650669.2693639,
36
+ {}
37
+ ],
38
+ [
39
+ 3,
40
+ null,
41
+ 209652396,
42
+ null,
43
+ [
44
+ 0.23787143561833685,
45
+ 0.007951827349262967
46
+ ],
47
+ 1.0581114292144775,
48
+ 1,
49
+ 1706650669.2713861,
50
+ 1706650670.3300622,
51
+ {}
52
+ ],
53
+ [
54
+ 4,
55
+ null,
56
+ 209652396,
57
+ null,
58
+ [
59
+ 0.22916682187677923,
60
+ 0.005954652831009627
61
+ ],
62
+ 1.3603012561798096,
63
+ 1,
64
+ 1706650670.332902,
65
+ 1706650671.6938748,
66
+ {}
67
+ ],
68
+ [
69
+ 5,
70
+ null,
71
+ 209652396,
72
+ null,
73
+ [
74
+ 0.22057271745765183,
75
+ 0.021149784678614475
76
+ ],
77
+ 0.029365062713623047,
78
+ 1,
79
+ 1706650671.696975,
80
+ 1706650671.7269728,
81
+ {}
82
+ ]
83
+ ],
84
+ "configs": {
85
+ "1": {
86
+ "choice": 0.27420973585918546,
87
+ "duplicate": 0,
88
+ "loop": 0.6243590484932066,
89
+ "lt_dependency": 0.11643280289135872,
90
+ "mode": 12,
91
+ "num_traces": 876,
92
+ "or": 0,
93
+ "parallel": 0.5005605065450072,
94
+ "sequence": 0.9603757594153285,
95
+ "silent": 0.9968127990048379
96
+ },
97
+ "2": {
98
+ "choice": 0.892855270774259,
99
+ "duplicate": 0,
100
+ "loop": 0.09625800670452529,
101
+ "lt_dependency": 0.7827238845235909,
102
+ "mode": 32,
103
+ "num_traces": 715,
104
+ "or": 0,
105
+ "parallel": 0.1376370346783048,
106
+ "sequence": 0.1696964227061463,
107
+ "silent": 0.14680112183512767
108
+ },
109
+ "3": {
110
+ "choice": 0.7180374727086953,
111
+ "duplicate": 0,
112
+ "loop": 0.38960710363751994,
113
+ "lt_dependency": 0.8342936470924586,
114
+ "mode": 28,
115
+ "num_traces": 612,
116
+ "or": 0,
117
+ "parallel": 0.442661634261348,
118
+ "sequence": 0.3700736632331964,
119
+ "silent": 0.2607586865143843
120
+ },
121
+ "4": {
122
+ "choice": 0.6494351719359895,
123
+ "duplicate": 0,
124
+ "loop": 0.9263406719097345,
125
+ "lt_dependency": 0.8011669785745563,
126
+ "mode": 19,
127
+ "num_traces": 656,
128
+ "or": 0,
129
+ "parallel": 0.6739314909219778,
130
+ "sequence": 0.11102436264054778,
131
+ "silent": 0.11927138975266208
132
+ },
133
+ "5": {
134
+ "choice": 0.6067357423109274,
135
+ "duplicate": 0,
136
+ "loop": 0.7938077877018379,
137
+ "lt_dependency": 0.780375183440352,
138
+ "mode": 10,
139
+ "num_traces": 116,
140
+ "or": 0,
141
+ "parallel": 0.7006548839679922,
142
+ "sequence": 0.5744948027137008,
143
+ "silent": 0.47164766512774325
144
+ }
145
+ },
146
+ "config_origins": {
147
+ "1": "Initial Design: Sobol",
148
+ "2": "Acquisition Function Maximizer: Random Search (sorted)",
149
+ "3": "Acquisition Function Maximizer: Random Search (sorted)",
150
+ "4": "Acquisition Function Maximizer: Random Search (sorted)",
151
+ "5": "Acquisition Function Maximizer: Random Search (sorted)"
152
+ }
153
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/scenario.json ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "006b6145d4159dd008d68ca2cd3a4441",
3
+ "deterministic": true,
4
+ "objectives": [
5
+ "normalized_sequence_entropy_exponential_forgetting",
6
+ "normalized_sequence_entropy_linear_forgetting"
7
+ ],
8
+ "crash_cost": Infinity,
9
+ "termination_cost_threshold": Infinity,
10
+ "walltime_limit": Infinity,
11
+ "cputime_limit": Infinity,
12
+ "trial_walltime_limit": null,
13
+ "trial_memory_limit": null,
14
+ "n_trials": 5,
15
+ "use_default_config": false,
16
+ "instances": null,
17
+ "instance_features": null,
18
+ "min_budget": null,
19
+ "max_budget": null,
20
+ "seed": 0,
21
+ "n_workers": -1,
22
+ "_meta": {
23
+ "facade": {
24
+ "name": "HyperparameterOptimizationFacade"
25
+ },
26
+ "runner": {
27
+ "name": "TargetFunctionRunner",
28
+ "code": "b't\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x02\\x19\\x00|\\x01d\\x03\\x19\\x00|\\x01d\\x04\\x19\\x00|\\x01d\\x05\\x19\\x00|\\x01d\\x06\\x19\\x00|\\x01d\\x07\\x19\\x00|\\x01d\\x08\\x19\\x00|\\x01d\\t\\x19\\x00d\\nd\\x0b\\x9c\\x0cd\\x0c\\x8d\\x01}\\x03t\\x01|\\x03d\\r|\\x01d\\r\\x19\\x00i\\x01d\\x0c\\x8d\\x02}\\x04|\\x00\\xa0\\x02|\\x04\\xa1\\x01S\\x00'"
29
+ },
30
+ "model": {
31
+ "name": "RandomForest",
32
+ "types": [
33
+ 0,
34
+ 0,
35
+ 0,
36
+ 0,
37
+ 0,
38
+ 0,
39
+ 0,
40
+ 0,
41
+ 0,
42
+ 0
43
+ ],
44
+ "bounds": [
45
+ [
46
+ 0,
47
+ 1.0
48
+ ],
49
+ [
50
+ 0,
51
+ NaN
52
+ ],
53
+ [
54
+ 0,
55
+ 1.0
56
+ ],
57
+ [
58
+ 0,
59
+ 1.0
60
+ ],
61
+ [
62
+ 0,
63
+ 1.0
64
+ ],
65
+ [
66
+ 0,
67
+ 1.0
68
+ ],
69
+ [
70
+ 0,
71
+ NaN
72
+ ],
73
+ [
74
+ 0,
75
+ 1.0
76
+ ],
77
+ [
78
+ 0,
79
+ 1.0
80
+ ],
81
+ [
82
+ 0,
83
+ 1.0
84
+ ]
85
+ ],
86
+ "pca_components": 7,
87
+ "n_trees": 10,
88
+ "n_points_per_tree": -1,
89
+ "ratio_features": 1.0,
90
+ "min_samples_split": 2,
91
+ "min_samples_leaf": 1,
92
+ "max_depth": 1048576,
93
+ "eps_purity": 1e-08,
94
+ "max_nodes": 1048576,
95
+ "bootstrapping": true
96
+ },
97
+ "acquisition_maximizer": {
98
+ "name": "LocalAndSortedRandomSearch",
99
+ "acquisition_function": {
100
+ "name": "EI",
101
+ "xi": 0.0,
102
+ "log": true
103
+ },
104
+ "challengers": 10000,
105
+ "seed": 0,
106
+ "random_search": {
107
+ "name": "RandomSearch",
108
+ "acquisition_function": {
109
+ "name": "EI",
110
+ "xi": 0.0,
111
+ "log": true
112
+ },
113
+ "challengers": 5000,
114
+ "seed": 0
115
+ },
116
+ "local_search": {
117
+ "name": "LocalSearch",
118
+ "acquisition_function": {
119
+ "name": "EI",
120
+ "xi": 0.0,
121
+ "log": true
122
+ },
123
+ "challengers": 5000,
124
+ "seed": 0,
125
+ "max_steps": null,
126
+ "n_steps_plateau_walk": 10,
127
+ "vectorization_min_obtain": 2,
128
+ "vectorization_max_obtain": 64
129
+ }
130
+ },
131
+ "acquisition_function": {
132
+ "name": "EI",
133
+ "xi": 0.0,
134
+ "log": true
135
+ },
136
+ "intensifier": {
137
+ "name": "Intensifier",
138
+ "max_incumbents": 10,
139
+ "seed": 0
140
+ },
141
+ "initial_design": {
142
+ "name": "SobolInitialDesign",
143
+ "n_configs": 1,
144
+ "n_configs_per_hyperparameter": 10,
145
+ "additional_configs": [],
146
+ "seed": 0
147
+ },
148
+ "random_design": {
149
+ "name": "ProbabilityRandomDesign",
150
+ "seed": 0,
151
+ "probability": 0.2
152
+ },
153
+ "runhistory_encoder": {
154
+ "name": "RunHistoryLogScaledEncoder",
155
+ "considered_states": [
156
+ 1,
157
+ 2,
158
+ 4
159
+ ],
160
+ "lower_budget_states": [],
161
+ "scale_percentage": 5,
162
+ "seed": 0
163
+ },
164
+ "multi_objective_algorithm": {
165
+ "name": "MeanAggregationStrategy",
166
+ "objective_weights": [
167
+ 1,
168
+ 1
169
+ ]
170
+ },
171
+ "config_selector": {
172
+ "name": "ConfigSelector",
173
+ "retrain_after": 8,
174
+ "retries": 16,
175
+ "min_trials": 1
176
+ },
177
+ "version": "2.0.2"
178
+ },
179
+ "output_directory": "smac3_output/006b6145d4159dd008d68ca2cd3a4441/0"
180
+ }
smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/configspace.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hyperparameters": [
3
+ {
4
+ "name": "choice",
5
+ "type": "uniform_float",
6
+ "log": false,
7
+ "lower": 0.01,
8
+ "upper": 1.0,
9
+ "default": 0.505,
10
+ "q": null
11
+ },
12
+ {
13
+ "name": "duplicate",
14
+ "type": "constant",
15
+ "value": 0
16
+ },
17
+ {
18
+ "name": "loop",
19
+ "type": "uniform_float",
20
+ "log": false,
21
+ "lower": 0.01,
22
+ "upper": 1.0,
23
+ "default": 0.505,
24
+ "q": null
25
+ },
26
+ {
27
+ "name": "lt_dependency",
28
+ "type": "uniform_float",
29
+ "log": false,
30
+ "lower": 0.01,
31
+ "upper": 1.0,
32
+ "default": 0.505,
33
+ "q": null
34
+ },
35
+ {
36
+ "name": "mode",
37
+ "type": "uniform_int",
38
+ "log": false,
39
+ "lower": 5,
40
+ "upper": 20,
41
+ "default": 12,
42
+ "q": null
43
+ },
44
+ {
45
+ "name": "num_traces",
46
+ "type": "uniform_int",
47
+ "log": false,
48
+ "lower": 10,
49
+ "upper": 10001,
50
+ "default": 5006,
51
+ "q": null
52
+ },
53
+ {
54
+ "name": "or",
55
+ "type": "constant",
56
+ "value": 0
57
+ },
58
+ {
59
+ "name": "parallel",
60
+ "type": "uniform_float",
61
+ "log": false,
62
+ "lower": 0.01,
63
+ "upper": 1.0,
64
+ "default": 0.505,
65
+ "q": null
66
+ },
67
+ {
68
+ "name": "sequence",
69
+ "type": "uniform_float",
70
+ "log": false,
71
+ "lower": 0.01,
72
+ "upper": 1.0,
73
+ "default": 0.505,
74
+ "q": null
75
+ },
76
+ {
77
+ "name": "silent",
78
+ "type": "uniform_float",
79
+ "log": false,
80
+ "lower": 0.01,
81
+ "upper": 1.0,
82
+ "default": 0.505,
83
+ "q": null
84
+ }
85
+ ],
86
+ "conditions": [],
87
+ "forbiddens": [],
88
+ "python_module_version": "0.6.1",
89
+ "json_format_version": 0.4
90
+ }
smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/intensifier.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "incumbent_ids": [
3
+ 1
4
+ ],
5
+ "rejected_config_ids": [],
6
+ "incumbents_changed": 1,
7
+ "trajectory": [
8
+ {
9
+ "config_ids": [
10
+ 1
11
+ ],
12
+ "costs": [
13
+ 0.148762785609154
14
+ ],
15
+ "trial": 1,
16
+ "walltime": 7.8146021366119385
17
+ }
18
+ ],
19
+ "state": {
20
+ "queue": [
21
+ [
22
+ 20,
23
+ 1
24
+ ]
25
+ ]
26
+ }
27
+ }