Andrea MH committed on
Commit
761e409
·
unverified ·
2 Parent(s): 8742124 973f5db

Merge pull request #6 from lmu-dbs/5-automation-test-gedi-automatically

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .github/workflows/test_gedi.yml +162 -0
  2. .gitignore +5 -1
  3. README.md +1 -1
  4. config.py +3 -3
  5. config_files/algorithm/benchmark.json +1 -2
  6. config_files/algorithm/evaluation_plotter.json +7 -5
  7. config_files/algorithm/experiment_test.json +7 -7
  8. config_files/algorithm/feature_extraction.json +1 -1
  9. config_files/algorithm/generation.json +2 -5
  10. data/2_grid_test.csv +3 -3
  11. data/{test_2 → test}/gen_el_168.xes +0 -0
  12. data/{test_2 → test}/gen_el_169.xes +0 -0
  13. data/test/grid_feat.csv +3 -0
  14. data/test/plotter/1_enve_feat.csv +12 -0
  15. data/test/plotter/grid_1objectives_enve.csv +12 -0
  16. execute_grid_experiments.py +1 -1
  17. gedi/__init__.py +8 -0
  18. {tag → gedi}/analyser.py +3 -3
  19. {tag → gedi}/augmentation.py +1 -1
  20. {tag → gedi}/benchmark.py +3 -3
  21. {tag → gedi}/features.py +1 -1
  22. {tag → gedi}/generator.py +2 -2
  23. {tag → gedi}/plotter.py +6 -5
  24. {tag → gedi}/utils/algorithms/__init__.py +0 -0
  25. {tag → gedi}/utils/algorithms/tsne.py +0 -0
  26. {tag → gedi}/utils/array_tools.py +0 -0
  27. {tag → gedi}/utils/io_helpers.py +0 -0
  28. {tag → gedi}/utils/matrix_tools.py +0 -0
  29. main.py +9 -9
  30. notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
  31. notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
  32. notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
  33. notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
  34. notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
  35. notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
  36. notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
  37. notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
  38. notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
  39. notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
  40. notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
  41. notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +0 -376
  42. notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +0 -6
  43. notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +0 -6
  44. notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
  45. notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +0 -818
  46. notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
  47. notebooks/benchmarking_process_discovery.ipynb +2 -2
  48. notebooks/bpic_generability_pdm.ipynb +1 -1
  49. notebooks/experiment_generator.ipynb +2 -2
  50. notebooks/feature_distributions.ipynb +1 -1
.github/workflows/test_gedi.yml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GEDI Test
2
+
3
+ # Specifies when the action should run
4
+ on:
5
+ pull_request:
6
+ branches:
7
+ - main
8
+
9
+ # Specifies the jobs that are to be run
10
+ jobs:
11
+ test_feature-extraction:
12
+ runs-on: ubuntu-latest
13
+
14
+ # Setting up a python environment for the test script to run
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: 3.9
23
+
24
+ - name: Install feeed
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install .
28
+
29
+ - name: Run test
30
+ run:
31
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
32
+
33
+ - name: Compare output
34
+ run: diff data/test_feat.csv data/test_feat.csv
35
+
36
+ test_generation:
37
+ runs-on: ubuntu-latest
38
+
39
+ # Setting up a python environment for the test script to run
40
+ steps:
41
+ - name: Checkout code
42
+ uses: actions/checkout@v4
43
+
44
+ - name: Set up Python
45
+ uses: actions/setup-python@v5
46
+ with:
47
+ python-version: 3.9
48
+
49
+ - name: Install dependencies
50
+ run: |
51
+ sudo apt-get install build-essential python3 python3-dev
52
+
53
+ - name: Install feeed
54
+ run: |
55
+ python -m pip install --upgrade pip
56
+ pip install .
57
+
58
+ - name: Run test
59
+ run:
60
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
61
+
62
+ - name: Compare output
63
+ run: diff output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
64
+
65
+ test_benchmark:
66
+ runs-on: ubuntu-latest
67
+
68
+ # Setting up a python environment for the test script to run
69
+ steps:
70
+ - name: Checkout code
71
+ uses: actions/checkout@v4
72
+
73
+ - name: Set up Python
74
+ uses: actions/setup-python@v5
75
+ with:
76
+ python-version: 3.9
77
+
78
+ - name: Install feeed
79
+ run: |
80
+ python -m pip install --upgrade pip
81
+ pip install .
82
+
83
+ - name: Run test
84
+ run:
85
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
86
+
87
+ - name: Compare output
88
+ run: diff output/benchmark/test_benchmark.csv output/benchmark/test_benchmark.csv
89
+
90
+ test_augmentation:
91
+ runs-on: ubuntu-latest
92
+
93
+ # Setting up a python environment for the test script to run
94
+ steps:
95
+ - name: Checkout code
96
+ uses: actions/checkout@v4
97
+
98
+ - name: Set up Python
99
+ uses: actions/setup-python@v5
100
+ with:
101
+ python-version: 3.9
102
+
103
+ - name: Install feeed
104
+ run: |
105
+ python -m pip install --upgrade pip
106
+ pip install .
107
+
108
+ - name: Run test
109
+ run:
110
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
111
+
112
+ test_evaluation-plotter:
113
+ runs-on: ubuntu-latest
114
+
115
+ # Setting up a python environment for the test script to run
116
+ steps:
117
+ - name: Checkout code
118
+ uses: actions/checkout@v4
119
+
120
+ - name: Set up Python
121
+ uses: actions/setup-python@v5
122
+ with:
123
+ python-version: 3.9
124
+
125
+ - name: Install dependencies
126
+ run: |
127
+ sudo apt-get install build-essential python3 python3-dev
128
+
129
+ - name: Install feeed
130
+ run: |
131
+ python -m pip install --upgrade pip
132
+ pip install .
133
+
134
+ - name: Run test
135
+ run:
136
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
137
+
138
+ test_integration:
139
+ runs-on: ubuntu-latest
140
+
141
+ # Setting up a python environment for the test script to run
142
+ steps:
143
+ - name: Checkout code
144
+ uses: actions/checkout@v4
145
+
146
+ - name: Set up Python
147
+ uses: actions/setup-python@v5
148
+ with:
149
+ python-version: 3.9
150
+
151
+ - name: Install dependencies
152
+ run: |
153
+ sudo apt-get install build-essential python3 python3-dev
154
+
155
+ - name: Install feeed
156
+ run: |
157
+ python -m pip install --upgrade pip
158
+ pip install .
159
+
160
+ - name: Run test
161
+ run:
162
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
.gitignore CHANGED
@@ -1,3 +1,7 @@
1
  smac3_output/
2
  data/
3
- output/
 
 
 
 
 
1
  smac3_output/
2
  data/
3
+ output/
4
+ .ipynb_checkpoints/
5
+ notebooks/.ipynb_checkpoints/*
6
+ gedi.egg-info/
7
+ build/
README.md CHANGED
@@ -32,7 +32,7 @@ python main.py -o config_files/options/baseline.json -a config_files/algorithm/e
32
  ## Usage
33
  Our pipeline offers several pipeline steps, which can be run sequentially or partially:
34
  - feature_extraction
35
- - event_logs_generation
36
  - benchmark
37
  - evaluation_plotter
38
 
 
32
  ## Usage
33
  Our pipeline offers several pipeline steps, which can be run sequentially or partially:
34
  - feature_extraction
35
+ - generation
36
  - benchmark
37
  - evaluation_plotter
38
 
config.py CHANGED
@@ -2,7 +2,7 @@ import json
2
  import os
3
  import warnings
4
 
5
- from tag.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
  from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
 
@@ -63,8 +63,8 @@ def get_files_and_kwargs(params: dict):
63
 
64
  #TODO: generate parent directories if they don't exist
65
  if input_name == 'test':
66
- filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
67
- kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
68
  elif input_name == 'realLogs':
69
  filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
  kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
 
2
  import os
3
  import warnings
4
 
5
+ from gedi.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
  from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
 
 
63
 
64
  #TODO: generate parent directories if they don't exist
65
  if input_name == 'test':
66
+ filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
67
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
68
  elif input_name == 'realLogs':
69
  filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
  kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
config_files/algorithm/benchmark.json CHANGED
@@ -2,8 +2,7 @@
2
  {
3
  "pipeline_step": "benchmark_test",
4
  "benchmark_test": "discovery",
5
- "input_path":"data/test_2",
6
- "input_path":"data/test_2/gen_el_168.xes",
7
  "output_path":"output",
8
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
9
  }
 
2
  {
3
  "pipeline_step": "benchmark_test",
4
  "benchmark_test": "discovery",
5
+ "input_path":"data/test",
 
6
  "output_path":"output",
7
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
8
  }
config_files/algorithm/evaluation_plotter.json CHANGED
@@ -2,16 +2,18 @@
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
  "input_path": "output/features/generated/34_bpic_features/",
5
- "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
6
  "input_path": "output/features/generated/grid_2obj/",
7
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
8
- "output_path": "output/plots",
 
9
  "reference_feature": "epa_normalized_sequence_entropy",
10
- "reference_feature": "epa_normalized_variant_entropy",
11
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
 
12
  "targets": "data/34_bpic_features.csv",
13
- "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
14
  "targets": "data/grid_experiments/grid_2obj/",
15
- "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
 
 
 
16
  }
17
  ]
 
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
  "input_path": "output/features/generated/34_bpic_features/",
 
5
  "input_path": "output/features/generated/grid_2obj/",
6
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
7
+ "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
8
+ "input_path": "data/test/plotter/1_enve_feat.csv",
9
  "reference_feature": "epa_normalized_sequence_entropy",
 
10
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
11
+ "reference_feature": "epa_normalized_variant_entropy",
12
  "targets": "data/34_bpic_features.csv",
 
13
  "targets": "data/grid_experiments/grid_2obj/",
14
+ "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
15
+ "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
16
+ "targets": "data/test/plotter/grid_1objectives_enve.csv",
17
+ "output_path": "output/plots"
18
  }
19
  ]
config_files/algorithm/experiment_test.json CHANGED
@@ -9,16 +9,16 @@
9
  {
10
  "pipeline_step": "event_logs_generation",
11
  "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
- "output_path": "data/test_2",
13
  "generator_params": {
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
  "experiment": [
18
- {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
19
- {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
20
  ],
21
- "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
22
  "config_space": {
23
  "mode": [5, 20],
24
  "sequence": [0.01, 1],
@@ -27,7 +27,7 @@
27
  "loop": [0.01, 1],
28
  "silent": [0.01, 1],
29
  "lt_dependency": [0.01, 1],
30
- "num_traces": [100, 10001],
31
  "duplicate": [0],
32
  "or": [0]
33
  },
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "pipeline_step": "feature_extraction",
39
- "input_path": "data/test_2",
40
  "feature_params": {"feature_set":["trace_length"]},
41
  "output_path": "output/plots",
42
  "real_eventlog_path": "data/bpic_features.csv",
@@ -45,7 +45,7 @@
45
  {
46
  "pipeline_step": "benchmark_test",
47
  "benchmark_test": "discovery",
48
- "input_path":"data/test_2",
49
  "output_path":"output",
50
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
51
  }
 
9
  {
10
  "pipeline_step": "event_logs_generation",
11
  "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
+ "output_path": "data/test",
13
  "generator_params": {
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
  "experiment": [
18
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
19
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.4, "ratio_top_20_variants": 0.7}
20
  ],
21
+ "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
22
  "config_space": {
23
  "mode": [5, 20],
24
  "sequence": [0.01, 1],
 
27
  "loop": [0.01, 1],
28
  "silent": [0.01, 1],
29
  "lt_dependency": [0.01, 1],
30
+ "num_traces": [10, 100],
31
  "duplicate": [0],
32
  "or": [0]
33
  },
 
36
  },
37
  {
38
  "pipeline_step": "feature_extraction",
39
+ "input_path": "data/test",
40
  "feature_params": {"feature_set":["trace_length"]},
41
  "output_path": "output/plots",
42
  "real_eventlog_path": "data/bpic_features.csv",
 
45
  {
46
  "pipeline_step": "benchmark_test",
47
  "benchmark_test": "discovery",
48
+ "input_path":"data/test",
49
  "output_path":"output",
50
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
51
  }
config_files/algorithm/feature_extraction.json CHANGED
@@ -1,7 +1,7 @@
1
  [
2
  {
3
  "pipeline_step": "feature_extraction",
4
- "input_path": "data/test_2",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/bpic_features.csv",
 
1
  [
2
  {
3
  "pipeline_step": "feature_extraction",
4
+ "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/bpic_features.csv",
config_files/algorithm/generation.json CHANGED
@@ -3,11 +3,8 @@
3
  "pipeline_step": "event_logs_generation",
4
  "output_path": "output",
5
  "generator_params": {
6
- "experiment": {
7
- "input_path": "data/grid_objectives.csv",
8
- "objectives": ["epa_normalized_variant_entropy"],
9
- "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
10
- "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
11
  },
12
  "config_space": {
13
  "mode": [5, 20],
 
3
  "pipeline_step": "event_logs_generation",
4
  "output_path": "output",
5
  "generator_params": {
6
+ "experiment": {"input_path": "data/test/grid_feat.csv",
7
+ "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
 
 
 
8
  },
9
  "config_space": {
10
  "mode": [5, 20],
data/2_grid_test.csv CHANGED
@@ -1,3 +1,3 @@
1
- task,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting
2
- task_1,0.0,0.0
3
- task_2,0.0,0.1
 
1
+ log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
2
+ experiment1,0.2,0.4
3
+ experiment2,0.4,0.7
data/{test_2 → test}/gen_el_168.xes RENAMED
File without changes
data/{test_2 → test}/gen_el_169.xes RENAMED
File without changes
data/test/grid_feat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
2
+ experiment1,0.2,0.4
3
+ experiment2,0.4,0.7
data/test/plotter/1_enve_feat.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epa_normalized_variant_entropy,log
2
+ 0.41202322946059605,task_5
3
+ 0.79999386158591,task_9
4
+ 0.8925919422394111,task_10
5
+ 0.493812449168448,task_6
6
+ 0.20299577565110202,task_3
7
+ 0.337263992015401,task_4
8
+ 0.0,task_1
9
+ 0.102184538023266,task_2
10
+ 0.600006599245775,task_7
11
+ 0.6999779396851361,task_8
12
+ 0.8796185572534461,task_11
data/test/plotter/grid_1objectives_enve.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,epa_normalized_variant_entropy
2
+ task_1,0.0
3
+ task_2,0.1
4
+ task_3,0.2
5
+ task_4,0.3
6
+ task_5,0.4
7
+ task_6,0.5
8
+ task_7,0.6
9
+ task_8,0.7
10
+ task_9,0.8
11
+ task_10,0.9
12
+ task_11,1.0
execute_grid_experiments.py CHANGED
@@ -2,7 +2,7 @@ import multiprocessing
2
  import os
3
 
4
  from datetime import datetime as dt
5
- from tag.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
 
8
  #TODO: Pass i properly
 
2
  import os
3
 
4
  from datetime import datetime as dt
5
+ from gedi.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
 
8
  #TODO: Pass i properly
gedi/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .generator import GenerateEventLogs
2
+ from .features import EventLogFeatures
3
+ from .analyser import FeatureAnalyser
4
+ from .augmentation import InstanceAugmentator
5
+ from .benchmark import BenchmarkTest
6
+ from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
7
+
8
+ __all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']
{tag → gedi}/analyser.py RENAMED
@@ -4,9 +4,9 @@ import warnings
4
  from sklearn.decomposition import FastICA, PCA
5
  from sklearn.manifold import TSNE
6
  from sklearn.preprocessing import Normalizer, StandardScaler
7
- from tag.features import EventLogFeatures
8
- from tag.plotter import ModelResultPlotter
9
- from tag.utils.matrix_tools import insert_missing_data
10
  # TODO: Call param_keys explicitly e.g. import INPUT_PATH
11
  from utils.param_keys import *
12
  from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
 
4
  from sklearn.decomposition import FastICA, PCA
5
  from sklearn.manifold import TSNE
6
  from sklearn.preprocessing import Normalizer, StandardScaler
7
+ from gedi.features import EventLogFeatures
8
+ from gedi.plotter import ModelResultPlotter
9
+ from gedi.utils.matrix_tools import insert_missing_data
10
  # TODO: Call param_keys explicitly e.g. import INPUT_PATH
11
  from utils.param_keys import *
12
  from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
{tag → gedi}/augmentation.py RENAMED
@@ -3,7 +3,7 @@ from collections import Counter
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
5
  from sklearn.preprocessing import Normalizer
6
- from tag.utils.matrix_tools import insert_missing_data
7
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
8
  from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
9
 
 
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
5
  from sklearn.preprocessing import Normalizer
6
+ from gedi.utils.matrix_tools import insert_missing_data
7
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
8
  from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
9
 
{tag → gedi}/benchmark.py RENAMED
@@ -16,7 +16,7 @@ from pm4py.algo.evaluation.generalization import algorithm as generalization_eva
16
  from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
17
  from pm4py.objects.bpmn.obj import BPMN
18
  from pm4py.objects.log.importer.xes import importer as xes_importer
19
- from tag.utils.io_helpers import dump_features_json
20
  from tqdm import tqdm
21
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
22
  from utils.param_keys.benchmark import MINERS
@@ -113,14 +113,14 @@ class BenchmarkTest:
113
  return
114
 
115
  def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
116
- jar_path = os.path.join("tag","libs","split-miner-1.7.1-all.jar")
117
  filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
118
  bpmn_path = os.path.join("output", "bpmns_split", filename)
119
  os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
120
  command = [
121
  "java",
122
  "-cp",
123
- f"{os.getcwd()}/tag/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
124
  "au.edu.unimelb.services.ServiceProvider",
125
  "SM2",
126
  f"{os.getcwd()}/{log_path}",
 
16
  from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
17
  from pm4py.objects.bpmn.obj import BPMN
18
  from pm4py.objects.log.importer.xes import importer as xes_importer
19
+ from gedi.utils.io_helpers import dump_features_json
20
  from tqdm import tqdm
21
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
22
  from utils.param_keys.benchmark import MINERS
 
113
  return
114
 
115
  def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
116
+ jar_path = os.path.join("gedi","libs","split-miner-1.7.1-all.jar")
117
  filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
118
  bpmn_path = os.path.join("output", "bpmns_split", filename)
119
  os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
120
  command = [
121
  "java",
122
  "-cp",
123
+ f"{os.getcwd()}/gedi/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
124
  "au.edu.unimelb.services.ServiceProvider",
125
  "SM2",
126
  f"{os.getcwd()}/{log_path}",
{tag → gedi}/features.py RENAMED
@@ -11,7 +11,7 @@ from pathlib import Path, PurePath
11
  from sklearn.impute import SimpleImputer
12
  from utils.param_keys import INPUT_PATH
13
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
14
- from tag.utils.io_helpers import dump_features_json
15
 
16
  def get_sortby_parameter(elem):
17
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
 
11
  from sklearn.impute import SimpleImputer
12
  from utils.param_keys import INPUT_PATH
13
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
14
+ from gedi.utils.io_helpers import dump_features_json
15
 
16
  def get_sortby_parameter(elem):
17
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
{tag → gedi}/generator.py RENAMED
@@ -20,7 +20,7 @@ from pm4py.sim import play_out
20
  from smac import HyperparameterOptimizationFacade, Scenario
21
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
22
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
23
- from tag.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
24
 
25
 
26
 
@@ -73,7 +73,7 @@ def get_tasks(experiment, output_path="", reference_feature=None):
73
  return tasks, output_path
74
 
75
  class GenerateEventLogs():
76
- # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/tag/blob/main/notebooks/grid_objectives.ipynb)
77
  def __init__(self, params):
78
  print("=========================== Generator ==========================")
79
  print(f"INFO: Running with {params}")
 
20
  from smac import HyperparameterOptimizationFacade, Scenario
21
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
22
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
23
+ from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
24
 
25
 
26
 
 
73
  return tasks, output_path
74
 
75
  class GenerateEventLogs():
76
+ # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
77
  def __init__(self, params):
78
  print("=========================== Generator ==========================")
79
  print(f"INFO: Running with {params}")
{tag → gedi}/plotter.py RENAMED
@@ -20,9 +20,9 @@ from collections import defaultdict
20
  from sklearn.preprocessing import Normalizer, StandardScaler
21
  from sklearn.decomposition import PCA
22
  from sklearn.metrics.pairwise import euclidean_distances
23
- from tag.generator import get_tasks
24
- from tag.utils.io_helpers import get_keys_abbreviation
25
- from tag.utils.io_helpers import read_csvs, select_instance
26
 
27
  def insert_newlines(string, every=140):
28
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
@@ -331,6 +331,7 @@ class FeaturesPlotter:
331
  fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
 
334
  fig.savefig(output_path)
335
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
336
 
@@ -617,7 +618,7 @@ class AugmentationPlotter(object):
617
  if output_path != None:
618
  output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
619
  fig.savefig(output_path)
620
- print("SUCCESS: Saved augmentation pca plot at {output_path}")
621
 
622
 
623
  class GenerationPlotter(object):
@@ -672,7 +673,7 @@ class GenerationPlotter(object):
672
  targets = orig_targets.copy()
673
  elif isinstance(orig_targets, defaultdict):
674
  if k not in orig_targets:
675
- print("[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
676
  continue
677
  targets = orig_targets[k].copy()
678
  else:
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
21
  from sklearn.decomposition import PCA
22
  from sklearn.metrics.pairwise import euclidean_distances
23
+ from gedi.generator import get_tasks
24
+ from gedi.utils.io_helpers import get_keys_abbreviation
25
+ from gedi.utils.io_helpers import read_csvs, select_instance
26
 
27
  def insert_newlines(string, every=140):
28
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
 
331
  fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
334
+ os.makedirs(os.path.split(output_path)[0], exist_ok=True)
335
  fig.savefig(output_path)
336
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
337
 
 
618
  if output_path != None:
619
  output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
620
  fig.savefig(output_path)
621
+ print(f"SUCCESS: Saved augmentation pca plot at {output_path}")
622
 
623
 
624
  class GenerationPlotter(object):
 
673
  targets = orig_targets.copy()
674
  elif isinstance(orig_targets, defaultdict):
675
  if k not in orig_targets:
676
+ print(f"[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
677
  continue
678
  targets = orig_targets[k].copy()
679
  else:
{tag → gedi}/utils/algorithms/__init__.py RENAMED
File without changes
{tag → gedi}/utils/algorithms/tsne.py RENAMED
File without changes
{tag → gedi}/utils/array_tools.py RENAMED
File without changes
{tag → gedi}/utils/io_helpers.py RENAMED
File without changes
{tag → gedi}/utils/matrix_tools.py RENAMED
File without changes
main.py CHANGED
@@ -1,12 +1,12 @@
1
  import config
2
  import pandas as pd
3
  from datetime import datetime as dt
4
- from tag.generator import GenerateEventLogs
5
- from tag.features import EventLogFeatures
6
- from tag.analyser import FeatureAnalyser
7
- from tag.augmentation import InstanceAugmentator
8
- from tag.benchmark import BenchmarkTest
9
- from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
  from utils.default_argparse import ArgParser
11
  from utils.param_keys import *
12
  from utils.param_keys.run_options import *
@@ -57,8 +57,8 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
57
 
58
 
59
  if __name__=='__main__':
60
- start_tag = dt.now()
61
- print(f'INFO: TAG starting {start_tag}')
62
 
63
  args = ArgParser().parse('GEDI main')
64
  run_params = config.get_run_params(args.run_params_json)
@@ -70,4 +70,4 @@ if __name__=='__main__':
70
  else:
71
  load(args.result_load_files, kwargs)
72
 
73
- print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')
 
1
  import config
2
  import pandas as pd
3
  from datetime import datetime as dt
4
+ from gedi.generator import GenerateEventLogs
5
+ from gedi.features import EventLogFeatures
6
+ from gedi.analyser import FeatureAnalyser
7
+ from gedi.augmentation import InstanceAugmentator
8
+ from gedi.benchmark import BenchmarkTest
9
+ from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
  from utils.default_argparse import ArgParser
11
  from utils.param_keys import *
12
  from utils.param_keys.run_options import *
 
57
 
58
 
59
  if __name__=='__main__':
60
+ start_gedi = dt.now()
61
+ print(f'INFO: GEDI starting {start_gedi}')
62
 
63
  args = ArgParser().parse('GEDI main')
64
  run_params = config.get_run_params(args.run_params_json)
 
70
  else:
71
  load(args.result_load_files, kwargs)
72
 
73
+ print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb DELETED
@@ -1,376 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 9,
6
- "id": "e5aa7223",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import pandas as pd\n",
11
- "import numpy as np"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 10,
17
- "id": "dfd1a302",
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"]) "
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 28,
27
- "id": "218946b7",
28
- "metadata": {},
29
- "outputs": [],
30
- "source": [
31
- "k=0\n",
32
- "for i in np.arange(0.2, 1.1,0.2):\n",
33
- " for j in np.arange(0,0.55,0.1):\n",
34
- " k+=1\n",
35
- " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
36
- " \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
37
- " df = pd.concat([\n",
38
- " df, \n",
39
- " pd.DataFrame([new_entry], columns=new_entry.index)]\n",
40
- " ).reset_index(drop=True)\n",
41
- " "
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": 31,
47
- "id": "b1e3bb5a",
48
- "metadata": {},
49
- "outputs": [],
50
- "source": [
51
- "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": 32,
57
- "id": "5de45389",
58
- "metadata": {},
59
- "outputs": [
60
- {
61
- "data": {
62
- "text/html": [
63
- "<div>\n",
64
- "<style scoped>\n",
65
- " .dataframe tbody tr th:only-of-type {\n",
66
- " vertical-align: middle;\n",
67
- " }\n",
68
- "\n",
69
- " .dataframe tbody tr th {\n",
70
- " vertical-align: top;\n",
71
- " }\n",
72
- "\n",
73
- " .dataframe thead th {\n",
74
- " text-align: right;\n",
75
- " }\n",
76
- "</style>\n",
77
- "<table border=\"1\" class=\"dataframe\">\n",
78
- " <thead>\n",
79
- " <tr style=\"text-align: right;\">\n",
80
- " <th></th>\n",
81
- " <th>log</th>\n",
82
- " <th>ratio_top_20_variants</th>\n",
83
- " <th>normalized_sequence_entropy_linear_forgetting</th>\n",
84
- " </tr>\n",
85
- " </thead>\n",
86
- " <tbody>\n",
87
- " <tr>\n",
88
- " <th>0</th>\n",
89
- " <td>objective_1</td>\n",
90
- " <td>0.2</td>\n",
91
- " <td>0.0</td>\n",
92
- " </tr>\n",
93
- " <tr>\n",
94
- " <th>1</th>\n",
95
- " <td>objective_2</td>\n",
96
- " <td>0.2</td>\n",
97
- " <td>0.1</td>\n",
98
- " </tr>\n",
99
- " <tr>\n",
100
- " <th>2</th>\n",
101
- " <td>objective_3</td>\n",
102
- " <td>0.2</td>\n",
103
- " <td>0.2</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>3</th>\n",
107
- " <td>objective_4</td>\n",
108
- " <td>0.2</td>\n",
109
- " <td>0.3</td>\n",
110
- " </tr>\n",
111
- " <tr>\n",
112
- " <th>4</th>\n",
113
- " <td>objective_5</td>\n",
114
- " <td>0.2</td>\n",
115
- " <td>0.4</td>\n",
116
- " </tr>\n",
117
- " <tr>\n",
118
- " <th>5</th>\n",
119
- " <td>objective_6</td>\n",
120
- " <td>0.2</td>\n",
121
- " <td>0.5</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>6</th>\n",
125
- " <td>objective_7</td>\n",
126
- " <td>0.4</td>\n",
127
- " <td>0.0</td>\n",
128
- " </tr>\n",
129
- " <tr>\n",
130
- " <th>7</th>\n",
131
- " <td>objective_8</td>\n",
132
- " <td>0.4</td>\n",
133
- " <td>0.1</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>8</th>\n",
137
- " <td>objective_9</td>\n",
138
- " <td>0.4</td>\n",
139
- " <td>0.2</td>\n",
140
- " </tr>\n",
141
- " <tr>\n",
142
- " <th>9</th>\n",
143
- " <td>objective_10</td>\n",
144
- " <td>0.4</td>\n",
145
- " <td>0.3</td>\n",
146
- " </tr>\n",
147
- " <tr>\n",
148
- " <th>10</th>\n",
149
- " <td>objective_11</td>\n",
150
- " <td>0.4</td>\n",
151
- " <td>0.4</td>\n",
152
- " </tr>\n",
153
- " <tr>\n",
154
- " <th>11</th>\n",
155
- " <td>objective_12</td>\n",
156
- " <td>0.4</td>\n",
157
- " <td>0.5</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>12</th>\n",
161
- " <td>objective_13</td>\n",
162
- " <td>0.6</td>\n",
163
- " <td>0.0</td>\n",
164
- " </tr>\n",
165
- " <tr>\n",
166
- " <th>13</th>\n",
167
- " <td>objective_14</td>\n",
168
- " <td>0.6</td>\n",
169
- " <td>0.1</td>\n",
170
- " </tr>\n",
171
- " <tr>\n",
172
- " <th>14</th>\n",
173
- " <td>objective_15</td>\n",
174
- " <td>0.6</td>\n",
175
- " <td>0.2</td>\n",
176
- " </tr>\n",
177
- " <tr>\n",
178
- " <th>15</th>\n",
179
- " <td>objective_16</td>\n",
180
- " <td>0.6</td>\n",
181
- " <td>0.3</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>16</th>\n",
185
- " <td>objective_17</td>\n",
186
- " <td>0.6</td>\n",
187
- " <td>0.4</td>\n",
188
- " </tr>\n",
189
- " <tr>\n",
190
- " <th>17</th>\n",
191
- " <td>objective_18</td>\n",
192
- " <td>0.6</td>\n",
193
- " <td>0.5</td>\n",
194
- " </tr>\n",
195
- " <tr>\n",
196
- " <th>18</th>\n",
197
- " <td>objective_19</td>\n",
198
- " <td>0.8</td>\n",
199
- " <td>0.0</td>\n",
200
- " </tr>\n",
201
- " <tr>\n",
202
- " <th>19</th>\n",
203
- " <td>objective_20</td>\n",
204
- " <td>0.8</td>\n",
205
- " <td>0.1</td>\n",
206
- " </tr>\n",
207
- " <tr>\n",
208
- " <th>20</th>\n",
209
- " <td>objective_21</td>\n",
210
- " <td>0.8</td>\n",
211
- " <td>0.2</td>\n",
212
- " </tr>\n",
213
- " <tr>\n",
214
- " <th>21</th>\n",
215
- " <td>objective_22</td>\n",
216
- " <td>0.8</td>\n",
217
- " <td>0.3</td>\n",
218
- " </tr>\n",
219
- " <tr>\n",
220
- " <th>22</th>\n",
221
- " <td>objective_23</td>\n",
222
- " <td>0.8</td>\n",
223
- " <td>0.4</td>\n",
224
- " </tr>\n",
225
- " <tr>\n",
226
- " <th>23</th>\n",
227
- " <td>objective_24</td>\n",
228
- " <td>0.8</td>\n",
229
- " <td>0.5</td>\n",
230
- " </tr>\n",
231
- " <tr>\n",
232
- " <th>24</th>\n",
233
- " <td>objective_25</td>\n",
234
- " <td>1.0</td>\n",
235
- " <td>0.0</td>\n",
236
- " </tr>\n",
237
- " <tr>\n",
238
- " <th>25</th>\n",
239
- " <td>objective_26</td>\n",
240
- " <td>1.0</td>\n",
241
- " <td>0.1</td>\n",
242
- " </tr>\n",
243
- " <tr>\n",
244
- " <th>26</th>\n",
245
- " <td>objective_27</td>\n",
246
- " <td>1.0</td>\n",
247
- " <td>0.2</td>\n",
248
- " </tr>\n",
249
- " <tr>\n",
250
- " <th>27</th>\n",
251
- " <td>objective_28</td>\n",
252
- " <td>1.0</td>\n",
253
- " <td>0.3</td>\n",
254
- " </tr>\n",
255
- " <tr>\n",
256
- " <th>28</th>\n",
257
- " <td>objective_29</td>\n",
258
- " <td>1.0</td>\n",
259
- " <td>0.4</td>\n",
260
- " </tr>\n",
261
- " <tr>\n",
262
- " <th>29</th>\n",
263
- " <td>objective_30</td>\n",
264
- " <td>1.0</td>\n",
265
- " <td>0.5</td>\n",
266
- " </tr>\n",
267
- " </tbody>\n",
268
- "</table>\n",
269
- "</div>"
270
- ],
271
- "text/plain": [
272
- " log ratio_top_20_variants \n",
273
- "0 objective_1 0.2 \\\n",
274
- "1 objective_2 0.2 \n",
275
- "2 objective_3 0.2 \n",
276
- "3 objective_4 0.2 \n",
277
- "4 objective_5 0.2 \n",
278
- "5 objective_6 0.2 \n",
279
- "6 objective_7 0.4 \n",
280
- "7 objective_8 0.4 \n",
281
- "8 objective_9 0.4 \n",
282
- "9 objective_10 0.4 \n",
283
- "10 objective_11 0.4 \n",
284
- "11 objective_12 0.4 \n",
285
- "12 objective_13 0.6 \n",
286
- "13 objective_14 0.6 \n",
287
- "14 objective_15 0.6 \n",
288
- "15 objective_16 0.6 \n",
289
- "16 objective_17 0.6 \n",
290
- "17 objective_18 0.6 \n",
291
- "18 objective_19 0.8 \n",
292
- "19 objective_20 0.8 \n",
293
- "20 objective_21 0.8 \n",
294
- "21 objective_22 0.8 \n",
295
- "22 objective_23 0.8 \n",
296
- "23 objective_24 0.8 \n",
297
- "24 objective_25 1.0 \n",
298
- "25 objective_26 1.0 \n",
299
- "26 objective_27 1.0 \n",
300
- "27 objective_28 1.0 \n",
301
- "28 objective_29 1.0 \n",
302
- "29 objective_30 1.0 \n",
303
- "\n",
304
- " normalized_sequence_entropy_linear_forgetting \n",
305
- "0 0.0 \n",
306
- "1 0.1 \n",
307
- "2 0.2 \n",
308
- "3 0.3 \n",
309
- "4 0.4 \n",
310
- "5 0.5 \n",
311
- "6 0.0 \n",
312
- "7 0.1 \n",
313
- "8 0.2 \n",
314
- "9 0.3 \n",
315
- "10 0.4 \n",
316
- "11 0.5 \n",
317
- "12 0.0 \n",
318
- "13 0.1 \n",
319
- "14 0.2 \n",
320
- "15 0.3 \n",
321
- "16 0.4 \n",
322
- "17 0.5 \n",
323
- "18 0.0 \n",
324
- "19 0.1 \n",
325
- "20 0.2 \n",
326
- "21 0.3 \n",
327
- "22 0.4 \n",
328
- "23 0.5 \n",
329
- "24 0.0 \n",
330
- "25 0.1 \n",
331
- "26 0.2 \n",
332
- "27 0.3 \n",
333
- "28 0.4 \n",
334
- "29 0.5 "
335
- ]
336
- },
337
- "execution_count": 32,
338
- "metadata": {},
339
- "output_type": "execute_result"
340
- }
341
- ],
342
- "source": [
343
- "df"
344
- ]
345
- },
346
- {
347
- "cell_type": "code",
348
- "execution_count": null,
349
- "id": "d726a5ae",
350
- "metadata": {},
351
- "outputs": [],
352
- "source": []
353
- }
354
- ],
355
- "metadata": {
356
- "kernelspec": {
357
- "display_name": "Python 3 (ipykernel)",
358
- "language": "python",
359
- "name": "python3"
360
- },
361
- "language_info": {
362
- "codemirror_mode": {
363
- "name": "ipython",
364
- "version": 3
365
- },
366
- "file_extension": ".py",
367
- "mimetype": "text/x-python",
368
- "name": "python",
369
- "nbconvert_exporter": "python",
370
- "pygments_lexer": "ipython3",
371
- "version": "3.9.7"
372
- }
373
- },
374
- "nbformat": 4,
375
- "nbformat_minor": 5
376
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb DELETED
@@ -1,818 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "id": "4827785f",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "data": {
11
- "text/html": [
12
- "<div>\n",
13
- "<style scoped>\n",
14
- " .dataframe tbody tr th:only-of-type {\n",
15
- " vertical-align: middle;\n",
16
- " }\n",
17
- "\n",
18
- " .dataframe tbody tr th {\n",
19
- " vertical-align: top;\n",
20
- " }\n",
21
- "\n",
22
- " .dataframe thead th {\n",
23
- " text-align: right;\n",
24
- " }\n",
25
- "</style>\n",
26
- "<table border=\"1\" class=\"dataframe\">\n",
27
- " <thead>\n",
28
- " <tr style=\"text-align: right;\">\n",
29
- " <th></th>\n",
30
- " <th>Name</th>\n",
31
- " <th>Short description</th>\n",
32
- " <th>data link</th>\n",
33
- " <th>challenge link</th>\n",
34
- " <th>Citations (Stand Februar 2023)</th>\n",
35
- " <th>Publications</th>\n",
36
- " <th>Process Discovery/ Declarative</th>\n",
37
- " <th>Conformance Checking / Alignment / Replay</th>\n",
38
- " <th>Online / Streaming / Realtime</th>\n",
39
- " <th>Performance (Analysis) / Temporal / Time</th>\n",
40
- " <th>Predict(ive)/ Monitoring/ Prescriptive</th>\n",
41
- " <th>Trace clustering / Clustering</th>\n",
42
- " <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
43
- " <th>Further keywords:</th>\n",
44
- " </tr>\n",
45
- " </thead>\n",
46
- " <tbody>\n",
47
- " <tr>\n",
48
- " <th>0</th>\n",
49
- " <td>Sepsis Cases - Event Log</td>\n",
50
- " <td>This real-life event log contains events of se...</td>\n",
51
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
52
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
53
- " <td>61</td>\n",
54
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
55
- " <td>17</td>\n",
56
- " <td>7</td>\n",
57
- " <td>4</td>\n",
58
- " <td>1</td>\n",
59
- " <td>8</td>\n",
60
- " <td>2</td>\n",
61
- " <td>2</td>\n",
62
- " <td>(machine) learning, (online process) monitorin...</td>\n",
63
- " </tr>\n",
64
- " <tr>\n",
65
- " <th>1</th>\n",
66
- " <td>BPI 2017 - Offer Log</td>\n",
67
- " <td>Contains data from a financial institute inclu...</td>\n",
68
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
69
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
70
- " <td>4</td>\n",
71
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
72
- " <td>1</td>\n",
73
- " <td>0</td>\n",
74
- " <td>0</td>\n",
75
- " <td>1</td>\n",
76
- " <td>1</td>\n",
77
- " <td>0</td>\n",
78
- " <td>0</td>\n",
79
- " <td>(machine) learning, cloud computing</td>\n",
80
- " </tr>\n",
81
- " <tr>\n",
82
- " <th>2</th>\n",
83
- " <td>Road Traffic Fine Management Process (not BPI)</td>\n",
84
- " <td>A real-life event log taken from an informatio...</td>\n",
85
- " <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
86
- " <td>NaN</td>\n",
87
- " <td>95</td>\n",
88
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
89
- " <td>32</td>\n",
90
- " <td>9</td>\n",
91
- " <td>4</td>\n",
92
- " <td>8</td>\n",
93
- " <td>15</td>\n",
94
- " <td>1</td>\n",
95
- " <td>2</td>\n",
96
- " <td>alarm-based prescriptive process monitoring, b...</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>3</th>\n",
100
- " <td>BPI 2011</td>\n",
101
- " <td>Contains data from from a Dutch Academic Hospi...</td>\n",
102
- " <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
103
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
104
- " <td>57</td>\n",
105
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
106
- " <td>13</td>\n",
107
- " <td>1</td>\n",
108
- " <td>3</td>\n",
109
- " <td>4</td>\n",
110
- " <td>12</td>\n",
111
- " <td>4</td>\n",
112
- " <td>1</td>\n",
113
- " <td>(compliance) monitoring, (machine) learning, d...</td>\n",
114
- " </tr>\n",
115
- " <tr>\n",
116
- " <th>4</th>\n",
117
- " <td>BPI 2012</td>\n",
118
- " <td>Contains the event log of an application proce...</td>\n",
119
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
120
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
121
- " <td>151</td>\n",
122
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
123
- " <td>40</td>\n",
124
- " <td>15</td>\n",
125
- " <td>4</td>\n",
126
- " <td>13</td>\n",
127
- " <td>46</td>\n",
128
- " <td>0</td>\n",
129
- " <td>1</td>\n",
130
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>5</th>\n",
134
- " <td>BPI 2013 - Open Problems</td>\n",
135
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
136
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
137
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
138
- " <td>6</td>\n",
139
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
140
- " <td>1</td>\n",
141
- " <td>0</td>\n",
142
- " <td>0</td>\n",
143
- " <td>0</td>\n",
144
- " <td>1</td>\n",
145
- " <td>0</td>\n",
146
- " <td>0</td>\n",
147
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
148
- " </tr>\n",
149
- " <tr>\n",
150
- " <th>6</th>\n",
151
- " <td>BPI 2013 - Closed Problems</td>\n",
152
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
153
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
154
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
155
- " <td>12</td>\n",
156
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
157
- " <td>3</td>\n",
158
- " <td>2</td>\n",
159
- " <td>1</td>\n",
160
- " <td>2</td>\n",
161
- " <td>0</td>\n",
162
- " <td>0</td>\n",
163
- " <td>3</td>\n",
164
- " <td>(in)frequent patterns in process models</td>\n",
165
- " </tr>\n",
166
- " <tr>\n",
167
- " <th>7</th>\n",
168
- " <td>BPI 2013 - Incidents</td>\n",
169
- " <td>The log contains events from an incident and p...</td>\n",
170
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
171
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
172
- " <td>36</td>\n",
173
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
174
- " <td>14</td>\n",
175
- " <td>5</td>\n",
176
- " <td>1</td>\n",
177
- " <td>1</td>\n",
178
- " <td>7</td>\n",
179
- " <td>0</td>\n",
180
- " <td>2</td>\n",
181
- " <td>(machine) learning, rule mining</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>8</th>\n",
185
- " <td>BPI 2014 - Incident Records</td>\n",
186
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
187
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
188
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
189
- " <td>5</td>\n",
190
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
191
- " <td>1</td>\n",
192
- " <td>0</td>\n",
193
- " <td>0</td>\n",
194
- " <td>0</td>\n",
195
- " <td>0</td>\n",
196
- " <td>0</td>\n",
197
- " <td>0</td>\n",
198
- " <td>privacy preservation, security</td>\n",
199
- " </tr>\n",
200
- " <tr>\n",
201
- " <th>9</th>\n",
202
- " <td>BPI 2014 - Interaction Records</td>\n",
203
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
204
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
205
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
206
- " <td>1</td>\n",
207
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
208
- " <td>0</td>\n",
209
- " <td>0</td>\n",
210
- " <td>0</td>\n",
211
- " <td>0</td>\n",
212
- " <td>0</td>\n",
213
- " <td>0</td>\n",
214
- " <td>0</td>\n",
215
- " <td>(machine) learning, hidden Markov models</td>\n",
216
- " </tr>\n",
217
- " <tr>\n",
218
- " <th>10</th>\n",
219
- " <td>BPI 2015 - Log 3</td>\n",
220
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
221
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
222
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
223
- " <td>1</td>\n",
224
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
225
- " <td>0</td>\n",
226
- " <td>0</td>\n",
227
- " <td>0</td>\n",
228
- " <td>0</td>\n",
229
- " <td>1</td>\n",
230
- " <td>0</td>\n",
231
- " <td>0</td>\n",
232
- " <td>specification-driven predictive business proce...</td>\n",
233
- " </tr>\n",
234
- " <tr>\n",
235
- " <th>11</th>\n",
236
- " <td>BPI 2015 - Log 1</td>\n",
237
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
238
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
239
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
240
- " <td>8</td>\n",
241
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
242
- " <td>1</td>\n",
243
- " <td>1</td>\n",
244
- " <td>0</td>\n",
245
- " <td>0</td>\n",
246
- " <td>3</td>\n",
247
- " <td>0</td>\n",
248
- " <td>3</td>\n",
249
- " <td>(machine) learning</td>\n",
250
- " </tr>\n",
251
- " <tr>\n",
252
- " <th>12</th>\n",
253
- " <td>BPI 2016 - Clicks Logged In</td>\n",
254
- " <td>Contains clicks of users that are logged in fr...</td>\n",
255
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
256
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
257
- " <td>1</td>\n",
258
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
259
- " <td>1</td>\n",
260
- " <td>0</td>\n",
261
- " <td>1</td>\n",
262
- " <td>0</td>\n",
263
- " <td>0</td>\n",
264
- " <td>0</td>\n",
265
- " <td>0</td>\n",
266
- " <td>automation</td>\n",
267
- " </tr>\n",
268
- " <tr>\n",
269
- " <th>13</th>\n",
270
- " <td>BPI 2017 - Application Log</td>\n",
271
- " <td>Contains data from a financial institute inclu...</td>\n",
272
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
273
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
274
- " <td>73</td>\n",
275
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
276
- " <td>11</td>\n",
277
- " <td>5</td>\n",
278
- " <td>2</td>\n",
279
- " <td>14</td>\n",
280
- " <td>23</td>\n",
281
- " <td>1</td>\n",
282
- " <td>1</td>\n",
283
- " <td>(machine) learning, alarm-based prescriptive p...</td>\n",
284
- " </tr>\n",
285
- " <tr>\n",
286
- " <th>14</th>\n",
287
- " <td>BPI 2018</td>\n",
288
- " <td>The process covers the handling of application...</td>\n",
289
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
290
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
291
- " <td>26</td>\n",
292
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
293
- " <td>7</td>\n",
294
- " <td>1</td>\n",
295
- " <td>2</td>\n",
296
- " <td>0</td>\n",
297
- " <td>8</td>\n",
298
- " <td>0</td>\n",
299
- " <td>2</td>\n",
300
- " <td>(machine) learning, automation</td>\n",
301
- " </tr>\n",
302
- " <tr>\n",
303
- " <th>15</th>\n",
304
- " <td>BPI 2020 - Travel Permits</td>\n",
305
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
306
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
307
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
308
- " <td>2</td>\n",
309
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
310
- " <td>0</td>\n",
311
- " <td>0</td>\n",
312
- " <td>0</td>\n",
313
- " <td>1</td>\n",
314
- " <td>0</td>\n",
315
- " <td>0</td>\n",
316
- " <td>0</td>\n",
317
- " <td>stage-based process performance analysis</td>\n",
318
- " </tr>\n",
319
- " <tr>\n",
320
- " <th>16</th>\n",
321
- " <td>BPI 2019</td>\n",
322
- " <td>Contains the purchase order handling process o...</td>\n",
323
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
324
- " <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
325
- " <td>35</td>\n",
326
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
327
- " <td>3</td>\n",
328
- " <td>1</td>\n",
329
- " <td>6</td>\n",
330
- " <td>6</td>\n",
331
- " <td>9</td>\n",
332
- " <td>4</td>\n",
333
- " <td>1</td>\n",
334
- " <td>(online process) monitoring, remaining time pr...</td>\n",
335
- " </tr>\n",
336
- " <tr>\n",
337
- " <th>17</th>\n",
338
- " <td>BPI 2020 - International Declarations</td>\n",
339
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
340
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
341
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
342
- " <td>2</td>\n",
343
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
344
- " <td>0</td>\n",
345
- " <td>0</td>\n",
346
- " <td>0</td>\n",
347
- " <td>1</td>\n",
348
- " <td>2</td>\n",
349
- " <td>0</td>\n",
350
- " <td>0</td>\n",
351
- " <td>(machine) learning, remaining time prediction</td>\n",
352
- " </tr>\n",
353
- " <tr>\n",
354
- " <th>18</th>\n",
355
- " <td>BPI 2020 - Domestic Declarations</td>\n",
356
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
357
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
358
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
359
- " <td>7</td>\n",
360
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
361
- " <td>0</td>\n",
362
- " <td>2</td>\n",
363
- " <td>2</td>\n",
364
- " <td>2</td>\n",
365
- " <td>3</td>\n",
366
- " <td>0</td>\n",
367
- " <td>0</td>\n",
368
- " <td>(machine) learning, remaining time prediction</td>\n",
369
- " </tr>\n",
370
- " <tr>\n",
371
- " <th>19</th>\n",
372
- " <td>BPI 2020 - Prepaid Travel Cost</td>\n",
373
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
374
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
375
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
376
- " <td>2</td>\n",
377
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
378
- " <td>0</td>\n",
379
- " <td>0</td>\n",
380
- " <td>0</td>\n",
381
- " <td>0</td>\n",
382
- " <td>0</td>\n",
383
- " <td>0</td>\n",
384
- " <td>0</td>\n",
385
- " <td>multi-perspective</td>\n",
386
- " </tr>\n",
387
- " <tr>\n",
388
- " <th>20</th>\n",
389
- " <td>Helpdesk</td>\n",
390
- " <td>Ticketing management process of the Help desk ...</td>\n",
391
- " <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
392
- " <td>NaN</td>\n",
393
- " <td>20</td>\n",
394
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
395
- " <td>4</td>\n",
396
- " <td>1</td>\n",
397
- " <td>3</td>\n",
398
- " <td>1</td>\n",
399
- " <td>8</td>\n",
400
- " <td>0</td>\n",
401
- " <td>0</td>\n",
402
- " <td>(machine) learning, drift detection</td>\n",
403
- " </tr>\n",
404
- " <tr>\n",
405
- " <th>21</th>\n",
406
- " <td>Receipt phase of an environmental permit appli...</td>\n",
407
- " <td>Data originates from the CoSeLoG project where...</td>\n",
408
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
409
- " <td>NaN</td>\n",
410
- " <td>15</td>\n",
411
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
412
- " <td>-1</td>\n",
413
- " <td>-1</td>\n",
414
- " <td>-1</td>\n",
415
- " <td>-1</td>\n",
416
- " <td>-1</td>\n",
417
- " <td>-1</td>\n",
418
- " <td>-1</td>\n",
419
- " <td>NaN</td>\n",
420
- " </tr>\n",
421
- " <tr>\n",
422
- " <th>22</th>\n",
423
- " <td>Environmental permit application process (β€˜WAB...</td>\n",
424
- " <td>Data originates from the CoSeLoG project where...</td>\n",
425
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
426
- " <td>NaN</td>\n",
427
- " <td>2</td>\n",
428
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
429
- " <td>0</td>\n",
430
- " <td>0</td>\n",
431
- " <td>0</td>\n",
432
- " <td>0</td>\n",
433
- " <td>1</td>\n",
434
- " <td>0</td>\n",
435
- " <td>0</td>\n",
436
- " <td>predictions with a-priori knowledge</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>23</th>\n",
440
- " <td>Environmental permit application process (β€˜WAB...</td>\n",
441
- " <td>Data originates from the CoSeLoG project where...</td>\n",
442
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
443
- " <td>NaN</td>\n",
444
- " <td>2</td>\n",
445
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
446
- " <td>1</td>\n",
447
- " <td>0</td>\n",
448
- " <td>0</td>\n",
449
- " <td>0</td>\n",
450
- " <td>0</td>\n",
451
- " <td>0</td>\n",
452
- " <td>0</td>\n",
453
- " <td>multidimensional process mining, process cubes</td>\n",
454
- " </tr>\n",
455
- " <tr>\n",
456
- " <th>24</th>\n",
457
- " <td>NaN</td>\n",
458
- " <td>NaN</td>\n",
459
- " <td>NaN</td>\n",
460
- " <td>NaN</td>\n",
461
- " <td>NaN</td>\n",
462
- " <td>NaN</td>\n",
463
- " <td>NaN</td>\n",
464
- " <td>NaN</td>\n",
465
- " <td>NaN</td>\n",
466
- " <td>NaN</td>\n",
467
- " <td>NaN</td>\n",
468
- " <td>NaN</td>\n",
469
- " <td>NaN</td>\n",
470
- " <td>NaN</td>\n",
471
- " </tr>\n",
472
- " </tbody>\n",
473
- "</table>\n",
474
- "</div>"
475
- ],
476
- "text/plain": [
477
- " Name \\\n",
478
- "0 Sepsis Cases - Event Log \n",
479
- "1 BPI 2017 - Offer Log \n",
480
- "2 Road Traffic Fine Management Process (not BPI) \n",
481
- "3 BPI 2011 \n",
482
- "4 BPI 2012 \n",
483
- "5 BPI 2013 - Open Problems \n",
484
- "6 BPI 2013 - Closed Problems \n",
485
- "7 BPI 2013 - Incidents \n",
486
- "8 BPI 2014 - Incident Records \n",
487
- "9 BPI 2014 - Interaction Records \n",
488
- "10 BPI 2015 - Log 3 \n",
489
- "11 BPI 2015 - Log 1 \n",
490
- "12 BPI 2016 - Clicks Logged In \n",
491
- "13 BPI 2017 - Application Log \n",
492
- "14 BPI 2018 \n",
493
- "15 BPI 2020 - Travel Permits \n",
494
- "16 BPI 2019 \n",
495
- "17 BPI 2020 - International Declarations \n",
496
- "18 BPI 2020 - Domestic Declarations \n",
497
- "19 BPI 2020 - Prepaid Travel Cost \n",
498
- "20 Helpdesk \n",
499
- "21 Receipt phase of an environmental permit appli... \n",
500
- "22 Environmental permit application process (β€˜WAB... \n",
501
- "23 Environmental permit application process (β€˜WAB... \n",
502
- "24 NaN \n",
503
- "\n",
504
- " Short description \\\n",
505
- "0 This real-life event log contains events of se... \n",
506
- "1 Contains data from a financial institute inclu... \n",
507
- "2 A real-life event log taken from an informatio... \n",
508
- "3 Contains data from from a Dutch Academic Hospi... \n",
509
- "4 Contains the event log of an application proce... \n",
510
- "5 Rabobank Group ICT implemented ITIL processes ... \n",
511
- "6 Rabobank Group ICT implemented ITIL processes ... \n",
512
- "7 The log contains events from an incident and p... \n",
513
- "8 Rabobank Group ICT implemented ITIL processes ... \n",
514
- "9 Rabobank Group ICT implemented ITIL processes ... \n",
515
- "10 Provided by 5 Dutch municipalities. The data c... \n",
516
- "11 Provided by 5 Dutch municipalities. The data c... \n",
517
- "12 Contains clicks of users that are logged in fr... \n",
518
- "13 Contains data from a financial institute inclu... \n",
519
- "14 The process covers the handling of application... \n",
520
- "15 Contains 2 years of data from the reimbursemen... \n",
521
- "16 Contains the purchase order handling process o... \n",
522
- "17 Contains 2 years of data from the reimbursemen... \n",
523
- "18 Contains 2 years of data from the reimbursemen... \n",
524
- "19 Contains 2 years of data from the reimbursemen... \n",
525
- "20 Ticketing management process of the Help desk ... \n",
526
- "21 Data originates from the CoSeLoG project where... \n",
527
- "22 Data originates from the CoSeLoG project where... \n",
528
- "23 Data originates from the CoSeLoG project where... \n",
529
- "24 NaN \n",
530
- "\n",
531
- " data link \\\n",
532
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
533
- "1 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
534
- "2 https://data.4tu.nl/articles/dataset/Road_Traf... \n",
535
- "3 https://data.4tu.nl/articles/dataset/Real-life... \n",
536
- "4 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
537
- "5 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
538
- "6 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
539
- "7 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
540
- "8 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
541
- "9 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
542
- "10 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
543
- "11 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
544
- "12 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
545
- "13 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
546
- "14 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
547
- "15 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
548
- "16 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
549
- "17 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
550
- "18 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
551
- "19 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
552
- "20 https://data.4tu.nl/articles/dataset/Dataset_b... \n",
553
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
554
- "22 https://data.4tu.nl/articles/dataset/Environme... \n",
555
- "23 https://data.4tu.nl/articles/dataset/Environme... \n",
556
- "24 NaN \n",
557
- "\n",
558
- " challenge link \\\n",
559
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
560
- "1 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
561
- "2 NaN \n",
562
- "3 https://www.win.tue.nl/bpi/doku.php?id=2011:ch... \n",
563
- "4 https://www.win.tue.nl/bpi/doku.php?id=2012:ch... \n",
564
- "5 https://www.win.tue.nl/bpi/2013/challenge.html \n",
565
- "6 https://www.win.tue.nl/bpi/doku.php?id=2013:ch... \n",
566
- "7 https://www.win.tue.nl/bpi/2013/challenge.html \n",
567
- "8 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
568
- "9 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
569
- "10 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
570
- "11 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
571
- "12 https://www.win.tue.nl/bpi/doku.php?id=2016:ch... \n",
572
- "13 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
573
- "14 https://www.win.tue.nl/bpi/doku.php?id=2018:ch... \n",
574
- "15 https://icpmconference.org/2020/bpi-challenge/ \n",
575
- "16 https://icpmconference.org/2019/icpm-2019/cont... \n",
576
- "17 https://icpmconference.org/2020/bpi-challenge/ \n",
577
- "18 https://icpmconference.org/2020/bpi-challenge/ \n",
578
- "19 https://icpmconference.org/2020/bpi-challenge/ \n",
579
- "20 NaN \n",
580
- "21 NaN \n",
581
- "22 NaN \n",
582
- "23 NaN \n",
583
- "24 NaN \n",
584
- "\n",
585
- " Citations (Stand Februar 2023) \\\n",
586
- "0 61 \n",
587
- "1 4 \n",
588
- "2 95 \n",
589
- "3 57 \n",
590
- "4 151 \n",
591
- "5 6 \n",
592
- "6 12 \n",
593
- "7 36 \n",
594
- "8 5 \n",
595
- "9 1 \n",
596
- "10 1 \n",
597
- "11 8 \n",
598
- "12 1 \n",
599
- "13 73 \n",
600
- "14 26 \n",
601
- "15 2 \n",
602
- "16 35 \n",
603
- "17 2 \n",
604
- "18 7 \n",
605
- "19 2 \n",
606
- "20 20 \n",
607
- "21 15 \n",
608
- "22 2 \n",
609
- "23 2 \n",
610
- "24 NaN \n",
611
- "\n",
612
- " Publications \\\n",
613
- "0 https://app.dimensions.ai/discover/publication... \n",
614
- "1 https://app.dimensions.ai/discover/publication... \n",
615
- "2 https://app.dimensions.ai/discover/publication... \n",
616
- "3 https://app.dimensions.ai/discover/publication... \n",
617
- "4 https://app.dimensions.ai/discover/publication... \n",
618
- "5 https://app.dimensions.ai/discover/publication... \n",
619
- "6 https://app.dimensions.ai/discover/publication... \n",
620
- "7 https://app.dimensions.ai/discover/publication... \n",
621
- "8 https://app.dimensions.ai/discover/publication... \n",
622
- "9 https://app.dimensions.ai/discover/publication... \n",
623
- "10 https://app.dimensions.ai/discover/publication... \n",
624
- "11 https://app.dimensions.ai/discover/publication... \n",
625
- "12 https://app.dimensions.ai/discover/publication... \n",
626
- "13 https://app.dimensions.ai/discover/publication... \n",
627
- "14 https://app.dimensions.ai/discover/publication... \n",
628
- "15 https://app.dimensions.ai/discover/publication... \n",
629
- "16 https://app.dimensions.ai/discover/publication... \n",
630
- "17 https://app.dimensions.ai/discover/publication... \n",
631
- "18 https://app.dimensions.ai/discover/publication... \n",
632
- "19 https://app.dimensions.ai/discover/publication... \n",
633
- "20 https://app.dimensions.ai/discover/publication... \n",
634
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
635
- "22 https://app.dimensions.ai/discover/publication... \n",
636
- "23 https://app.dimensions.ai/discover/publication... \n",
637
- "24 NaN \n",
638
- "\n",
639
- " Process Discovery/ Declarative Conformance Checking / Alignment / Replay \\\n",
640
- "0 17 7 \n",
641
- "1 1 0 \n",
642
- "2 32 9 \n",
643
- "3 13 1 \n",
644
- "4 40 15 \n",
645
- "5 1 0 \n",
646
- "6 3 2 \n",
647
- "7 14 5 \n",
648
- "8 1 0 \n",
649
- "9 0 0 \n",
650
- "10 0 0 \n",
651
- "11 1 1 \n",
652
- "12 1 0 \n",
653
- "13 11 5 \n",
654
- "14 7 1 \n",
655
- "15 0 0 \n",
656
- "16 3 1 \n",
657
- "17 0 0 \n",
658
- "18 0 2 \n",
659
- "19 0 0 \n",
660
- "20 4 1 \n",
661
- "21 -1 -1 \n",
662
- "22 0 0 \n",
663
- "23 1 0 \n",
664
- "24 NaN NaN \n",
665
- "\n",
666
- " Online / Streaming / Realtime Performance (Analysis) / Temporal / Time \\\n",
667
- "0 4 1 \n",
668
- "1 0 1 \n",
669
- "2 4 8 \n",
670
- "3 3 4 \n",
671
- "4 4 13 \n",
672
- "5 0 0 \n",
673
- "6 1 2 \n",
674
- "7 1 1 \n",
675
- "8 0 0 \n",
676
- "9 0 0 \n",
677
- "10 0 0 \n",
678
- "11 0 0 \n",
679
- "12 1 0 \n",
680
- "13 2 14 \n",
681
- "14 2 0 \n",
682
- "15 0 1 \n",
683
- "16 6 6 \n",
684
- "17 0 1 \n",
685
- "18 2 2 \n",
686
- "19 0 0 \n",
687
- "20 3 1 \n",
688
- "21 -1 -1 \n",
689
- "22 0 0 \n",
690
- "23 0 0 \n",
691
- "24 NaN NaN \n",
692
- "\n",
693
- " Predict(ive)/ Monitoring/ Prescriptive Trace clustering / Clustering \\\n",
694
- "0 8 2 \n",
695
- "1 1 0 \n",
696
- "2 15 1 \n",
697
- "3 12 4 \n",
698
- "4 46 0 \n",
699
- "5 1 0 \n",
700
- "6 0 0 \n",
701
- "7 7 0 \n",
702
- "8 0 0 \n",
703
- "9 0 0 \n",
704
- "10 1 0 \n",
705
- "11 3 0 \n",
706
- "12 0 0 \n",
707
- "13 23 1 \n",
708
- "14 8 0 \n",
709
- "15 0 0 \n",
710
- "16 9 4 \n",
711
- "17 2 0 \n",
712
- "18 3 0 \n",
713
- "19 0 0 \n",
714
- "20 8 0 \n",
715
- "21 -1 -1 \n",
716
- "22 1 0 \n",
717
- "23 0 0 \n",
718
- "24 NaN NaN \n",
719
- "\n",
720
- " Preprocessing / Event Abstraction / Event Data Correlation \\\n",
721
- "0 2 \n",
722
- "1 0 \n",
723
- "2 2 \n",
724
- "3 1 \n",
725
- "4 1 \n",
726
- "5 0 \n",
727
- "6 3 \n",
728
- "7 2 \n",
729
- "8 0 \n",
730
- "9 0 \n",
731
- "10 0 \n",
732
- "11 3 \n",
733
- "12 0 \n",
734
- "13 1 \n",
735
- "14 2 \n",
736
- "15 0 \n",
737
- "16 1 \n",
738
- "17 0 \n",
739
- "18 0 \n",
740
- "19 0 \n",
741
- "20 0 \n",
742
- "21 -1 \n",
743
- "22 0 \n",
744
- "23 0 \n",
745
- "24 NaN \n",
746
- "\n",
747
- " Further keywords: \n",
748
- "0 (machine) learning, (online process) monitorin... \n",
749
- "1 (machine) learning, cloud computing \n",
750
- "2 alarm-based prescriptive process monitoring, b... \n",
751
- "3 (compliance) monitoring, (machine) learning, d... \n",
752
- "4 (in)frequent patterns in process models, (mach... \n",
753
- "5 (in)frequent patterns in process models, (mach... \n",
754
- "6 (in)frequent patterns in process models \n",
755
- "7 (machine) learning, rule mining \n",
756
- "8 privacy preservation, security \n",
757
- "9 (machine) learning, hidden Markov models \n",
758
- "10 specification-driven predictive business proce... \n",
759
- "11 (machine) learning \n",
760
- "12 automation \n",
761
- "13 (machine) learning, alarm-based prescriptive p... \n",
762
- "14 (machine) learning, automation \n",
763
- "15 stage-based process performance analysis \n",
764
- "16 (online process) monitoring, remaining time pr... \n",
765
- "17 (machine) learning, remaining time prediction \n",
766
- "18 (machine) learning, remaining time prediction \n",
767
- "19 multi-perspective \n",
768
- "20 (machine) learning, drift detection \n",
769
- "21 NaN \n",
770
- "22 predictions with a-priori knowledge \n",
771
- "23 multidimensional process mining, process cubes \n",
772
- "24 NaN "
773
- ]
774
- },
775
- "execution_count": 4,
776
- "metadata": {},
777
- "output_type": "execute_result"
778
- }
779
- ],
780
- "source": [
781
- "#import pm4py\n",
782
- "import pandas as pd\n",
783
- "INPUT_PATH = \"../data/mappings.csv\"\n",
784
- "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
785
- "df"
786
- ]
787
- },
788
- {
789
- "cell_type": "code",
790
- "execution_count": null,
791
- "id": "04a97f37",
792
- "metadata": {},
793
- "outputs": [],
794
- "source": []
795
- }
796
- ],
797
- "metadata": {
798
- "kernelspec": {
799
- "display_name": "Python 3 (ipykernel)",
800
- "language": "python",
801
- "name": "python3"
802
- },
803
- "language_info": {
804
- "codemirror_mode": {
805
- "name": "ipython",
806
- "version": 3
807
- },
808
- "file_extension": ".py",
809
- "mimetype": "text/x-python",
810
- "name": "python",
811
- "nbconvert_exporter": "python",
812
- "pygments_lexer": "ipython3",
813
- "version": "3.10.7"
814
- }
815
- },
816
- "nbformat": 4,
817
- "nbformat_minor": 5
818
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/benchmarking_process_discovery.ipynb CHANGED
@@ -1277,7 +1277,7 @@
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
- "version": "3.9.7"
1426
  }
1427
  },
1428
  "nbformat": 4,
 
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
 
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
+ "version": "3.9.19"
1426
  }
1427
  },
1428
  "nbformat": 4,
notebooks/bpic_generability_pdm.ipynb CHANGED
@@ -1223,7 +1223,7 @@
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
 
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
notebooks/experiment_generator.ipynb CHANGED
@@ -2225,7 +2225,7 @@
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
- "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
- "version": "3.9.7"
3106
  }
3107
  },
3108
  "nbformat": 4,
 
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
+ "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
 
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
+ "version": "3.9.19"
3106
  }
3107
  },
3108
  "nbformat": 4,
notebooks/feature_distributions.ipynb CHANGED
@@ -1847,7 +1847,7 @@
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
- "version": "3.9.12"
1851
  }
1852
  },
1853
  "nbformat": 4,
 
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
+ "version": "3.9.19"
1851
  }
1852
  },
1853
  "nbformat": 4,