Andrea Maldonado committed on
Commit
17e1124
·
2 Parent(s): 83f4f0c 973f5db

Merge branch '5-automation-test-gedi-automatically' into bpm24

Browse files

* 5-automation-test-gedi-automatically: (56 commits)
Adds plotter test data
Adds provisory evaluation plotter
Renames test data dir-
Updates gitignore
Moves to data to test dir
Setup generation test with file
Adds multiple experiments to gen
Specifies Python version
Fixes integration ConfigSpace installation for ubuntu
Corrects generation output path
Removes unnecessary conda
Updates github action versions
Adds conda install for ConfigSpace
Removes .checkpoints
Gitignore
Updates github action versions
Fixes test typo
Adds integration test
specifies numpy
Fixes yml
...

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .github/workflows/test_gedi.yml +162 -0
  2. .gitignore +5 -1
  3. README.md +2 -3
  4. config.py +3 -3
  5. config_files/algorithm/benchmark.json +1 -2
  6. config_files/algorithm/evaluation_plotter.json +7 -5
  7. config_files/algorithm/experiment_test.json +7 -7
  8. config_files/algorithm/feature_extraction.json +1 -1
  9. config_files/algorithm/generation.json +2 -5
  10. data/2_grid_test.csv +3 -3
  11. data/{test_2 → test}/gen_el_168.xes +0 -0
  12. data/{test_2 → test}/gen_el_169.xes +0 -0
  13. data/test/grid_feat.csv +3 -0
  14. data/test/plotter/1_enve_feat.csv +12 -0
  15. data/test/plotter/grid_1objectives_enve.csv +12 -0
  16. execute_grid_experiments.py +1 -1
  17. gedi/__init__.py +8 -0
  18. {tag → gedi}/analyser.py +3 -3
  19. {tag → gedi}/augmentation.py +1 -1
  20. {tag → gedi}/benchmark.py +3 -3
  21. {tag → gedi}/features.py +1 -1
  22. {tag → gedi}/generator.py +2 -2
  23. {tag → gedi}/plotter.py +6 -5
  24. {tag → gedi}/utils/algorithms/__init__.py +0 -0
  25. {tag → gedi}/utils/algorithms/tsne.py +0 -0
  26. {tag → gedi}/utils/array_tools.py +0 -0
  27. {tag → gedi}/utils/io_helpers.py +0 -0
  28. {tag → gedi}/utils/matrix_tools.py +0 -0
  29. main.py +9 -9
  30. notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
  31. notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
  32. notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
  33. notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
  34. notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
  35. notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
  36. notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
  37. notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
  38. notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
  39. notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
  40. notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
  41. notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +0 -376
  42. notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +0 -6
  43. notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +0 -6
  44. notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
  45. notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +0 -818
  46. notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
  47. notebooks/benchmarking_process_discovery.ipynb +2 -2
  48. notebooks/bpic_generability_pdm.ipynb +1 -1
  49. notebooks/experiment_generator.ipynb +2 -2
  50. notebooks/feature_distributions.ipynb +1 -1
.github/workflows/test_gedi.yml ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GEDI Test
2
+
3
+ # Specifies when the action should run
4
+ on:
5
+ pull_request:
6
+ branches:
7
+ - main
8
+
9
+ # Specifies the jobs that are to be run
10
+ jobs:
11
+ test_feature-extraction:
12
+ runs-on: ubuntu-latest
13
+
14
+ # Setting up a python environment for the test script to run
15
+ steps:
16
+ - name: Checkout code
17
+ uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: 3.9
23
+
24
+ - name: Install feeed
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install .
28
+
29
+ - name: Run test
30
+ run:
31
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
32
+
33
+ - name: Compare output
34
+ run: diff data/test_feat.csv data/test_feat.csv
35
+
36
+ test_generation:
37
+ runs-on: ubuntu-latest
38
+
39
+ # Setting up a python environment for the test script to run
40
+ steps:
41
+ - name: Checkout code
42
+ uses: actions/checkout@v4
43
+
44
+ - name: Set up Python
45
+ uses: actions/setup-python@v5
46
+ with:
47
+ python-version: 3.9
48
+
49
+ - name: Install dependencies
50
+ run: |
51
+ sudo apt-get install build-essential python3 python3-dev
52
+
53
+ - name: Install feeed
54
+ run: |
55
+ python -m pip install --upgrade pip
56
+ pip install .
57
+
58
+ - name: Run test
59
+ run:
60
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
61
+
62
+ - name: Compare output
63
+ run: diff output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
64
+
65
+ test_benchmark:
66
+ runs-on: ubuntu-latest
67
+
68
+ # Setting up a python environment for the test script to run
69
+ steps:
70
+ - name: Checkout code
71
+ uses: actions/checkout@v4
72
+
73
+ - name: Set up Python
74
+ uses: actions/setup-python@v5
75
+ with:
76
+ python-version: 3.9
77
+
78
+ - name: Install feeed
79
+ run: |
80
+ python -m pip install --upgrade pip
81
+ pip install .
82
+
83
+ - name: Run test
84
+ run:
85
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
86
+
87
+ - name: Compare output
88
+ run: diff output/benchmark/test_benchmark.csv output/benchmark/test_benchmark.csv
89
+
90
+ test_augmentation:
91
+ runs-on: ubuntu-latest
92
+
93
+ # Setting up a python environment for the test script to run
94
+ steps:
95
+ - name: Checkout code
96
+ uses: actions/checkout@v4
97
+
98
+ - name: Set up Python
99
+ uses: actions/setup-python@v5
100
+ with:
101
+ python-version: 3.9
102
+
103
+ - name: Install feeed
104
+ run: |
105
+ python -m pip install --upgrade pip
106
+ pip install .
107
+
108
+ - name: Run test
109
+ run:
110
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
111
+
112
+ test_evaluation-plotter:
113
+ runs-on: ubuntu-latest
114
+
115
+ # Setting up a python environment for the test script to run
116
+ steps:
117
+ - name: Checkout code
118
+ uses: actions/checkout@v4
119
+
120
+ - name: Set up Python
121
+ uses: actions/setup-python@v5
122
+ with:
123
+ python-version: 3.9
124
+
125
+ - name: Install dependencies
126
+ run: |
127
+ sudo apt-get install build-essential python3 python3-dev
128
+
129
+ - name: Install feeed
130
+ run: |
131
+ python -m pip install --upgrade pip
132
+ pip install .
133
+
134
+ - name: Run test
135
+ run:
136
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
137
+
138
+ test_integration:
139
+ runs-on: ubuntu-latest
140
+
141
+ # Setting up a python environment for the test script to run
142
+ steps:
143
+ - name: Checkout code
144
+ uses: actions/checkout@v4
145
+
146
+ - name: Set up Python
147
+ uses: actions/setup-python@v5
148
+ with:
149
+ python-version: 3.9
150
+
151
+ - name: Install dependencies
152
+ run: |
153
+ sudo apt-get install build-essential python3 python3-dev
154
+
155
+ - name: Install feeed
156
+ run: |
157
+ python -m pip install --upgrade pip
158
+ pip install .
159
+
160
+ - name: Run test
161
+ run:
162
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
.gitignore CHANGED
@@ -1,3 +1,7 @@
1
  smac3_output/
2
  data/
3
- output/
 
 
 
 
 
1
  smac3_output/
2
  data/
3
+ output/
4
+ .ipynb_checkpoints/
5
+ notebooks/.ipynb_checkpoints/*
6
+ gedi.egg-info/
7
+ build/
README.md CHANGED
@@ -16,12 +16,11 @@ For MacOS:
16
  brew install graphviz
17
  brew install swig
18
  ```
19
-
20
- ## Installation
21
  - For smac:
22
  ```console
23
  conda install pyrfr swig
24
  ```
 
25
  - `conda env create -f .conda.yml`
26
  - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
27
 
@@ -33,7 +32,7 @@ python main.py -o config_files/options/baseline.json -a config_files/algorithm/e
33
  ## Usage
34
  Our pipeline offers several pipeline steps, which can be run sequentially or partially:
35
  - feature_extraction
36
- - event_logs_generation
37
  - benchmark
38
  - evaluation_plotter
39
 
 
16
  brew install graphviz
17
  brew install swig
18
  ```
 
 
19
  - For smac:
20
  ```console
21
  conda install pyrfr swig
22
  ```
23
+ ## Installation
24
  - `conda env create -f .conda.yml`
25
  - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
26
 
 
32
  ## Usage
33
  Our pipeline offers several pipeline steps, which can be run sequentially or partially:
34
  - feature_extraction
35
+ - generation
36
  - benchmark
37
  - evaluation_plotter
38
 
config.py CHANGED
@@ -2,7 +2,7 @@ import json
2
  import os
3
  import warnings
4
 
5
- from tag.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
  from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
 
@@ -63,8 +63,8 @@ def get_files_and_kwargs(params: dict):
63
 
64
  #TODO: generate parent directories if they don't exist
65
  if input_name == 'test':
66
- filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
67
- kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
68
  elif input_name == 'realLogs':
69
  filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
  kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
 
2
  import os
3
  import warnings
4
 
5
+ from gedi.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
  from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
 
 
63
 
64
  #TODO: generate parent directories if they don't exist
65
  if input_name == 'test':
66
+ filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
67
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
68
  elif input_name == 'realLogs':
69
  filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
  kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
config_files/algorithm/benchmark.json CHANGED
@@ -2,8 +2,7 @@
2
  {
3
  "pipeline_step": "benchmark_test",
4
  "benchmark_test": "discovery",
5
- "input_path":"data/test_2",
6
- "input_path":"data/test_2/gen_el_168.xes",
7
  "output_path":"output",
8
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
9
  }
 
2
  {
3
  "pipeline_step": "benchmark_test",
4
  "benchmark_test": "discovery",
5
+ "input_path":"data/test",
 
6
  "output_path":"output",
7
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
8
  }
config_files/algorithm/evaluation_plotter.json CHANGED
@@ -2,16 +2,18 @@
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
  "input_path": "output/features/generated/34_bpic_features/",
5
- "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
6
  "input_path": "output/features/generated/grid_2obj/",
7
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
8
- "output_path": "output/plots",
 
9
  "reference_feature": "epa_normalized_sequence_entropy",
10
- "reference_feature": "epa_normalized_variant_entropy",
11
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
 
12
  "targets": "data/34_bpic_features.csv",
13
- "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
14
  "targets": "data/grid_experiments/grid_2obj/",
15
- "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
 
 
 
16
  }
17
  ]
 
2
  {
3
  "pipeline_step": "evaluation_plotter",
4
  "input_path": "output/features/generated/34_bpic_features/",
 
5
  "input_path": "output/features/generated/grid_2obj/",
6
  "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
7
+ "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
8
+ "input_path": "data/test/plotter/1_enve_feat.csv",
9
  "reference_feature": "epa_normalized_sequence_entropy",
 
10
  "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
11
+ "reference_feature": "epa_normalized_variant_entropy",
12
  "targets": "data/34_bpic_features.csv",
 
13
  "targets": "data/grid_experiments/grid_2obj/",
14
+ "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
15
+ "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
16
+ "targets": "data/test/plotter/grid_1objectives_enve.csv",
17
+ "output_path": "output/plots"
18
  }
19
  ]
config_files/algorithm/experiment_test.json CHANGED
@@ -9,16 +9,16 @@
9
  {
10
  "pipeline_step": "event_logs_generation",
11
  "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
- "output_path": "data/test_2",
13
  "generator_params": {
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
  "experiment": [
18
- {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
19
- {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
20
  ],
21
- "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
22
  "config_space": {
23
  "mode": [5, 20],
24
  "sequence": [0.01, 1],
@@ -27,7 +27,7 @@
27
  "loop": [0.01, 1],
28
  "silent": [0.01, 1],
29
  "lt_dependency": [0.01, 1],
30
- "num_traces": [100, 10001],
31
  "duplicate": [0],
32
  "or": [0]
33
  },
@@ -36,7 +36,7 @@
36
  },
37
  {
38
  "pipeline_step": "feature_extraction",
39
- "input_path": "data/test_2",
40
  "feature_params": {"feature_set":["trace_length"]},
41
  "output_path": "output/plots",
42
  "real_eventlog_path": "data/bpic_features.csv",
@@ -45,7 +45,7 @@
45
  {
46
  "pipeline_step": "benchmark_test",
47
  "benchmark_test": "discovery",
48
- "input_path":"data/test_2",
49
  "output_path":"output",
50
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
51
  }
 
9
  {
10
  "pipeline_step": "event_logs_generation",
11
  "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
+ "output_path": "data/test",
13
  "generator_params": {
14
  "experiment": "data/grid_objectives.csv",
15
  "experiment": {"input_path": "data/2_bpic_features.csv",
16
  "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
  "experiment": [
18
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
19
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.4, "ratio_top_20_variants": 0.7}
20
  ],
21
+ "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
22
  "config_space": {
23
  "mode": [5, 20],
24
  "sequence": [0.01, 1],
 
27
  "loop": [0.01, 1],
28
  "silent": [0.01, 1],
29
  "lt_dependency": [0.01, 1],
30
+ "num_traces": [10, 100],
31
  "duplicate": [0],
32
  "or": [0]
33
  },
 
36
  },
37
  {
38
  "pipeline_step": "feature_extraction",
39
+ "input_path": "data/test",
40
  "feature_params": {"feature_set":["trace_length"]},
41
  "output_path": "output/plots",
42
  "real_eventlog_path": "data/bpic_features.csv",
 
45
  {
46
  "pipeline_step": "benchmark_test",
47
  "benchmark_test": "discovery",
48
+ "input_path":"data/test",
49
  "output_path":"output",
50
  "miners" : ["inductive", "heuristics", "imf", "ilp"]
51
  }
config_files/algorithm/feature_extraction.json CHANGED
@@ -1,7 +1,7 @@
1
  [
2
  {
3
  "pipeline_step": "feature_extraction",
4
- "input_path": "data/test_2",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/bpic_features.csv",
 
1
  [
2
  {
3
  "pipeline_step": "feature_extraction",
4
+ "input_path": "data/test",
5
  "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
  "output_path": "output/plots",
7
  "real_eventlog_path": "data/bpic_features.csv",
config_files/algorithm/generation.json CHANGED
@@ -3,11 +3,8 @@
3
  "pipeline_step": "event_logs_generation",
4
  "output_path": "output",
5
  "generator_params": {
6
- "experiment": {
7
- "input_path": "data/grid_objectives.csv",
8
- "objectives": ["epa_normalized_variant_entropy"],
9
- "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
10
- "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
11
  },
12
  "config_space": {
13
  "mode": [5, 20],
 
3
  "pipeline_step": "event_logs_generation",
4
  "output_path": "output",
5
  "generator_params": {
6
+ "experiment": {"input_path": "data/test/grid_feat.csv",
7
+ "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
 
 
 
8
  },
9
  "config_space": {
10
  "mode": [5, 20],
data/2_grid_test.csv CHANGED
@@ -1,3 +1,3 @@
1
- task,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting
2
- task_1,0.0,0.0
3
- task_2,0.0,0.1
 
1
+ log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
2
+ experiment1,0.2,0.4
3
+ experiment2,0.4,0.7
data/{test_2 → test}/gen_el_168.xes RENAMED
File without changes
data/{test_2 → test}/gen_el_169.xes RENAMED
File without changes
data/test/grid_feat.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
2
+ experiment1,0.2,0.4
3
+ experiment2,0.4,0.7
data/test/plotter/1_enve_feat.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ epa_normalized_variant_entropy,log
2
+ 0.41202322946059605,task_5
3
+ 0.79999386158591,task_9
4
+ 0.8925919422394111,task_10
5
+ 0.493812449168448,task_6
6
+ 0.20299577565110202,task_3
7
+ 0.337263992015401,task_4
8
+ 0.0,task_1
9
+ 0.102184538023266,task_2
10
+ 0.600006599245775,task_7
11
+ 0.6999779396851361,task_8
12
+ 0.8796185572534461,task_11
data/test/plotter/grid_1objectives_enve.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ task,epa_normalized_variant_entropy
2
+ task_1,0.0
3
+ task_2,0.1
4
+ task_3,0.2
5
+ task_4,0.3
6
+ task_5,0.4
7
+ task_6,0.5
8
+ task_7,0.6
9
+ task_8,0.7
10
+ task_9,0.8
11
+ task_10,0.9
12
+ task_11,1.0
execute_grid_experiments.py CHANGED
@@ -2,7 +2,7 @@ import multiprocessing
2
  import os
3
 
4
  from datetime import datetime as dt
5
- from tag.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
 
8
  #TODO: Pass i properly
 
2
  import os
3
 
4
  from datetime import datetime as dt
5
+ from gedi.utils.io_helpers import sort_files
6
  from tqdm import tqdm
7
 
8
  #TODO: Pass i properly
gedi/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .generator import GenerateEventLogs
2
+ from .features import EventLogFeatures
3
+ from .analyser import FeatureAnalyser
4
+ from .augmentation import InstanceAugmentator
5
+ from .benchmark import BenchmarkTest
6
+ from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
7
+
8
+ __all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']
{tag → gedi}/analyser.py RENAMED
@@ -4,9 +4,9 @@ import warnings
4
  from sklearn.decomposition import FastICA, PCA
5
  from sklearn.manifold import TSNE
6
  from sklearn.preprocessing import Normalizer, StandardScaler
7
- from tag.features import EventLogFeatures
8
- from tag.plotter import ModelResultPlotter
9
- from tag.utils.matrix_tools import insert_missing_data
10
  # TODO: Call param_keys explicitly e.g. import INPUT_PATH
11
  from utils.param_keys import *
12
  from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
 
4
  from sklearn.decomposition import FastICA, PCA
5
  from sklearn.manifold import TSNE
6
  from sklearn.preprocessing import Normalizer, StandardScaler
7
+ from gedi.features import EventLogFeatures
8
+ from gedi.plotter import ModelResultPlotter
9
+ from gedi.utils.matrix_tools import insert_missing_data
10
  # TODO: Call param_keys explicitly e.g. import INPUT_PATH
11
  from utils.param_keys import *
12
  from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
{tag → gedi}/augmentation.py RENAMED
@@ -3,7 +3,7 @@ from collections import Counter
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
5
  from sklearn.preprocessing import Normalizer
6
- from tag.utils.matrix_tools import insert_missing_data
7
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
8
  from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
9
 
 
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
5
  from sklearn.preprocessing import Normalizer
6
+ from gedi.utils.matrix_tools import insert_missing_data
7
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
8
  from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
9
 
{tag → gedi}/benchmark.py RENAMED
@@ -16,7 +16,7 @@ from pm4py.algo.evaluation.generalization import algorithm as generalization_eva
16
  from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
17
  from pm4py.objects.bpmn.obj import BPMN
18
  from pm4py.objects.log.importer.xes import importer as xes_importer
19
- from tag.utils.io_helpers import dump_features_json
20
  from tqdm import tqdm
21
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
22
  from utils.param_keys.benchmark import MINERS
@@ -113,14 +113,14 @@ class BenchmarkTest:
113
  return
114
 
115
  def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
116
- jar_path = os.path.join("tag","libs","split-miner-1.7.1-all.jar")
117
  filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
118
  bpmn_path = os.path.join("output", "bpmns_split", filename)
119
  os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
120
  command = [
121
  "java",
122
  "-cp",
123
- f"{os.getcwd()}/tag/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
124
  "au.edu.unimelb.services.ServiceProvider",
125
  "SM2",
126
  f"{os.getcwd()}/{log_path}",
 
16
  from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
17
  from pm4py.objects.bpmn.obj import BPMN
18
  from pm4py.objects.log.importer.xes import importer as xes_importer
19
+ from gedi.utils.io_helpers import dump_features_json
20
  from tqdm import tqdm
21
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
22
  from utils.param_keys.benchmark import MINERS
 
113
  return
114
 
115
  def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
116
+ jar_path = os.path.join("gedi","libs","split-miner-1.7.1-all.jar")
117
  filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
118
  bpmn_path = os.path.join("output", "bpmns_split", filename)
119
  os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
120
  command = [
121
  "java",
122
  "-cp",
123
+ f"{os.getcwd()}/gedi/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
124
  "au.edu.unimelb.services.ServiceProvider",
125
  "SM2",
126
  f"{os.getcwd()}/{log_path}",
{tag → gedi}/features.py RENAMED
@@ -11,7 +11,7 @@ from pathlib import Path, PurePath
11
  from sklearn.impute import SimpleImputer
12
  from utils.param_keys import INPUT_PATH
13
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
14
- from tag.utils.io_helpers import dump_features_json
15
 
16
  def get_sortby_parameter(elem):
17
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
 
11
  from sklearn.impute import SimpleImputer
12
  from utils.param_keys import INPUT_PATH
13
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
14
+ from gedi.utils.io_helpers import dump_features_json
15
 
16
  def get_sortby_parameter(elem):
17
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
{tag → gedi}/generator.py RENAMED
@@ -20,7 +20,7 @@ from pm4py.sim import play_out
20
  from smac import HyperparameterOptimizationFacade, Scenario
21
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
22
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
23
- from tag.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
24
 
25
 
26
 
@@ -73,7 +73,7 @@ def get_tasks(experiment, output_path="", reference_feature=None):
73
  return tasks, output_path
74
 
75
  class GenerateEventLogs():
76
- # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/tag/blob/main/notebooks/grid_objectives.ipynb)
77
  def __init__(self, params):
78
  print("=========================== Generator ==========================")
79
  print(f"INFO: Running with {params}")
 
20
  from smac import HyperparameterOptimizationFacade, Scenario
21
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
22
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
23
+ from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
24
 
25
 
26
 
 
73
  return tasks, output_path
74
 
75
  class GenerateEventLogs():
76
+ # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
77
  def __init__(self, params):
78
  print("=========================== Generator ==========================")
79
  print(f"INFO: Running with {params}")
{tag → gedi}/plotter.py RENAMED
@@ -20,9 +20,9 @@ from collections import defaultdict
20
  from sklearn.preprocessing import Normalizer, StandardScaler
21
  from sklearn.decomposition import PCA
22
  from sklearn.metrics.pairwise import euclidean_distances
23
- from tag.generator import get_tasks
24
- from tag.utils.io_helpers import get_keys_abbreviation
25
- from tag.utils.io_helpers import read_csvs, select_instance
26
 
27
  def insert_newlines(string, every=140):
28
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
@@ -331,6 +331,7 @@ class FeaturesPlotter:
331
  fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
 
334
  fig.savefig(output_path)
335
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
336
 
@@ -617,7 +618,7 @@ class AugmentationPlotter(object):
617
  if output_path != None:
618
  output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
619
  fig.savefig(output_path)
620
- print("SUCCESS: Saved augmentation pca plot at {output_path}")
621
 
622
 
623
  class GenerationPlotter(object):
@@ -672,7 +673,7 @@ class GenerationPlotter(object):
672
  targets = orig_targets.copy()
673
  elif isinstance(orig_targets, defaultdict):
674
  if k not in orig_targets:
675
- print("[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
676
  continue
677
  targets = orig_targets[k].copy()
678
  else:
 
20
  from sklearn.preprocessing import Normalizer, StandardScaler
21
  from sklearn.decomposition import PCA
22
  from sklearn.metrics.pairwise import euclidean_distances
23
+ from gedi.generator import get_tasks
24
+ from gedi.utils.io_helpers import get_keys_abbreviation
25
+ from gedi.utils.io_helpers import read_csvs, select_instance
26
 
27
  def insert_newlines(string, every=140):
28
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
 
331
  fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
332
 
333
  if output_path != None:
334
+ os.makedirs(os.path.split(output_path)[0], exist_ok=True)
335
  fig.savefig(output_path)
336
  print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
337
 
 
618
  if output_path != None:
619
  output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
620
  fig.savefig(output_path)
621
+ print(f"SUCCESS: Saved augmentation pca plot at {output_path}")
622
 
623
 
624
  class GenerationPlotter(object):
 
673
  targets = orig_targets.copy()
674
  elif isinstance(orig_targets, defaultdict):
675
  if k not in orig_targets:
676
+ print(f"[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
677
  continue
678
  targets = orig_targets[k].copy()
679
  else:
{tag → gedi}/utils/algorithms/__init__.py RENAMED
File without changes
{tag → gedi}/utils/algorithms/tsne.py RENAMED
File without changes
{tag → gedi}/utils/array_tools.py RENAMED
File without changes
{tag → gedi}/utils/io_helpers.py RENAMED
File without changes
{tag → gedi}/utils/matrix_tools.py RENAMED
File without changes
main.py CHANGED
@@ -1,12 +1,12 @@
1
  import config
2
  import pandas as pd
3
  from datetime import datetime as dt
4
- from tag.generator import GenerateEventLogs
5
- from tag.features import EventLogFeatures
6
- from tag.analyser import FeatureAnalyser
7
- from tag.augmentation import InstanceAugmentator
8
- from tag.benchmark import BenchmarkTest
9
- from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
  from utils.default_argparse import ArgParser
11
  from utils.param_keys import *
12
  from utils.param_keys.run_options import *
@@ -57,8 +57,8 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
57
 
58
 
59
  if __name__=='__main__':
60
- start_tag = dt.now()
61
- print(f'INFO: TAG starting {start_tag}')
62
 
63
  args = ArgParser().parse('GEDI main')
64
  run_params = config.get_run_params(args.run_params_json)
@@ -70,4 +70,4 @@ if __name__=='__main__':
70
  else:
71
  load(args.result_load_files, kwargs)
72
 
73
- print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')
 
1
  import config
2
  import pandas as pd
3
  from datetime import datetime as dt
4
+ from gedi.generator import GenerateEventLogs
5
+ from gedi.features import EventLogFeatures
6
+ from gedi.analyser import FeatureAnalyser
7
+ from gedi.augmentation import InstanceAugmentator
8
+ from gedi.benchmark import BenchmarkTest
9
+ from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
  from utils.default_argparse import ArgParser
11
  from utils.param_keys import *
12
  from utils.param_keys.run_options import *
 
57
 
58
 
59
  if __name__=='__main__':
60
+ start_gedi = dt.now()
61
+ print(f'INFO: GEDI starting {start_gedi}')
62
 
63
  args = ArgParser().parse('GEDI main')
64
  run_params = config.get_run_params(args.run_params_json)
 
70
  else:
71
  load(args.result_load_files, kwargs)
72
 
73
+ print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb DELETED
@@ -1,376 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 9,
6
- "id": "e5aa7223",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import pandas as pd\n",
11
- "import numpy as np"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 10,
17
- "id": "dfd1a302",
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"]) "
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 28,
27
- "id": "218946b7",
28
- "metadata": {},
29
- "outputs": [],
30
- "source": [
31
- "k=0\n",
32
- "for i in np.arange(0.2, 1.1,0.2):\n",
33
- " for j in np.arange(0,0.55,0.1):\n",
34
- " k+=1\n",
35
- " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
36
- " \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
37
- " df = pd.concat([\n",
38
- " df, \n",
39
- " pd.DataFrame([new_entry], columns=new_entry.index)]\n",
40
- " ).reset_index(drop=True)\n",
41
- " "
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": 31,
47
- "id": "b1e3bb5a",
48
- "metadata": {},
49
- "outputs": [],
50
- "source": [
51
- "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": 32,
57
- "id": "5de45389",
58
- "metadata": {},
59
- "outputs": [
60
- {
61
- "data": {
62
- "text/html": [
63
- "<div>\n",
64
- "<style scoped>\n",
65
- " .dataframe tbody tr th:only-of-type {\n",
66
- " vertical-align: middle;\n",
67
- " }\n",
68
- "\n",
69
- " .dataframe tbody tr th {\n",
70
- " vertical-align: top;\n",
71
- " }\n",
72
- "\n",
73
- " .dataframe thead th {\n",
74
- " text-align: right;\n",
75
- " }\n",
76
- "</style>\n",
77
- "<table border=\"1\" class=\"dataframe\">\n",
78
- " <thead>\n",
79
- " <tr style=\"text-align: right;\">\n",
80
- " <th></th>\n",
81
- " <th>log</th>\n",
82
- " <th>ratio_top_20_variants</th>\n",
83
- " <th>normalized_sequence_entropy_linear_forgetting</th>\n",
84
- " </tr>\n",
85
- " </thead>\n",
86
- " <tbody>\n",
87
- " <tr>\n",
88
- " <th>0</th>\n",
89
- " <td>objective_1</td>\n",
90
- " <td>0.2</td>\n",
91
- " <td>0.0</td>\n",
92
- " </tr>\n",
93
- " <tr>\n",
94
- " <th>1</th>\n",
95
- " <td>objective_2</td>\n",
96
- " <td>0.2</td>\n",
97
- " <td>0.1</td>\n",
98
- " </tr>\n",
99
- " <tr>\n",
100
- " <th>2</th>\n",
101
- " <td>objective_3</td>\n",
102
- " <td>0.2</td>\n",
103
- " <td>0.2</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>3</th>\n",
107
- " <td>objective_4</td>\n",
108
- " <td>0.2</td>\n",
109
- " <td>0.3</td>\n",
110
- " </tr>\n",
111
- " <tr>\n",
112
- " <th>4</th>\n",
113
- " <td>objective_5</td>\n",
114
- " <td>0.2</td>\n",
115
- " <td>0.4</td>\n",
116
- " </tr>\n",
117
- " <tr>\n",
118
- " <th>5</th>\n",
119
- " <td>objective_6</td>\n",
120
- " <td>0.2</td>\n",
121
- " <td>0.5</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>6</th>\n",
125
- " <td>objective_7</td>\n",
126
- " <td>0.4</td>\n",
127
- " <td>0.0</td>\n",
128
- " </tr>\n",
129
- " <tr>\n",
130
- " <th>7</th>\n",
131
- " <td>objective_8</td>\n",
132
- " <td>0.4</td>\n",
133
- " <td>0.1</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>8</th>\n",
137
- " <td>objective_9</td>\n",
138
- " <td>0.4</td>\n",
139
- " <td>0.2</td>\n",
140
- " </tr>\n",
141
- " <tr>\n",
142
- " <th>9</th>\n",
143
- " <td>objective_10</td>\n",
144
- " <td>0.4</td>\n",
145
- " <td>0.3</td>\n",
146
- " </tr>\n",
147
- " <tr>\n",
148
- " <th>10</th>\n",
149
- " <td>objective_11</td>\n",
150
- " <td>0.4</td>\n",
151
- " <td>0.4</td>\n",
152
- " </tr>\n",
153
- " <tr>\n",
154
- " <th>11</th>\n",
155
- " <td>objective_12</td>\n",
156
- " <td>0.4</td>\n",
157
- " <td>0.5</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>12</th>\n",
161
- " <td>objective_13</td>\n",
162
- " <td>0.6</td>\n",
163
- " <td>0.0</td>\n",
164
- " </tr>\n",
165
- " <tr>\n",
166
- " <th>13</th>\n",
167
- " <td>objective_14</td>\n",
168
- " <td>0.6</td>\n",
169
- " <td>0.1</td>\n",
170
- " </tr>\n",
171
- " <tr>\n",
172
- " <th>14</th>\n",
173
- " <td>objective_15</td>\n",
174
- " <td>0.6</td>\n",
175
- " <td>0.2</td>\n",
176
- " </tr>\n",
177
- " <tr>\n",
178
- " <th>15</th>\n",
179
- " <td>objective_16</td>\n",
180
- " <td>0.6</td>\n",
181
- " <td>0.3</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>16</th>\n",
185
- " <td>objective_17</td>\n",
186
- " <td>0.6</td>\n",
187
- " <td>0.4</td>\n",
188
- " </tr>\n",
189
- " <tr>\n",
190
- " <th>17</th>\n",
191
- " <td>objective_18</td>\n",
192
- " <td>0.6</td>\n",
193
- " <td>0.5</td>\n",
194
- " </tr>\n",
195
- " <tr>\n",
196
- " <th>18</th>\n",
197
- " <td>objective_19</td>\n",
198
- " <td>0.8</td>\n",
199
- " <td>0.0</td>\n",
200
- " </tr>\n",
201
- " <tr>\n",
202
- " <th>19</th>\n",
203
- " <td>objective_20</td>\n",
204
- " <td>0.8</td>\n",
205
- " <td>0.1</td>\n",
206
- " </tr>\n",
207
- " <tr>\n",
208
- " <th>20</th>\n",
209
- " <td>objective_21</td>\n",
210
- " <td>0.8</td>\n",
211
- " <td>0.2</td>\n",
212
- " </tr>\n",
213
- " <tr>\n",
214
- " <th>21</th>\n",
215
- " <td>objective_22</td>\n",
216
- " <td>0.8</td>\n",
217
- " <td>0.3</td>\n",
218
- " </tr>\n",
219
- " <tr>\n",
220
- " <th>22</th>\n",
221
- " <td>objective_23</td>\n",
222
- " <td>0.8</td>\n",
223
- " <td>0.4</td>\n",
224
- " </tr>\n",
225
- " <tr>\n",
226
- " <th>23</th>\n",
227
- " <td>objective_24</td>\n",
228
- " <td>0.8</td>\n",
229
- " <td>0.5</td>\n",
230
- " </tr>\n",
231
- " <tr>\n",
232
- " <th>24</th>\n",
233
- " <td>objective_25</td>\n",
234
- " <td>1.0</td>\n",
235
- " <td>0.0</td>\n",
236
- " </tr>\n",
237
- " <tr>\n",
238
- " <th>25</th>\n",
239
- " <td>objective_26</td>\n",
240
- " <td>1.0</td>\n",
241
- " <td>0.1</td>\n",
242
- " </tr>\n",
243
- " <tr>\n",
244
- " <th>26</th>\n",
245
- " <td>objective_27</td>\n",
246
- " <td>1.0</td>\n",
247
- " <td>0.2</td>\n",
248
- " </tr>\n",
249
- " <tr>\n",
250
- " <th>27</th>\n",
251
- " <td>objective_28</td>\n",
252
- " <td>1.0</td>\n",
253
- " <td>0.3</td>\n",
254
- " </tr>\n",
255
- " <tr>\n",
256
- " <th>28</th>\n",
257
- " <td>objective_29</td>\n",
258
- " <td>1.0</td>\n",
259
- " <td>0.4</td>\n",
260
- " </tr>\n",
261
- " <tr>\n",
262
- " <th>29</th>\n",
263
- " <td>objective_30</td>\n",
264
- " <td>1.0</td>\n",
265
- " <td>0.5</td>\n",
266
- " </tr>\n",
267
- " </tbody>\n",
268
- "</table>\n",
269
- "</div>"
270
- ],
271
- "text/plain": [
272
- " log ratio_top_20_variants \n",
273
- "0 objective_1 0.2 \\\n",
274
- "1 objective_2 0.2 \n",
275
- "2 objective_3 0.2 \n",
276
- "3 objective_4 0.2 \n",
277
- "4 objective_5 0.2 \n",
278
- "5 objective_6 0.2 \n",
279
- "6 objective_7 0.4 \n",
280
- "7 objective_8 0.4 \n",
281
- "8 objective_9 0.4 \n",
282
- "9 objective_10 0.4 \n",
283
- "10 objective_11 0.4 \n",
284
- "11 objective_12 0.4 \n",
285
- "12 objective_13 0.6 \n",
286
- "13 objective_14 0.6 \n",
287
- "14 objective_15 0.6 \n",
288
- "15 objective_16 0.6 \n",
289
- "16 objective_17 0.6 \n",
290
- "17 objective_18 0.6 \n",
291
- "18 objective_19 0.8 \n",
292
- "19 objective_20 0.8 \n",
293
- "20 objective_21 0.8 \n",
294
- "21 objective_22 0.8 \n",
295
- "22 objective_23 0.8 \n",
296
- "23 objective_24 0.8 \n",
297
- "24 objective_25 1.0 \n",
298
- "25 objective_26 1.0 \n",
299
- "26 objective_27 1.0 \n",
300
- "27 objective_28 1.0 \n",
301
- "28 objective_29 1.0 \n",
302
- "29 objective_30 1.0 \n",
303
- "\n",
304
- " normalized_sequence_entropy_linear_forgetting \n",
305
- "0 0.0 \n",
306
- "1 0.1 \n",
307
- "2 0.2 \n",
308
- "3 0.3 \n",
309
- "4 0.4 \n",
310
- "5 0.5 \n",
311
- "6 0.0 \n",
312
- "7 0.1 \n",
313
- "8 0.2 \n",
314
- "9 0.3 \n",
315
- "10 0.4 \n",
316
- "11 0.5 \n",
317
- "12 0.0 \n",
318
- "13 0.1 \n",
319
- "14 0.2 \n",
320
- "15 0.3 \n",
321
- "16 0.4 \n",
322
- "17 0.5 \n",
323
- "18 0.0 \n",
324
- "19 0.1 \n",
325
- "20 0.2 \n",
326
- "21 0.3 \n",
327
- "22 0.4 \n",
328
- "23 0.5 \n",
329
- "24 0.0 \n",
330
- "25 0.1 \n",
331
- "26 0.2 \n",
332
- "27 0.3 \n",
333
- "28 0.4 \n",
334
- "29 0.5 "
335
- ]
336
- },
337
- "execution_count": 32,
338
- "metadata": {},
339
- "output_type": "execute_result"
340
- }
341
- ],
342
- "source": [
343
- "df"
344
- ]
345
- },
346
- {
347
- "cell_type": "code",
348
- "execution_count": null,
349
- "id": "d726a5ae",
350
- "metadata": {},
351
- "outputs": [],
352
- "source": []
353
- }
354
- ],
355
- "metadata": {
356
- "kernelspec": {
357
- "display_name": "Python 3 (ipykernel)",
358
- "language": "python",
359
- "name": "python3"
360
- },
361
- "language_info": {
362
- "codemirror_mode": {
363
- "name": "ipython",
364
- "version": 3
365
- },
366
- "file_extension": ".py",
367
- "mimetype": "text/x-python",
368
- "name": "python",
369
- "nbconvert_exporter": "python",
370
- "pygments_lexer": "ipython3",
371
- "version": "3.9.7"
372
- }
373
- },
374
- "nbformat": 4,
375
- "nbformat_minor": 5
376
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb DELETED
@@ -1,818 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "id": "4827785f",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "data": {
11
- "text/html": [
12
- "<div>\n",
13
- "<style scoped>\n",
14
- " .dataframe tbody tr th:only-of-type {\n",
15
- " vertical-align: middle;\n",
16
- " }\n",
17
- "\n",
18
- " .dataframe tbody tr th {\n",
19
- " vertical-align: top;\n",
20
- " }\n",
21
- "\n",
22
- " .dataframe thead th {\n",
23
- " text-align: right;\n",
24
- " }\n",
25
- "</style>\n",
26
- "<table border=\"1\" class=\"dataframe\">\n",
27
- " <thead>\n",
28
- " <tr style=\"text-align: right;\">\n",
29
- " <th></th>\n",
30
- " <th>Name</th>\n",
31
- " <th>Short description</th>\n",
32
- " <th>data link</th>\n",
33
- " <th>challenge link</th>\n",
34
- " <th>Citations (Stand Februar 2023)</th>\n",
35
- " <th>Publications</th>\n",
36
- " <th>Process Discovery/ Declarative</th>\n",
37
- " <th>Conformance Checking / Alignment / Replay</th>\n",
38
- " <th>Online / Streaming / Realtime</th>\n",
39
- " <th>Performance (Analysis) / Temporal / Time</th>\n",
40
- " <th>Predict(ive)/ Monitoring/ Prescriptive</th>\n",
41
- " <th>Trace clustering / Clustering</th>\n",
42
- " <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
43
- " <th>Further keywords:</th>\n",
44
- " </tr>\n",
45
- " </thead>\n",
46
- " <tbody>\n",
47
- " <tr>\n",
48
- " <th>0</th>\n",
49
- " <td>Sepsis Cases - Event Log</td>\n",
50
- " <td>This real-life event log contains events of se...</td>\n",
51
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
52
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
53
- " <td>61</td>\n",
54
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
55
- " <td>17</td>\n",
56
- " <td>7</td>\n",
57
- " <td>4</td>\n",
58
- " <td>1</td>\n",
59
- " <td>8</td>\n",
60
- " <td>2</td>\n",
61
- " <td>2</td>\n",
62
- " <td>(machine) learning, (online process) monitorin...</td>\n",
63
- " </tr>\n",
64
- " <tr>\n",
65
- " <th>1</th>\n",
66
- " <td>BPI 2017 - Offer Log</td>\n",
67
- " <td>Contains data from a financial institute inclu...</td>\n",
68
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
69
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
70
- " <td>4</td>\n",
71
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
72
- " <td>1</td>\n",
73
- " <td>0</td>\n",
74
- " <td>0</td>\n",
75
- " <td>1</td>\n",
76
- " <td>1</td>\n",
77
- " <td>0</td>\n",
78
- " <td>0</td>\n",
79
- " <td>(machine) learning, cloud computing</td>\n",
80
- " </tr>\n",
81
- " <tr>\n",
82
- " <th>2</th>\n",
83
- " <td>Road Traffic Fine Management Process (not BPI)</td>\n",
84
- " <td>A real-life event log taken from an informatio...</td>\n",
85
- " <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
86
- " <td>NaN</td>\n",
87
- " <td>95</td>\n",
88
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
89
- " <td>32</td>\n",
90
- " <td>9</td>\n",
91
- " <td>4</td>\n",
92
- " <td>8</td>\n",
93
- " <td>15</td>\n",
94
- " <td>1</td>\n",
95
- " <td>2</td>\n",
96
- " <td>alarm-based prescriptive process monitoring, b...</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>3</th>\n",
100
- " <td>BPI 2011</td>\n",
101
- " <td>Contains data from from a Dutch Academic Hospi...</td>\n",
102
- " <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
103
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
104
- " <td>57</td>\n",
105
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
106
- " <td>13</td>\n",
107
- " <td>1</td>\n",
108
- " <td>3</td>\n",
109
- " <td>4</td>\n",
110
- " <td>12</td>\n",
111
- " <td>4</td>\n",
112
- " <td>1</td>\n",
113
- " <td>(compliance) monitoring, (machine) learning, d...</td>\n",
114
- " </tr>\n",
115
- " <tr>\n",
116
- " <th>4</th>\n",
117
- " <td>BPI 2012</td>\n",
118
- " <td>Contains the event log of an application proce...</td>\n",
119
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
120
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
121
- " <td>151</td>\n",
122
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
123
- " <td>40</td>\n",
124
- " <td>15</td>\n",
125
- " <td>4</td>\n",
126
- " <td>13</td>\n",
127
- " <td>46</td>\n",
128
- " <td>0</td>\n",
129
- " <td>1</td>\n",
130
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>5</th>\n",
134
- " <td>BPI 2013 - Open Problems</td>\n",
135
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
136
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
137
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
138
- " <td>6</td>\n",
139
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
140
- " <td>1</td>\n",
141
- " <td>0</td>\n",
142
- " <td>0</td>\n",
143
- " <td>0</td>\n",
144
- " <td>1</td>\n",
145
- " <td>0</td>\n",
146
- " <td>0</td>\n",
147
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
148
- " </tr>\n",
149
- " <tr>\n",
150
- " <th>6</th>\n",
151
- " <td>BPI 2013 - Closed Problems</td>\n",
152
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
153
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
154
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
155
- " <td>12</td>\n",
156
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
157
- " <td>3</td>\n",
158
- " <td>2</td>\n",
159
- " <td>1</td>\n",
160
- " <td>2</td>\n",
161
- " <td>0</td>\n",
162
- " <td>0</td>\n",
163
- " <td>3</td>\n",
164
- " <td>(in)frequent patterns in process models</td>\n",
165
- " </tr>\n",
166
- " <tr>\n",
167
- " <th>7</th>\n",
168
- " <td>BPI 2013 - Incidents</td>\n",
169
- " <td>The log contains events from an incident and p...</td>\n",
170
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
171
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
172
- " <td>36</td>\n",
173
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
174
- " <td>14</td>\n",
175
- " <td>5</td>\n",
176
- " <td>1</td>\n",
177
- " <td>1</td>\n",
178
- " <td>7</td>\n",
179
- " <td>0</td>\n",
180
- " <td>2</td>\n",
181
- " <td>(machine) learning, rule mining</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>8</th>\n",
185
- " <td>BPI 2014 - Incident Records</td>\n",
186
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
187
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
188
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
189
- " <td>5</td>\n",
190
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
191
- " <td>1</td>\n",
192
- " <td>0</td>\n",
193
- " <td>0</td>\n",
194
- " <td>0</td>\n",
195
- " <td>0</td>\n",
196
- " <td>0</td>\n",
197
- " <td>0</td>\n",
198
- " <td>privacy preservation, security</td>\n",
199
- " </tr>\n",
200
- " <tr>\n",
201
- " <th>9</th>\n",
202
- " <td>BPI 2014 - Interaction Records</td>\n",
203
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
204
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
205
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
206
- " <td>1</td>\n",
207
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
208
- " <td>0</td>\n",
209
- " <td>0</td>\n",
210
- " <td>0</td>\n",
211
- " <td>0</td>\n",
212
- " <td>0</td>\n",
213
- " <td>0</td>\n",
214
- " <td>0</td>\n",
215
- " <td>(machine) learning, hidden Markov models</td>\n",
216
- " </tr>\n",
217
- " <tr>\n",
218
- " <th>10</th>\n",
219
- " <td>BPI 2015 - Log 3</td>\n",
220
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
221
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
222
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
223
- " <td>1</td>\n",
224
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
225
- " <td>0</td>\n",
226
- " <td>0</td>\n",
227
- " <td>0</td>\n",
228
- " <td>0</td>\n",
229
- " <td>1</td>\n",
230
- " <td>0</td>\n",
231
- " <td>0</td>\n",
232
- " <td>specification-driven predictive business proce...</td>\n",
233
- " </tr>\n",
234
- " <tr>\n",
235
- " <th>11</th>\n",
236
- " <td>BPI 2015 - Log 1</td>\n",
237
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
238
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
239
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
240
- " <td>8</td>\n",
241
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
242
- " <td>1</td>\n",
243
- " <td>1</td>\n",
244
- " <td>0</td>\n",
245
- " <td>0</td>\n",
246
- " <td>3</td>\n",
247
- " <td>0</td>\n",
248
- " <td>3</td>\n",
249
- " <td>(machine) learning</td>\n",
250
- " </tr>\n",
251
- " <tr>\n",
252
- " <th>12</th>\n",
253
- " <td>BPI 2016 - Clicks Logged In</td>\n",
254
- " <td>Contains clicks of users that are logged in fr...</td>\n",
255
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
256
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
257
- " <td>1</td>\n",
258
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
259
- " <td>1</td>\n",
260
- " <td>0</td>\n",
261
- " <td>1</td>\n",
262
- " <td>0</td>\n",
263
- " <td>0</td>\n",
264
- " <td>0</td>\n",
265
- " <td>0</td>\n",
266
- " <td>automation</td>\n",
267
- " </tr>\n",
268
- " <tr>\n",
269
- " <th>13</th>\n",
270
- " <td>BPI 2017 - Application Log</td>\n",
271
- " <td>Contains data from a financial institute inclu...</td>\n",
272
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
273
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
274
- " <td>73</td>\n",
275
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
276
- " <td>11</td>\n",
277
- " <td>5</td>\n",
278
- " <td>2</td>\n",
279
- " <td>14</td>\n",
280
- " <td>23</td>\n",
281
- " <td>1</td>\n",
282
- " <td>1</td>\n",
283
- " <td>(machine) learning, alarm-based prescriptive p...</td>\n",
284
- " </tr>\n",
285
- " <tr>\n",
286
- " <th>14</th>\n",
287
- " <td>BPI 2018</td>\n",
288
- " <td>The process covers the handling of application...</td>\n",
289
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
290
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
291
- " <td>26</td>\n",
292
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
293
- " <td>7</td>\n",
294
- " <td>1</td>\n",
295
- " <td>2</td>\n",
296
- " <td>0</td>\n",
297
- " <td>8</td>\n",
298
- " <td>0</td>\n",
299
- " <td>2</td>\n",
300
- " <td>(machine) learning, automation</td>\n",
301
- " </tr>\n",
302
- " <tr>\n",
303
- " <th>15</th>\n",
304
- " <td>BPI 2020 - Travel Permits</td>\n",
305
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
306
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
307
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
308
- " <td>2</td>\n",
309
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
310
- " <td>0</td>\n",
311
- " <td>0</td>\n",
312
- " <td>0</td>\n",
313
- " <td>1</td>\n",
314
- " <td>0</td>\n",
315
- " <td>0</td>\n",
316
- " <td>0</td>\n",
317
- " <td>stage-based process performance analysis</td>\n",
318
- " </tr>\n",
319
- " <tr>\n",
320
- " <th>16</th>\n",
321
- " <td>BPI 2019</td>\n",
322
- " <td>Contains the purchase order handling process o...</td>\n",
323
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
324
- " <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
325
- " <td>35</td>\n",
326
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
327
- " <td>3</td>\n",
328
- " <td>1</td>\n",
329
- " <td>6</td>\n",
330
- " <td>6</td>\n",
331
- " <td>9</td>\n",
332
- " <td>4</td>\n",
333
- " <td>1</td>\n",
334
- " <td>(online process) monitoring, remaining time pr...</td>\n",
335
- " </tr>\n",
336
- " <tr>\n",
337
- " <th>17</th>\n",
338
- " <td>BPI 2020 - International Declarations</td>\n",
339
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
340
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
341
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
342
- " <td>2</td>\n",
343
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
344
- " <td>0</td>\n",
345
- " <td>0</td>\n",
346
- " <td>0</td>\n",
347
- " <td>1</td>\n",
348
- " <td>2</td>\n",
349
- " <td>0</td>\n",
350
- " <td>0</td>\n",
351
- " <td>(machine) learning, remaining time prediction</td>\n",
352
- " </tr>\n",
353
- " <tr>\n",
354
- " <th>18</th>\n",
355
- " <td>BPI 2020 - Domestic Declarations</td>\n",
356
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
357
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
358
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
359
- " <td>7</td>\n",
360
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
361
- " <td>0</td>\n",
362
- " <td>2</td>\n",
363
- " <td>2</td>\n",
364
- " <td>2</td>\n",
365
- " <td>3</td>\n",
366
- " <td>0</td>\n",
367
- " <td>0</td>\n",
368
- " <td>(machine) learning, remaining time prediction</td>\n",
369
- " </tr>\n",
370
- " <tr>\n",
371
- " <th>19</th>\n",
372
- " <td>BPI 2020 - Prepaid Travel Cost</td>\n",
373
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
374
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
375
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
376
- " <td>2</td>\n",
377
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
378
- " <td>0</td>\n",
379
- " <td>0</td>\n",
380
- " <td>0</td>\n",
381
- " <td>0</td>\n",
382
- " <td>0</td>\n",
383
- " <td>0</td>\n",
384
- " <td>0</td>\n",
385
- " <td>multi-perspective</td>\n",
386
- " </tr>\n",
387
- " <tr>\n",
388
- " <th>20</th>\n",
389
- " <td>Helpdesk</td>\n",
390
- " <td>Ticketing management process of the Help desk ...</td>\n",
391
- " <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
392
- " <td>NaN</td>\n",
393
- " <td>20</td>\n",
394
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
395
- " <td>4</td>\n",
396
- " <td>1</td>\n",
397
- " <td>3</td>\n",
398
- " <td>1</td>\n",
399
- " <td>8</td>\n",
400
- " <td>0</td>\n",
401
- " <td>0</td>\n",
402
- " <td>(machine) learning, drift detection</td>\n",
403
- " </tr>\n",
404
- " <tr>\n",
405
- " <th>21</th>\n",
406
- " <td>Receipt phase of an environmental permit appli...</td>\n",
407
- " <td>Data originates from the CoSeLoG project where...</td>\n",
408
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
409
- " <td>NaN</td>\n",
410
- " <td>15</td>\n",
411
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
412
- " <td>-1</td>\n",
413
- " <td>-1</td>\n",
414
- " <td>-1</td>\n",
415
- " <td>-1</td>\n",
416
- " <td>-1</td>\n",
417
- " <td>-1</td>\n",
418
- " <td>-1</td>\n",
419
- " <td>NaN</td>\n",
420
- " </tr>\n",
421
- " <tr>\n",
422
- " <th>22</th>\n",
423
- " <td>Environmental permit application process (β€˜WAB...</td>\n",
424
- " <td>Data originates from the CoSeLoG project where...</td>\n",
425
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
426
- " <td>NaN</td>\n",
427
- " <td>2</td>\n",
428
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
429
- " <td>0</td>\n",
430
- " <td>0</td>\n",
431
- " <td>0</td>\n",
432
- " <td>0</td>\n",
433
- " <td>1</td>\n",
434
- " <td>0</td>\n",
435
- " <td>0</td>\n",
436
- " <td>predictions with a-priori knowledge</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>23</th>\n",
440
- " <td>Environmental permit application process (β€˜WAB...</td>\n",
441
- " <td>Data originates from the CoSeLoG project where...</td>\n",
442
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
443
- " <td>NaN</td>\n",
444
- " <td>2</td>\n",
445
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
446
- " <td>1</td>\n",
447
- " <td>0</td>\n",
448
- " <td>0</td>\n",
449
- " <td>0</td>\n",
450
- " <td>0</td>\n",
451
- " <td>0</td>\n",
452
- " <td>0</td>\n",
453
- " <td>multidimensional process mining, process cubes</td>\n",
454
- " </tr>\n",
455
- " <tr>\n",
456
- " <th>24</th>\n",
457
- " <td>NaN</td>\n",
458
- " <td>NaN</td>\n",
459
- " <td>NaN</td>\n",
460
- " <td>NaN</td>\n",
461
- " <td>NaN</td>\n",
462
- " <td>NaN</td>\n",
463
- " <td>NaN</td>\n",
464
- " <td>NaN</td>\n",
465
- " <td>NaN</td>\n",
466
- " <td>NaN</td>\n",
467
- " <td>NaN</td>\n",
468
- " <td>NaN</td>\n",
469
- " <td>NaN</td>\n",
470
- " <td>NaN</td>\n",
471
- " </tr>\n",
472
- " </tbody>\n",
473
- "</table>\n",
474
- "</div>"
475
- ],
476
- "text/plain": [
477
- " Name \\\n",
478
- "0 Sepsis Cases - Event Log \n",
479
- "1 BPI 2017 - Offer Log \n",
480
- "2 Road Traffic Fine Management Process (not BPI) \n",
481
- "3 BPI 2011 \n",
482
- "4 BPI 2012 \n",
483
- "5 BPI 2013 - Open Problems \n",
484
- "6 BPI 2013 - Closed Problems \n",
485
- "7 BPI 2013 - Incidents \n",
486
- "8 BPI 2014 - Incident Records \n",
487
- "9 BPI 2014 - Interaction Records \n",
488
- "10 BPI 2015 - Log 3 \n",
489
- "11 BPI 2015 - Log 1 \n",
490
- "12 BPI 2016 - Clicks Logged In \n",
491
- "13 BPI 2017 - Application Log \n",
492
- "14 BPI 2018 \n",
493
- "15 BPI 2020 - Travel Permits \n",
494
- "16 BPI 2019 \n",
495
- "17 BPI 2020 - International Declarations \n",
496
- "18 BPI 2020 - Domestic Declarations \n",
497
- "19 BPI 2020 - Prepaid Travel Cost \n",
498
- "20 Helpdesk \n",
499
- "21 Receipt phase of an environmental permit appli... \n",
500
- "22 Environmental permit application process (β€˜WAB... \n",
501
- "23 Environmental permit application process (β€˜WAB... \n",
502
- "24 NaN \n",
503
- "\n",
504
- " Short description \\\n",
505
- "0 This real-life event log contains events of se... \n",
506
- "1 Contains data from a financial institute inclu... \n",
507
- "2 A real-life event log taken from an informatio... \n",
508
- "3 Contains data from from a Dutch Academic Hospi... \n",
509
- "4 Contains the event log of an application proce... \n",
510
- "5 Rabobank Group ICT implemented ITIL processes ... \n",
511
- "6 Rabobank Group ICT implemented ITIL processes ... \n",
512
- "7 The log contains events from an incident and p... \n",
513
- "8 Rabobank Group ICT implemented ITIL processes ... \n",
514
- "9 Rabobank Group ICT implemented ITIL processes ... \n",
515
- "10 Provided by 5 Dutch municipalities. The data c... \n",
516
- "11 Provided by 5 Dutch municipalities. The data c... \n",
517
- "12 Contains clicks of users that are logged in fr... \n",
518
- "13 Contains data from a financial institute inclu... \n",
519
- "14 The process covers the handling of application... \n",
520
- "15 Contains 2 years of data from the reimbursemen... \n",
521
- "16 Contains the purchase order handling process o... \n",
522
- "17 Contains 2 years of data from the reimbursemen... \n",
523
- "18 Contains 2 years of data from the reimbursemen... \n",
524
- "19 Contains 2 years of data from the reimbursemen... \n",
525
- "20 Ticketing management process of the Help desk ... \n",
526
- "21 Data originates from the CoSeLoG project where... \n",
527
- "22 Data originates from the CoSeLoG project where... \n",
528
- "23 Data originates from the CoSeLoG project where... \n",
529
- "24 NaN \n",
530
- "\n",
531
- " data link \\\n",
532
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
533
- "1 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
534
- "2 https://data.4tu.nl/articles/dataset/Road_Traf... \n",
535
- "3 https://data.4tu.nl/articles/dataset/Real-life... \n",
536
- "4 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
537
- "5 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
538
- "6 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
539
- "7 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
540
- "8 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
541
- "9 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
542
- "10 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
543
- "11 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
544
- "12 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
545
- "13 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
546
- "14 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
547
- "15 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
548
- "16 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
549
- "17 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
550
- "18 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
551
- "19 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
552
- "20 https://data.4tu.nl/articles/dataset/Dataset_b... \n",
553
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
554
- "22 https://data.4tu.nl/articles/dataset/Environme... \n",
555
- "23 https://data.4tu.nl/articles/dataset/Environme... \n",
556
- "24 NaN \n",
557
- "\n",
558
- " challenge link \\\n",
559
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
560
- "1 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
561
- "2 NaN \n",
562
- "3 https://www.win.tue.nl/bpi/doku.php?id=2011:ch... \n",
563
- "4 https://www.win.tue.nl/bpi/doku.php?id=2012:ch... \n",
564
- "5 https://www.win.tue.nl/bpi/2013/challenge.html \n",
565
- "6 https://www.win.tue.nl/bpi/doku.php?id=2013:ch... \n",
566
- "7 https://www.win.tue.nl/bpi/2013/challenge.html \n",
567
- "8 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
568
- "9 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
569
- "10 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
570
- "11 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
571
- "12 https://www.win.tue.nl/bpi/doku.php?id=2016:ch... \n",
572
- "13 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
573
- "14 https://www.win.tue.nl/bpi/doku.php?id=2018:ch... \n",
574
- "15 https://icpmconference.org/2020/bpi-challenge/ \n",
575
- "16 https://icpmconference.org/2019/icpm-2019/cont... \n",
576
- "17 https://icpmconference.org/2020/bpi-challenge/ \n",
577
- "18 https://icpmconference.org/2020/bpi-challenge/ \n",
578
- "19 https://icpmconference.org/2020/bpi-challenge/ \n",
579
- "20 NaN \n",
580
- "21 NaN \n",
581
- "22 NaN \n",
582
- "23 NaN \n",
583
- "24 NaN \n",
584
- "\n",
585
- " Citations (Stand Februar 2023) \\\n",
586
- "0 61 \n",
587
- "1 4 \n",
588
- "2 95 \n",
589
- "3 57 \n",
590
- "4 151 \n",
591
- "5 6 \n",
592
- "6 12 \n",
593
- "7 36 \n",
594
- "8 5 \n",
595
- "9 1 \n",
596
- "10 1 \n",
597
- "11 8 \n",
598
- "12 1 \n",
599
- "13 73 \n",
600
- "14 26 \n",
601
- "15 2 \n",
602
- "16 35 \n",
603
- "17 2 \n",
604
- "18 7 \n",
605
- "19 2 \n",
606
- "20 20 \n",
607
- "21 15 \n",
608
- "22 2 \n",
609
- "23 2 \n",
610
- "24 NaN \n",
611
- "\n",
612
- " Publications \\\n",
613
- "0 https://app.dimensions.ai/discover/publication... \n",
614
- "1 https://app.dimensions.ai/discover/publication... \n",
615
- "2 https://app.dimensions.ai/discover/publication... \n",
616
- "3 https://app.dimensions.ai/discover/publication... \n",
617
- "4 https://app.dimensions.ai/discover/publication... \n",
618
- "5 https://app.dimensions.ai/discover/publication... \n",
619
- "6 https://app.dimensions.ai/discover/publication... \n",
620
- "7 https://app.dimensions.ai/discover/publication... \n",
621
- "8 https://app.dimensions.ai/discover/publication... \n",
622
- "9 https://app.dimensions.ai/discover/publication... \n",
623
- "10 https://app.dimensions.ai/discover/publication... \n",
624
- "11 https://app.dimensions.ai/discover/publication... \n",
625
- "12 https://app.dimensions.ai/discover/publication... \n",
626
- "13 https://app.dimensions.ai/discover/publication... \n",
627
- "14 https://app.dimensions.ai/discover/publication... \n",
628
- "15 https://app.dimensions.ai/discover/publication... \n",
629
- "16 https://app.dimensions.ai/discover/publication... \n",
630
- "17 https://app.dimensions.ai/discover/publication... \n",
631
- "18 https://app.dimensions.ai/discover/publication... \n",
632
- "19 https://app.dimensions.ai/discover/publication... \n",
633
- "20 https://app.dimensions.ai/discover/publication... \n",
634
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
635
- "22 https://app.dimensions.ai/discover/publication... \n",
636
- "23 https://app.dimensions.ai/discover/publication... \n",
637
- "24 NaN \n",
638
- "\n",
639
- " Process Discovery/ Declarative Conformance Checking / Alignment / Replay \\\n",
640
- "0 17 7 \n",
641
- "1 1 0 \n",
642
- "2 32 9 \n",
643
- "3 13 1 \n",
644
- "4 40 15 \n",
645
- "5 1 0 \n",
646
- "6 3 2 \n",
647
- "7 14 5 \n",
648
- "8 1 0 \n",
649
- "9 0 0 \n",
650
- "10 0 0 \n",
651
- "11 1 1 \n",
652
- "12 1 0 \n",
653
- "13 11 5 \n",
654
- "14 7 1 \n",
655
- "15 0 0 \n",
656
- "16 3 1 \n",
657
- "17 0 0 \n",
658
- "18 0 2 \n",
659
- "19 0 0 \n",
660
- "20 4 1 \n",
661
- "21 -1 -1 \n",
662
- "22 0 0 \n",
663
- "23 1 0 \n",
664
- "24 NaN NaN \n",
665
- "\n",
666
- " Online / Streaming / Realtime Performance (Analysis) / Temporal / Time \\\n",
667
- "0 4 1 \n",
668
- "1 0 1 \n",
669
- "2 4 8 \n",
670
- "3 3 4 \n",
671
- "4 4 13 \n",
672
- "5 0 0 \n",
673
- "6 1 2 \n",
674
- "7 1 1 \n",
675
- "8 0 0 \n",
676
- "9 0 0 \n",
677
- "10 0 0 \n",
678
- "11 0 0 \n",
679
- "12 1 0 \n",
680
- "13 2 14 \n",
681
- "14 2 0 \n",
682
- "15 0 1 \n",
683
- "16 6 6 \n",
684
- "17 0 1 \n",
685
- "18 2 2 \n",
686
- "19 0 0 \n",
687
- "20 3 1 \n",
688
- "21 -1 -1 \n",
689
- "22 0 0 \n",
690
- "23 0 0 \n",
691
- "24 NaN NaN \n",
692
- "\n",
693
- " Predict(ive)/ Monitoring/ Prescriptive Trace clustering / Clustering \\\n",
694
- "0 8 2 \n",
695
- "1 1 0 \n",
696
- "2 15 1 \n",
697
- "3 12 4 \n",
698
- "4 46 0 \n",
699
- "5 1 0 \n",
700
- "6 0 0 \n",
701
- "7 7 0 \n",
702
- "8 0 0 \n",
703
- "9 0 0 \n",
704
- "10 1 0 \n",
705
- "11 3 0 \n",
706
- "12 0 0 \n",
707
- "13 23 1 \n",
708
- "14 8 0 \n",
709
- "15 0 0 \n",
710
- "16 9 4 \n",
711
- "17 2 0 \n",
712
- "18 3 0 \n",
713
- "19 0 0 \n",
714
- "20 8 0 \n",
715
- "21 -1 -1 \n",
716
- "22 1 0 \n",
717
- "23 0 0 \n",
718
- "24 NaN NaN \n",
719
- "\n",
720
- " Preprocessing / Event Abstraction / Event Data Correlation \\\n",
721
- "0 2 \n",
722
- "1 0 \n",
723
- "2 2 \n",
724
- "3 1 \n",
725
- "4 1 \n",
726
- "5 0 \n",
727
- "6 3 \n",
728
- "7 2 \n",
729
- "8 0 \n",
730
- "9 0 \n",
731
- "10 0 \n",
732
- "11 3 \n",
733
- "12 0 \n",
734
- "13 1 \n",
735
- "14 2 \n",
736
- "15 0 \n",
737
- "16 1 \n",
738
- "17 0 \n",
739
- "18 0 \n",
740
- "19 0 \n",
741
- "20 0 \n",
742
- "21 -1 \n",
743
- "22 0 \n",
744
- "23 0 \n",
745
- "24 NaN \n",
746
- "\n",
747
- " Further keywords: \n",
748
- "0 (machine) learning, (online process) monitorin... \n",
749
- "1 (machine) learning, cloud computing \n",
750
- "2 alarm-based prescriptive process monitoring, b... \n",
751
- "3 (compliance) monitoring, (machine) learning, d... \n",
752
- "4 (in)frequent patterns in process models, (mach... \n",
753
- "5 (in)frequent patterns in process models, (mach... \n",
754
- "6 (in)frequent patterns in process models \n",
755
- "7 (machine) learning, rule mining \n",
756
- "8 privacy preservation, security \n",
757
- "9 (machine) learning, hidden Markov models \n",
758
- "10 specification-driven predictive business proce... \n",
759
- "11 (machine) learning \n",
760
- "12 automation \n",
761
- "13 (machine) learning, alarm-based prescriptive p... \n",
762
- "14 (machine) learning, automation \n",
763
- "15 stage-based process performance analysis \n",
764
- "16 (online process) monitoring, remaining time pr... \n",
765
- "17 (machine) learning, remaining time prediction \n",
766
- "18 (machine) learning, remaining time prediction \n",
767
- "19 multi-perspective \n",
768
- "20 (machine) learning, drift detection \n",
769
- "21 NaN \n",
770
- "22 predictions with a-priori knowledge \n",
771
- "23 multidimensional process mining, process cubes \n",
772
- "24 NaN "
773
- ]
774
- },
775
- "execution_count": 4,
776
- "metadata": {},
777
- "output_type": "execute_result"
778
- }
779
- ],
780
- "source": [
781
- "#import pm4py\n",
782
- "import pandas as pd\n",
783
- "INPUT_PATH = \"../data/mappings.csv\"\n",
784
- "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
785
- "df"
786
- ]
787
- },
788
- {
789
- "cell_type": "code",
790
- "execution_count": null,
791
- "id": "04a97f37",
792
- "metadata": {},
793
- "outputs": [],
794
- "source": []
795
- }
796
- ],
797
- "metadata": {
798
- "kernelspec": {
799
- "display_name": "Python 3 (ipykernel)",
800
- "language": "python",
801
- "name": "python3"
802
- },
803
- "language_info": {
804
- "codemirror_mode": {
805
- "name": "ipython",
806
- "version": 3
807
- },
808
- "file_extension": ".py",
809
- "mimetype": "text/x-python",
810
- "name": "python",
811
- "nbconvert_exporter": "python",
812
- "pygments_lexer": "ipython3",
813
- "version": "3.10.7"
814
- }
815
- },
816
- "nbformat": 4,
817
- "nbformat_minor": 5
818
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/benchmarking_process_discovery.ipynb CHANGED
@@ -1277,7 +1277,7 @@
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
- "version": "3.9.7"
1426
  }
1427
  },
1428
  "nbformat": 4,
 
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
 
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
+ "version": "3.9.19"
1426
  }
1427
  },
1428
  "nbformat": 4,
notebooks/bpic_generability_pdm.ipynb CHANGED
@@ -1223,7 +1223,7 @@
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
 
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
notebooks/experiment_generator.ipynb CHANGED
@@ -2225,7 +2225,7 @@
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
- "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
- "version": "3.9.7"
3106
  }
3107
  },
3108
  "nbformat": 4,
 
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
+ "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
 
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
+ "version": "3.9.19"
3106
  }
3107
  },
3108
  "nbformat": 4,
notebooks/feature_distributions.ipynb CHANGED
@@ -1847,7 +1847,7 @@
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
- "version": "3.9.12"
1851
  }
1852
  },
1853
  "nbformat": 4,
 
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
+ "version": "3.9.19"
1851
  }
1852
  },
1853
  "nbformat": 4,