Merge pull request #24 from lmu-dbs/demo-icpm24
This view is limited to 50 files because it contains too many changes.
- .github/workflows/huggingface.yml +28 -0
- .github/workflows/pypi_release.yml +101 -0
- .github/workflows/test_gedi.yml +50 -10
- README.md +288 -23
- config.py +6 -69
- config_files/config_layout.json +48 -0
- config_files/{algorithm/experiment_real_targets.json → experiment_real_targets.json} +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enseef.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enself.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enve.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rmcv.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rt10v.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rvpnot.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enself.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enve.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rmcv.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rt10v.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rvpnot.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_enve.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rmcv.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rt10v.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rvpnot.json +0 -0
- config_files/grid_2obj/generator_grid_2objectives_enve_mvo.json +1 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rmcv.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rt10v.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rvpnot.json +0 -0
- config_files/grid_2obj/generator_grid_2objectives_enve_sam.json +1 -0
- config_files/grid_2obj/generator_grid_2objectives_mvo_sam.json +1 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rt10v.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rvpnot.json +0 -0
- config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rt10v_rvpnot.json +0 -0
- config_files/options/baseline.json +0 -9
- config_files/options/run_params.json +0 -9
- config_files/{algorithm/pipeline_steps → pipeline_steps}/augmentation.json +0 -0
- config_files/{algorithm/pipeline_steps → pipeline_steps}/benchmark.json +1 -1
- config_files/{algorithm/pipeline_steps → pipeline_steps}/evaluation_plotter.json +0 -0
- config_files/{algorithm/pipeline_steps → pipeline_steps}/feature_extraction.json +0 -0
- config_files/{algorithm/pipeline_steps → pipeline_steps}/generation.json +0 -0
- config_files/{algorithm → test}/experiment_test.json +0 -0
- config_files/{algorithm/test → test}/generator_2bpic_2objectives_ense_enseef.json +0 -0
- config_files/{algorithm/test → test}/generator_grid_1objectives_rt10v.json +0 -0
- config_files/{algorithm/test → test}/generator_grid_2objectives_ense_enself.json +0 -0
- config_files/test/test_abbrv_generation.json +16 -0
- data/test/grid_experiments/rt10v.csv +0 -12
- data/test/grid_feat.csv +2 -0
- data/test/igedi_table_1.csv +4 -0
- data/validation/2_ense_rmcv_feat.csv +4 -0
- data/validation/genELexperiment1_04_02.json +1 -1
- data/validation/genELexperiment3_04_nan.json +1 -0
- data/validation/genELexperiment4_nan_02.json +1 -0
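Because this view is truncated at 50 files, reviewers may want the full change set locally. A possible way to check out this pull request, assuming a GitHub remote named `origin`:
```console
git fetch origin pull/24/head:demo-icpm24
git checkout demo-icpm24
```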
.github/workflows/huggingface.yml
ADDED
@@ -0,0 +1,28 @@
name: Sync to Hugging Face hub
on:
  push:
    branches:
      - main
      - demo-icpm24
  # to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - name: Check large files
        uses: ActionsDesk/[email protected]
        with:
          filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
          lfs: true
      - name: Set current branch as a variable
        id: set_branch
        run: echo "CURRENT_BRANCH=${GITHUB_REF##*/}" >> $GITHUB_ENV
      - name: Push to hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: git push --force https://andreamalhera:[email protected]/spaces/andreamalhera/igedi $CURRENT_BRANCH:main
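The sync job above authenticates with an `HF_TOKEN` repository secret. One possible way to provide it, assuming the GitHub CLI (`gh`) is installed and authenticated (the token value below is a placeholder):
```console
# run inside a clone of this repository; paste a Hugging Face token with write access
gh secret set HF_TOKEN --body "<hf_write_token>"
```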
.github/workflows/pypi_release.yml
ADDED
@@ -0,0 +1,101 @@
name: Publish Python 🐍 distribution 📦 to PyPI

on:
  push:
    tags:
      - 'v*.*.*' # Triggers the workflow when a new version tag is pushed

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
    - name: Check out the code
      uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.x' # Specify your Python version

    - name: Install pypa/build
      run: >-
        python3 -m
        pip install
        build
        --user
    - name: Build a binary wheel and a source tarball
      run: python3 -m build
    - name: Store the distribution packages
      uses: actions/upload-artifact@v3
      with:
        name: python-package-distributions
        path: dist/

  publish-to-pypi:
    name: >-
      Publish Python 🐍 distribution 📦 to PyPI
    if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
    needs:
    - build
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/GEDI
    permissions:
      id-token: write # IMPORTANT: mandatory for trusted publishing

    steps:
    - name: Download all the dists
      uses: actions/download-artifact@v3
      with:
        name: python-package-distributions
        path: dist/
    - name: Publish distribution 📦 to PyPI
      uses: pypa/gh-action-pypi-publish@release/v1

  github-release:
    name: >-
      Sign the Python 🐍 distribution 📦 with Sigstore
      and upload them to GitHub Release
    needs:
    - publish-to-pypi
    runs-on: ubuntu-latest

    permissions:
      contents: write # IMPORTANT: mandatory for making GitHub Releases
      id-token: write # IMPORTANT: mandatory for sigstore

    steps:
    - name: Download all the dists
      uses: actions/download-artifact@v3
      with:
        name: python-package-distributions
        path: dist/
    - name: Sign the dists with Sigstore
      uses: sigstore/[email protected]
      with:
        inputs: >-
          ./dist/*.tar.gz
          ./dist/*.whl
    - name: Create GitHub Release
      env:
        GITHUB_TOKEN: ${{ github.token }}
      run: >-
        gh release create
        '${{ github.ref_name }}'
        --repo '${{ github.repository }}'
        --notes ""
    - name: Upload artifact signatures to GitHub Release
      env:
        GITHUB_TOKEN: ${{ github.token }}
      # Upload to GitHub Release using the `gh` CLI.
      # `dist/` contains the built packages, and the
      # sigstore-produced signatures and certificates.
      run: >-
        gh release upload
        '${{ github.ref_name }}' dist/**
        --repo '${{ github.repository }}'

    - name: Cleanup
      run: rm -rf dist
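The release workflow above only runs on tags matching `v*.*.*`. A typical way to trigger it (the version number is illustrative):
```console
git tag v1.0.0
git push origin v1.0.0   # triggers build, PyPI publish, Sigstore signing, and the GitHub release
```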
.github/workflows/test_gedi.yml
CHANGED
@@ -31,7 +31,7 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/pipeline_steps/feature_extraction.json
 
     - name: Compare output
       run: diff data/validation/test_feat.csv data/test_feat.csv
@@ -60,15 +60,23 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/pipeline_steps/generation.json
 
     - name: Compare output 1
       run:
-        diff data/validation/
+        diff data/validation/genELexperiment1_04_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment1_04_02.json
 
     - name: Compare output 2
       run:
-        diff data/validation/
+        diff data/validation/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
+
+    - name: Compare output 3
+      run:
+        diff data/validation/genELexperiment3_04_nan.json output/features/grid_feat/2_enself_rt20v/genELexperiment3_04_nan.json
+
+    - name: Compare output 4
+      run:
+        diff data/validation/genELexperiment4_nan_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment4_nan_02.json
 
   test_benchmark:
     runs-on: ubuntu-latest
@@ -90,10 +98,12 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/pipeline_steps/benchmark.json
 
     - name: Convert output and validation to same encoding
-      run:
+      run: |
+        iconv -f UTF-8 -t ASCII data/validation/test_benchmark.csv > data/validation/test_benchmark.csv
+        iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > output/benchmark/test_benchmark.csv
 
     - name: Compare output
       run: diff data/validation/test_benchmark.csv output/benchmark/test_benchmark.csv
@@ -118,7 +128,7 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/pipeline_steps/augmentation.json
 
   test_evaluation-plotter:
     runs-on: ubuntu-latest
@@ -144,7 +154,7 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/pipeline_steps/evaluation_plotter.json
 
   test_integration:
     runs-on: ubuntu-latest
@@ -170,7 +180,7 @@ jobs:
 
     - name: Run test
       run:
-        python main.py -
+        python main.py -a config_files/test/experiment_test.json
 
   test_grid_experiments_script:
     runs-on: ubuntu-latest
@@ -196,10 +206,40 @@ jobs:
 
     - name: Run test
       run:
-        python execute_grid_experiments.py config_files/
+        python gedi/utils/execute_grid_experiments.py config_files/test
 
     - name: Convert output and validation to same encoding
       run: iconv -f UTF-8 -t ASCII output/features/generated/2_bpic_features/2_ense_enseef_feat.csv > data/validation/2_ense_enseef_feat.csv
 
     - name: Compare output
       run: diff data/validation/2_ense_enseef_feat.csv output/features/generated/2_bpic_features/2_ense_enseef_feat.csv
+
+  test_abbrv:
+    runs-on: ubuntu-latest
+
+    # Setting up a python envronment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get install build-essential python3 python3-dev
+
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+
+    - name: Run test
+      run:
+        python main.py -a config_files/test/test_abbrv_generation.json
+
+    - name: Compare output
+      run:
+        diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
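The new `test_abbrv` job should be reproducible locally with the same commands it runs, assuming a Python 3.9 environment inside a clone of the repository:
```console
python -m pip install --upgrade pip
pip install .
python main.py -a config_files/test/test_abbrv_generation.json
diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
```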
README.md
CHANGED
@@ -1,15 +1,36 @@
-
-
-
+---
+title: iGedi
+emoji: π
+colorFrom: indigo
+colorTo: pink
+sdk: streamlit
+sdk_version: 1.38.0
+app_file: utils/config_fabric.py
+pinned: false
+license: mit
+---
+
+<p>
+    <img src="gedi/utils/logo.png" alt="Logo" width="100" align="left" />
+    <h1 style="display: inline;">(i)GEDI</h1>
+</p>
+
+(**i**nteractive) **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining<br />
+This repository contains the codebase for the interactive web application tool (iGEDI) as well as for the [GEDI paper](https://mcml.ai/publications/gedi.pdf) accepted at the BPM'24 conference.
 
 ## Table of Contents
 
-- [
+- [Interactive Web Application (iGEDI)](#interactive-web-application)
+- [Requirements](#requirements)
 - [Installation](#installation)
 - [General Usage](#general-usage)
 - [Experiments](#experiments)
 - [Citation](#citation)
 
+## Interactive Web Application
+Our [interactive web application](https://huggingface.co/spaces/andreamalhera/gedi) (iGEDI) guides you through the specification process, runs GEDI for you. You can directly download the resulting generated logs or the configuration file to run GEDI locally.
+
+
 ## Requirements
 - [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
 - Graphviz on your OS e.g.
@@ -28,7 +49,7 @@ conda install pyrfr swig
 ### Startup
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/test/experiment_test.json
 ```
 The last step should take only a few minutes to run.
 
@@ -42,18 +63,18 @@ Our pipeline offers several pipeline steps, which can be run sequentially or par
 To run different steps of the GEDI pipeline, please adapt the `.json` accordingly.
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/pipeline_steps/<pipeline-step>.json
 ```
-For reference of possible keys and values for each step, please see `config_files/
+For reference of possible keys and values for each step, please see `config_files/test/experiment_test.json`.
 To run the whole pipeline please create a new `.json` file, specifying all steps you want to run and specify desired keys and values for each step.
-To reproduce results from
+To reproduce results from our paper, please refer to [Experiments](#experiments).
 
 ### Feature Extraction
 ---
 To extract the features on the event-log level and use them for hyperparameter optimization, we employ the following script:
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/pipeline_steps/feature_extraction.json
 ```
 The JSON file consists of the following key-value pairs:
 
@@ -64,8 +85,7 @@ The JSON file consists of the following key-value pairs:
 - real_eventlog_path: defines the file with the features extracted from the real event logs
 - plot_type: defines the style of the output plotting (possible values: violinplot, boxplot)
 - font_size: label font size of the output plot
--
-
+- boxplot_width: width of the violinplot/boxplot
 
 ### Generation
 ---
@@ -75,7 +95,7 @@ The command to execute the generation step is given by a exemplarily generation.
 
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/pipeline_steps/generation.json
 ```
 
 In the `generation.json`, we have the following key-value pairs:
@@ -102,12 +122,228 @@ In the `generation.json`, we have the following key-value pairs:
 
 - plot_reference_feature: defines the feature, which is used on the x-axis on the output plots, i.e., each feature defined in the 'objectives' of the 'experiment' is plotted against the reference feature being defined in this value
 
+In case of manually defining the targets for the features in config space, the following table shows the range of the features in the real-world event log data (BPIC's) for reference:
+<div style="overflow-x:auto;">
+<table border="1" class="dataframe">
+  <thead>
+    <tr style="text-align: right;">
+      <th></th>
+      <th>n_traces</th> <th>n_unique_traces</th> <th>ratio_variants_per_number_of_traces</th> <th>trace_len_min</th> <th>trace_len_max</th>
+      <th>trace_len_mean</th> <th>trace_len_median</th> <th>trace_len_mode</th> <th>trace_len_std</th> <th>trace_len_variance</th>
+      <th>trace_len_q1</th> <th>trace_len_q3</th> <th>trace_len_iqr</th> <th>trace_len_geometric_mean</th> <th>trace_len_geometric_std</th>
+      <th>trace_len_harmonic_mean</th> <th>trace_len_skewness</th> <th>trace_len_kurtosis</th> <th>trace_len_coefficient_variation</th> <th>trace_len_entropy</th>
+      <th>trace_len_hist1</th> <th>trace_len_hist2</th> <th>trace_len_hist3</th> <th>trace_len_hist4</th> <th>trace_len_hist5</th>
+      <th>trace_len_hist6</th> <th>trace_len_hist7</th> <th>trace_len_hist8</th> <th>trace_len_hist9</th> <th>trace_len_hist10</th>
+      <th>trace_len_skewness_hist</th> <th>trace_len_kurtosis_hist</th> <th>ratio_most_common_variant</th> <th>ratio_top_1_variants</th> <th>ratio_top_5_variants</th>
+      <th>ratio_top_10_variants</th> <th>ratio_top_20_variants</th> <th>ratio_top_50_variants</th> <th>ratio_top_75_variants</th> <th>mean_variant_occurrence</th>
+      <th>std_variant_occurrence</th> <th>skewness_variant_occurrence</th> <th>kurtosis_variant_occurrence</th> <th>n_unique_activities</th> <th>activities_min</th>
+      <th>activities_max</th> <th>activities_mean</th> <th>activities_median</th> <th>activities_std</th> <th>activities_variance</th>
+      <th>activities_q1</th> <th>activities_q3</th> <th>activities_iqr</th> <th>activities_skewness</th> <th>activities_kurtosis</th>
+      <th>n_unique_start_activities</th> <th>start_activities_min</th> <th>start_activities_max</th> <th>start_activities_mean</th> <th>start_activities_median</th>
+      <th>start_activities_std</th> <th>start_activities_variance</th> <th>start_activities_q1</th> <th>start_activities_q3</th> <th>start_activities_iqr</th>
+      <th>start_activities_skewness</th> <th>start_activities_kurtosis</th> <th>n_unique_end_activities</th> <th>end_activities_min</th> <th>end_activities_max</th>
+      <th>end_activities_mean</th> <th>end_activities_median</th> <th>end_activities_std</th> <th>end_activities_variance</th> <th>end_activities_q1</th>
+      <th>end_activities_q3</th> <th>end_activities_iqr</th> <th>end_activities_skewness</th> <th>end_activities_kurtosis</th> <th>eventropy_trace</th>
+      <th>eventropy_prefix</th> <th>eventropy_global_block</th> <th>eventropy_lempel_ziv</th> <th>eventropy_k_block_diff_1</th> <th>eventropy_k_block_diff_3</th>
+      <th>eventropy_k_block_diff_5</th> <th>eventropy_k_block_ratio_1</th> <th>eventropy_k_block_ratio_3</th> <th>eventropy_k_block_ratio_5</th> <th>eventropy_knn_3</th>
+      <th>eventropy_knn_5</th> <th>eventropy_knn_7</th> <th>epa_variant_entropy</th> <th>epa_normalized_variant_entropy</th> <th>epa_sequence_entropy</th>
+      <th>epa_normalized_sequence_entropy</th> <th>epa_sequence_entropy_linear_forgetting</th> <th>epa_normalized_sequence_entropy_linear_forgetting</th> <th>epa_sequence_entropy_exponential_forgetting</th> <th>epa_normalized_sequence_entropy_exponential_forgetting</th>
+    </tr>
+  </thead>
+  <tbody>
+    <tr>
+      <td>[ min, max ]</td>
+      <td>[ 226.0, 251734.0 ]</td> <td>[ 6.0, 28457.0 ]</td> <td>[ 0.0, 1.0 ]</td> <td>[ 1.0, 24.0 ]</td> <td>[ 1.0, 2973.0 ]</td>
+      <td>[ 1.0, 131.49 ]</td> <td>[ 1.0, 55.0 ]</td> <td>[ 1.0, 61.0 ]</td> <td>[ 0.0, 202.53 ]</td> <td>[ 0.0, 41017.89 ]</td>
+      <td>[ 1.0, 44.0 ]</td> <td>[ 1.0, 169.0 ]</td> <td>[ 0.0, 161.0 ]</td> <td>[ 1.0, 53.78 ]</td> <td>[ 1.0, 5.65 ]</td>
+      <td>[ 1.0, 51.65 ]</td> <td>[ -0.58, 111.97 ]</td> <td>[ -0.97, 14006.75 ]</td> <td>[ 0.0, 4.74 ]</td> <td>[ 5.33, 12.04 ]</td>
+      <td>[ 0.0, 1.99 ]</td> <td>[ 0.0, 0.42 ]</td> <td>[ 0.0, 0.4 ]</td> <td>[ 0.0, 0.19 ]</td> <td>[ 0.0, 0.14 ]</td>
+      <td>[ 0.0, 10.0 ]</td> <td>[ 0.0, 0.02 ]</td> <td>[ 0.0, 0.04 ]</td> <td>[ 0.0, 0.0 ]</td> <td>[ 0.0, 2.7 ]</td>
+      <td>[ -0.58, 111.97 ]</td> <td>[ -0.97, 14006.75 ]</td> <td>[ 0.0, 0.79 ]</td> <td>[ 0.0, 0.87 ]</td> <td>[ 0.0, 0.98 ]</td>
+      <td>[ 0.0, 0.99 ]</td> <td>[ 0.2, 1.0 ]</td> <td>[ 0.5, 1.0 ]</td> <td>[ 0.75, 1.0 ]</td> <td>[ 1.0, 24500.67 ]</td>
+      <td>[ 0.04, 42344.04 ]</td> <td>[ 1.54, 64.77 ]</td> <td>[ 0.66, 5083.46 ]</td> <td>[ 1.0, 1152.0 ]</td> <td>[ 1.0, 66058.0 ]</td>
+      <td>[ 34.0, 466141.0 ]</td> <td>[ 4.13, 66058.0 ]</td> <td>[ 2.0, 66058.0 ]</td> <td>[ 0.0, 120522.25 ]</td> <td>[ 0.0, 14525612122.34 ]</td>
+      <td>[ 1.0, 66058.0 ]</td> <td>[ 4.0, 79860.0 ]</td> <td>[ 0.0, 77290.0 ]</td> <td>[ -0.06, 15.21 ]</td> <td>[ -1.5, 315.84 ]</td>
+      <td>[ 1.0, 809.0 ]</td> <td>[ 1.0, 150370.0 ]</td> <td>[ 27.0, 199867.0 ]</td> <td>[ 3.7, 150370.0 ]</td> <td>[ 1.0, 150370.0 ]</td>
+      <td>[ 0.0, 65387.49 ]</td> <td>[ 0.0, 4275524278.19 ]</td> <td>[ 1.0, 150370.0 ]</td> <td>[ 4.0, 150370.0 ]</td> <td>[ 0.0, 23387.25 ]</td>
+      <td>[ 0.0, 9.3 ]</td> <td>[ -2.0, 101.82 ]</td> <td>[ 1.0, 757.0 ]</td> <td>[ 1.0, 16653.0 ]</td> <td>[ 28.0, 181328.0 ]</td>
+      <td>[ 3.53, 24500.67 ]</td> <td>[ 1.0, 16653.0 ]</td> <td>[ 0.0, 42344.04 ]</td> <td>[ 0.0, 1793017566.89 ]</td> <td>[ 1.0, 16653.0 ]</td>
+      <td>[ 3.0, 39876.0 ]</td> <td>[ 0.0, 39766.0 ]</td> <td>[ -0.7, 13.82 ]</td> <td>[ -2.0, 255.39 ]</td> <td>[ 0.0, 13.36 ]</td>
+      <td>[ 0.0, 16.77 ]</td> <td>[ 0.0, 24.71 ]</td> <td>[ 0.0, 685.0 ]</td> <td>[ -328.0, 962.0 ]</td> <td>[ 0.0, 871.0 ]</td>
+      <td>[ 0.0, 881.0 ]</td> <td>[ 0.0, 935.0 ]</td> <td>[ 0.0, 7.11 ]</td> <td>[ 0.0, 7.11 ]</td> <td>[ 0.0, 8.93 ]</td>
+      <td>[ 0.0, 648.0 ]</td> <td>[ 0.0, 618.0 ]</td> <td>[ 0.0, 11563842.15 ]</td> <td>[ 0.0, 0.9 ]</td> <td>[ 0.0, 21146257.12 ]</td>
+      <td>[ 0.0, 0.76 ]</td> <td>[ 0.0, 14140225.9 ]</td> <td>[ 0.0, 0.42 ]</td> <td>[ 0.0, 15576076.83 ]</td> <td>[ 0.0, 0.51 ]</td>
+    </tr>
+  </tbody>
+</table>
+</div>
+
 ### Benchmark
 The benchmarking defines the downstream task which is used for evaluating the goodness of the synthesized event log datasets with the metrics of real-world datasets. The command to execute a benchmarking is shown in the following script:
 
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/pipeline_steps/benchmark.json
 ```
 
 In the `benchmark.json`, we have the following key-value pairs:
@@ -125,7 +361,7 @@ The purpose of the evaluation plotting step is used just for visualization. Some
 
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/pipeline_steps/evaluation_plotter.json
 ```
 
 Generally, in the `evaluation_plotter.json`, we have the following key-value pairs:
@@ -141,26 +377,35 @@ In this repository, experiments can be run selectively or from scratch, as prefe
 We present two settings for generating intentional event logs, using [real targets](#generating-data-with-real-targets) or using [grid targets](#generating-data-with-grid-targets). Both settings output `.xes` event logs, `.json` and `.csv` files containing feature values, as well as evaluation results, from running a [process discovery benchmark](#benchmark), for the generated event logs.
 
 ### Generating data with real targets
-To execute the experiments with real targets, we employ the [experiment_real_targets.json](config_files/
+To execute the experiments with real targets, we employ the [experiment_real_targets.json](config_files/experiment_real_targets.json). The script's pipeline will output the [generated event logs (GenBaselineED)](data/event_logs/GenBaselineED), which optimize their feature values towards [real-world event data features](data/BaselineED_feat.csv), alongside their respectively measured [feature values](data/GenBaselineED_feat.csv) and [benchmark metrics values](data/GenBaselineED_bench.csv).
 
 ```console
 conda activate gedi
-python main.py -
+python main.py -a config_files/experiment_real_targets.json
 ```
 
 ### Generating data with grid targets
-To execute the experiments with grid targets, a single [configuration](config_files/
+To execute the experiments with grid targets, a single [configuration](config_files/grid_2obj) can be selected or all [grid objectives](data/grid_2obj) can be run with one command using the following script. This script will output the [generated event logs (GenED)](data/event_logs/GenED), alongside their respectively measured [feature values](data/GenED_feat.csv) and [benchmark metrics values](data/GenED_bench.csv).
 ```
 conda activate gedi
-python execute_grid_experiments.py config_files/
+python gedi/utils/execute_grid_experiments.py config_files/test
+```
+We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/grid_2obj) and [objective](data/grid_2obj) files for this experiment.
+For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
+To create configuration files for grid objectives interactively, you can use the start the following dashboard:
+```
+streamlit run utils/config_fabric.py # To tunnel to local machine add: --server.port 8501 --server.headless true
+
+# In local machine (only in case you are tunneling):
+ssh -N -f -L 9000:localhost:8501 <user@remote_machine.com>
+open "http://localhost:9000/"
 ```
-We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/algorithm/grid_2obj) and [objective](data/grid_2obj) files for this experiment. For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
 
 ### Visualizations
 To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter noteboook`. In the following, we describe the `.ipynb`-files in the folder `\notebooks` to reproduce the figures from our paper.
 
 #### [Fig. 4 and fig. 5 Representativeness](notebooks/gedi_figs4and5_representativeness.ipynb)
-To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping.
+To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping. Additionally, we visualize the distribution of each meta feature we used in the paper as a boxplot. Additional features can be extracted with FEEED. Therefore, the notebook contains the figures 4 and 5 in the paper.
 
 #### [Fig. 6 Benchmark Boxplots](notebooks/gedi_fig6_benchmark_boxplots.ipynb)
 This notebook is used to visualize the metric distribution of real event logs compared to the generated ones. It shows 5 different metrics on 3 various process discovery techniques. We use 'fitness,', 'precision', 'fscore', 'size', 'cfc' (control-flow complexity) as metrics and as 'heuristic miner', 'ilp' (integer linear programming), and 'imf' (inductive miner infrequent) as miners. The notebook outputs the visualization shown in Fig.6 in the paper.
@@ -169,11 +414,14 @@ This notebook is used to visualize the metric distribution of real event logs co
 
 This notebook is used to answer the question if there is a statistically significant relation between feature similarity and performance metrics for the downstream tasks of process discovery. For that, we compute the pearson coefficient, as well as the kendall's tau coefficient. This elucidates the correlation between the features with metric scores being used for process discovery. Each coefficient is calculated for three different settings: i) real-world datasets; ii) synthesized event log data with real-world targets; iii) synthesized event log data with grid objectives. Figures 7 and 8 shown in the paper refer to this notebook.
 
+#### [Fig. 9 Consistency and fig. 10 Limitations](notebooks/gedi_figs9and10_consistency.ipynb)
+Likewise to the evaluation on the statistical tests in notebook `gedi_figs7and8_benchmarking_statisticalTests.ipynb`, this notebook is used to compute the differences between two correlation matrices $\Delta C = C_1 - C_2$. This logic is employed to evaluate and visualize the distance of two correlation matrices. Furthermore, we show how significant scores are retained from the correlations being evaluated on real-world datasets coompared to synthesized event log datasets with real-world targets. In Fig. 9 and 10 in the paper, the results of the notebook are shown.
+
 ## Citation
-The `GEDI` framework is taken directly from the original paper by [Maldonado](mailto:[email protected]), Frey, Tavares, Rehwald and Seidl
+The `GEDI` framework is taken directly from the original paper by [Maldonado](mailto:[email protected]), Frey, Tavares, Rehwald and Seidl on BPM'24.
 
-```
-@InProceedings{
+```
+@InProceedings{maldonado2024gedi,
 author="Maldonado, Andrea
 and Frey, Christian M. M.
 and Tavares, Gabriel Marques
@@ -193,3 +441,20 @@ abstract="Process mining solutions include enhancing performance, conserving res
 isbn="978-3-031-70396-6"
 }
 ```
+
+Furthermore, the `iGEDI` web application is taken directly from the original paper by [Maldonado](mailto:[email protected]), Aryasomayajula, Frey, and Seidl and is *to appear on Demos@ICPM'24*.
+```
+@inproceedings{maldonado2024igedi,
+  author = {Andrea Maldonado and
+            Sai Anirudh Aryasomayajula and
+            Christian M. M. Frey and
+            Thomas Seidl},
+  editor = {Jochen De Weerdt, Giovanni Meroni, Han van der Aa, and Karolin Winter},
+  title = {iGEDI: interactive Generating Event Data with Intentional Features},
+  booktitle = {ICPM 2024 Tool Demonstration Track, October 14-18, 2024, Kongens Lyngby, Denmark},
+  series = {{CEUR} Workshop Proceedings},
+  publisher = {CEUR-WS.org},
+  year = {2024},
+  bibsource = {dblp computer science bibliography, https://dblp.org}
+}
+```
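The updated README explains that feature targets may be defined manually in config space, with the reference ranges in the table above, but the only worked example lives in `config_files/config_layout.json` further down in this diff. As a hedged illustration (the file name and target values below are hypothetical; the keys follow `config_layout.json`), such a run could look like:
```console
cat > config_files/my_targets.json <<'EOF'
[{"pipeline_step": "event_logs_generation",
  "output_path": "output/my_targets",
  "generator_params": {
    "experiment": {"ratio_top_20_variants": 0.4,
                   "epa_normalized_sequence_entropy_linear_forgetting": 0.2},
    "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1],
                     "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1],
                     "lt_dependency": [0.01, 1], "num_traces": [10, 100],
                     "duplicate": [0], "or": [0]},
    "n_trials": 50}}]
EOF
python main.py -a config_files/my_targets.json
```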
config.py
CHANGED
@@ -1,10 +1,8 @@
 import json
-import os
 import warnings
 
-from
-from
-from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
+from utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
+from utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
 
 def get_model_params_list(alg_json_file: str) :#-> list[dict]:
     """
@@ -20,69 +18,8 @@ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
     warnings.warn('The default model parameter list is used instead of a .json-file.\n'
                   ' Use a configuration from the `config_files`-folder together with the args `-a`.')
     return [
-        {
+        {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
+         FEATURE_PARAMS: {FEATURE_SET: ['ratio_unique_traces_per_trace',
+                                        'ratio_most_common_variant']},
+         OUTPUT_PATH: 'output/plots'}
     ]
-def get_run_params(alg_params_json: str) -> dict:
-    """
-    Loads the running configuration given from a json file or the default dictionary from the code.
-    @param alg_params_json: str
-        Path to the json data with the running configuration
-    @return: dict
-        Running Configuration
-    """
-    if alg_params_json is not None:
-        return json.load(open(alg_params_json))
-    else:
-        warnings.warn('The default run option is used instead of a .json-file.\n'
-                      ' Use a configuration from the `config_files`-folder together with the args `-o`.')
-        return {
-            RUN_OPTION: COMPARE,
-            PLOT_TYPE: COLOR_MAP, # 'heat_map', 'color_map', '3d_map', 'explained_var_plot'
-            PLOT_TICS: True,
-            N_COMPONENTS: 2,
-            INPUT_NAME: 'runningExample',
-            SAVE_RESULTS: True,
-            LOAD_RESULTS: True
-        }
-
-def get_files_and_kwargs(params: dict):
-    """
-    This method returns the filename list of the trajectory and generates the kwargs for the DataTrajectory.
-    The method is individually created for the available data set.
-    Add new trajectory options, if different data set are used.
-    @param params: dict
-        running configuration
-    @return: tuple
-        list of filenames of the trajectories AND
-        kwargs with the important arguments for the classes
-    """
-    try:
-        input_name = params[INPUT_NAME]
-    except KeyError as e:
-        raise KeyError(f'Run option parameter is missing the key: `{e}`. This parameter is mandatory.')
-
-    #TODO: generate parent directories if they don't exist
-    if input_name == 'test':
-        filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
-    elif input_name == 'realLogs':
-        filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
-    elif input_name == 'gen5':
-        filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:5]
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
-    elif input_name == 'gen20':
-        filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:20]
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
-    elif input_name == 'runningExample':
-        filename_list = ['running-example.xes']
-        kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'data/'}
-    elif input_name == 'metaFeatures':
-        filename_list = ['log_features.csv']
-        kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'results/'}
-    else:
-        raise ValueError(f'No data trajectory was found with the name `{input_name}`.')
-
-    #filename_list.pop(file_element)
-    kwargs[PARAMS] = params
-    return filename_list, kwargs
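The rewritten default above only matters when `main.py` is called without a `-a` configuration; the function then warns and returns the feature-extraction parameters shown in the diff. A hedged sketch of exercising that fallback, assuming `main.py` passes a missing `-a` argument through as `None`:
```console
conda activate gedi
python main.py   # no -a: prints the warning and presumably uses the default feature_extraction parameters above
```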
config_files/config_layout.json
ADDED
@@ -0,0 +1,48 @@
[
{
"pipeline_step": "instance_augmentation",
"augmentation_params":{"method":"SMOTE", "no_samples":2,
"feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
"input_path": "data/test/bpic_features.csv",
"output_path": "output"
},
{
"pipeline_step": "event_logs_generation",
"output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
"output_path": "data/frontend/test",
"generator_params": {
"experiment": "data/grid_objectives.csv",
"experiment": {"input_path": "data/2_bpic_features.csv",
"objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
"experiment": {"n_traces":832, "n_unique_traces":828, "ratio_variants_per_number_of_traces":0.99, "trace_len_min":1, "trace_len_max":132, "trace_len_mean":53.31, "trace_len_median":54, "trace_len_mode":61, "trace_len_std":19.89, "trace_len_variance":395.81, "trace_len_q1":44, "trace_len_q3":62, "trace_len_iqr":18, "trace_len_geometric_mean":48.15, "trace_len_geometric_std":1.69, "trace_len_harmonic_mean":37.58, "trace_len_skewness":0.0541, "trace_len_kurtosis":0.81, "trace_len_coefficient_variation":0.37, "trace_len_entropy":6.65, "trace_len_hist1":0.004, "trace_len_hist2":0.005, "trace_len_hist3":0.005, "trace_len_hist4":0.024, "trace_len_hist5":0.024, "trace_len_hist6":0.008, "trace_len_hist7":0.005, "trace_len_hist8":0.001, "trace_len_hist9":0.0, "trace_len_hist10":0.00, "trace_len_skewness_hist":0.05, "trace_len_kurtosis_hist":0.8, "ratio_most_common_variant":0.0, "ratio_top_1_variants":0.01, "ratio_top_5_variants":0.05, "ratio_top_10_variants":0.10, "ratio_top_20_variants":0.2, "ratio_top_50_variants":0.5, "ratio_top_75_variants":0.75, "mean_variant_occurrence":1.0, "std_variant_occurrence":0.07, "skewness_variant_occurrence":14.28, "kurtosis_variant_occurrence":202.00, "n_unique_activities":410, "activities_min":1, "activities_max":830, "activities_mean":108.18, "activities_median":12, "activities_std":187.59, "activities_variance":35189, "activities_q1":3, "activities_q3":125, "activities_iqr":122, "activities_skewness":2.13, "activities_kurtosis":3.81, "n_unique_start_activities":14, "start_activities_min":1, "start_activities_max":731, "start_activities_mean":59.43, "start_activities_median":1, "start_activities_std":186.72, "start_activities_variance":34863, "start_activities_q1":1, "start_activities_q3":8, "start_activities_iqr":7, "start_activities_skewness":3, "start_activities_kurtosis":9.0, "n_unique_end_activities":82, "end_activities_min":1, "end_activities_max":216, "end_activities_mean":10, "end_activities_median":1, "end_activities_std":35, "end_activities_variance":1247, "end_activities_q1":1, "end_activities_q3":3, "end_activities_iqr":2, "end_activities_skewness":5, "end_activities_kurtosis":26, "eventropy_trace":10, "eventropy_prefix":15, "eventropy_global_block":19, "eventropy_lempel_ziv":4, "eventropy_k_block_diff_1":7.1, "eventropy_k_block_diff_3":7.1, "eventropy_k_block_diff_5":7.1, "eventropy_k_block_ratio_1":7.1, "eventropy_k_block_ratio_3":7.1, "eventropy_k_block_ratio_5":7.1, "eventropy_knn_3":5.54, "eventropy_knn_5":5.04, "eventropy_knn_7":4.72, "epa_variant_entropy":240512, "epa_normalized_variant_entropy":0.68, "epa_sequence_entropy":285876, "epa_normalized_sequence_entropy":0.60, "epa_sequence_entropy_linear_forgetting":150546, "epa_normalized_sequence_entropy_linear_forgetting":0.32, "epa_sequence_entropy_exponential_forgetting":185312, "epa_normalized_sequence_entropy_exponential_forgetting":0.39},
"config_space": {
"mode": [5, 20],
"sequence": [0.01, 1],
"choice": [0.01, 1],
"parallel": [0.01, 1],
"loop": [0.01, 1],
"silent": [0.01, 1],
"lt_dependency": [0.01, 1],
"num_traces": [10, 100],
"duplicate": [0],
"or": [0]
},
"n_trials": 50
}
},
{
"pipeline_step": "feature_extraction",
"input_path": "data/test",
"feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", "eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
"output_path": "output/plots",
"real_eventlog_path": "data/BaselineED_feat.csv",
"plot_type": "boxplot"
},
{
"pipeline_step": "benchmark_test",
"benchmark_task": "discovery",
"input_path":"data/test",
"output_path":"output",
"miners" : ["inductive", "heu", "imf", "ilp"]
}
]
config_files/{algorithm/experiment_real_targets.json → experiment_real_targets.json}
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enseef.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enself.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enve.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rmcv.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rt10v.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rvpnot.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enself.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enve.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rmcv.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rt10v.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rvpnot.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_enve.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rmcv.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rt10v.json
RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rvpnot.json
RENAMED
File without changes
config_files/grid_2obj/generator_grid_2objectives_enve_mvo.json
ADDED
@@ -0,0 +1 @@
[{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_mvo.csv", "objectives": ["epa_normalized_variant_entropy", "mean_variant_occurrence"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_enve_mvo/2_enve_mvo", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_enve_mvo/2_enve_mvo", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
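As a rough illustration of what the "config_space" block in the config above encodes, the sketch below draws one random parameterisation from those bounds. This is an assumption about the intent of the ranges (two-element lists as [low, high], one-element lists as fixed values), not GEDI's actual sampler; sample_config is a made-up name. In the configs themselves, "n_trials" caps how many such candidates are evaluated (200 here, 2 in the test config further below).

import random

# Bounds copied from the "config_space" of the config above.
CONFIG_SPACE = {
    "mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1],
    "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1],
    "lt_dependency": [0.01, 1], "num_traces": [10, 10001],
    "duplicate": [0], "or": [0],
}

def sample_config(space: dict, rng: random.Random) -> dict:
    # Draw one candidate configuration from the given bounds (illustrative only).
    cfg = {}
    for key, bounds in space.items():
        if len(bounds) == 1:                               # fixed value
            cfg[key] = bounds[0]
        elif all(isinstance(b, int) for b in bounds):      # integer range
            cfg[key] = rng.randint(bounds[0], bounds[1])
        else:                                              # continuous range
            cfg[key] = rng.uniform(bounds[0], bounds[1])
    return cfg

print(sample_config(CONFIG_SPACE, random.Random(42)))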
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rmcv.json
RENAMED
File without changes

config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rt10v.json
RENAMED
File without changes

config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rvpnot.json
RENAMED
File without changes
config_files/grid_2obj/generator_grid_2objectives_enve_sam.json
ADDED
@@ -0,0 +1 @@
[{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_sam.csv", "objectives": ["epa_normalized_variant_entropy", "start_activities_median"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_enve_sam/2_enve_sam", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_enve_sam/2_enve_sam", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/grid_2obj/generator_grid_2objectives_mvo_sam.json
ADDED
@@ -0,0 +1 @@
[{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_mvo_sam.csv", "objectives": ["mean_variant_occurrence", "start_activities_median"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_mvo_sam/2_mvo_sam", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_mvo_sam/2_mvo_sam", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rt10v.json
RENAMED
File without changes

config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rvpnot.json
RENAMED
File without changes

config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rt10v_rvpnot.json
RENAMED
File without changes
config_files/options/baseline.json
DELETED
@@ -1,9 +0,0 @@
- {
- "run_option": "baseline",
- "plot_type": "color_map",
- "plot_tics": true,
- "n_components": 2,
- "input_name": "test",
- "save_results": false,
- "load_results": false
- }
config_files/options/run_params.json
DELETED
@@ -1,9 +0,0 @@
- {
- "run_option": "compare",
- "plot_type": "color_map",
- "plot_tics": true,
- "n_components": 2,
- "input_name": "gen20",
- "save_results": false,
- "load_results": true
- }
config_files/{algorithm/pipeline_steps → pipeline_steps}/augmentation.json
RENAMED
File without changes
config_files/{algorithm/pipeline_steps → pipeline_steps}/benchmark.json
RENAMED
@@ -4,6 +4,6 @@
  "benchmark_test": "discovery",
  "input_path":"data/test",
  "output_path":"output",
- "miners" : ["
+ "miners" : ["ind", "heu", "imf", "ilp"]
  }
  ]
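A small, hedged sanity check that would catch the kind of truncated "miners" value this change fixes: verify that every entry in "miners" is one of the identifiers that actually occur across the configs in this PR ("inductive"/"ind", "heu", "imf", "ilp"). KNOWN_MINERS and check_miners are illustrative helpers, not part of the repository.

import json

# Miner identifiers observed in the configs touched by this PR; whether GEDI
# accepts both the long and abbreviated spellings is an assumption here.
KNOWN_MINERS = {"inductive", "ind", "heu", "imf", "ilp"}

def check_miners(config_path: str) -> list:
    # Return the unknown miner names found in a pipeline config (illustrative helper).
    with open(config_path) as f:
        steps = json.load(f)
    return [m for step in steps for m in step.get("miners", []) if m not in KNOWN_MINERS]

# Example call (path illustrative):
# print(check_miners("config_files/pipeline_steps/benchmark.json"))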
config_files/{algorithm/pipeline_steps → pipeline_steps}/evaluation_plotter.json
RENAMED
File without changes

config_files/{algorithm/pipeline_steps → pipeline_steps}/feature_extraction.json
RENAMED
File without changes

config_files/{algorithm/pipeline_steps → pipeline_steps}/generation.json
RENAMED
File without changes

config_files/{algorithm → test}/experiment_test.json
RENAMED
File without changes

config_files/{algorithm/test → test}/generator_2bpic_2objectives_ense_enseef.json
RENAMED
File without changes

config_files/{algorithm/test → test}/generator_grid_1objectives_rt10v.json
RENAMED
File without changes

config_files/{algorithm/test → test}/generator_grid_2objectives_ense_enself.json
RENAMED
File without changes
config_files/test/test_abbrv_generation.json
ADDED
@@ -0,0 +1,16 @@
+ [{"pipeline_step": "event_logs_generation",
+ "output_path": "output/test",
+ "generator_params": {"experiment":
+ {"input_path": "data/test/igedi_table_1.csv",
+ "objectives": ["rmcv","ense"]},
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
+ "num_traces": [10, 10001], "duplicate": [0],
+ "or": [0]}, "n_trials": 2}},
+ {"pipeline_step": "feature_extraction",
+ "input_path": "output/test/igedi_table_1/2_ense_rmcv",
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
+ "output_path": "output/plots", "real_eventlog_path": "data/test/2_bpic_features.csv",
+ "plot_type": "boxplot"}]
data/test/grid_experiments/rt10v.csv
DELETED
@@ -1,12 +0,0 @@
- task,ratio_top_10_variants
- task_1,0.0
- task_2,0.1
- task_3,0.2
- task_4,0.3
- task_5,0.4
- task_6,0.5
- task_7,0.6
- task_8,0.7
- task_9,0.8
- task_10,0.9
- task_11,1.0
data/test/grid_feat.csv
CHANGED
@@ -1,3 +1,5 @@
  log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
  experiment1,0.2,0.4
  experiment2,0.4,0.7
+ experiment3,NaN,0.4
+ experiment4,0.2,NaN
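The two added rows deliberately leave one value as NaN each; they look like target rows with one unconstrained objective (compare the validation fixtures genELexperiment3_04_nan and genELexperiment4_nan_02 further below). A hedged sketch of reading the table with pandas and dropping NaN targets per row; the row_targets helper is illustrative, not part of the repository.

import pandas as pd

# Read the target table shown above; NaN cells mark objectives that are not constrained.
feats = pd.read_csv("data/test/grid_feat.csv")

def row_targets(row: pd.Series) -> dict:
    # Keep only the non-NaN objective targets of one row (illustrative helper).
    return {k: v for k, v in row.drop(labels=["log"]).items() if pd.notna(v)}

for _, row in feats.iterrows():
    print(row["log"], row_targets(row))
# experiment3 keeps only the entropy target, experiment4 only ratio_top_20_variants.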
data/test/igedi_table_1.csv
ADDED
@@ -0,0 +1,4 @@
+ log,rmcv,ense
+ BPIC15f4,0.003,0.604
+ RTFMP,0.376,0.112
+ HD,0.517,0.254
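This table supplies the per-log targets that the test config above addresses through "objectives": ["rmcv", "ense"]. A minimal, standard-library sketch of parsing it into one target dict per log follows; the resulting structure is my assumption, not GEDI's internal representation.

import csv

# Read the target table added above into one target dict per log.
with open("data/test/igedi_table_1.csv", newline="") as f:
    targets = [
        {"log": row["log"], "rmcv": float(row["rmcv"]), "ense": float(row["ense"])}
        for row in csv.DictReader(f)
    ]

print(targets[0])   # {'log': 'BPIC15f4', 'rmcv': 0.003, 'ense': 0.604}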
data/validation/2_ense_rmcv_feat.csv
ADDED
@@ -0,0 +1,4 @@
log,n_traces,n_unique_traces,trace_len_coefficient_variation,trace_len_entropy,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_hist1,trace_len_hist10,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_iqr,trace_len_kurtosis,trace_len_kurtosis_hist,trace_len_max,trace_len_mean,trace_len_median,trace_len_min,trace_len_mode,trace_len_q1,trace_len_q3,trace_len_skewness,trace_len_skewness_hist,trace_len_std,trace_len_variance,kurtosis_variant_occurrence,mean_variant_occurrence,ratio_most_common_variant,ratio_top_10_variants,ratio_top_1_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_5_variants,ratio_top_75_variants,skewness_variant_occurrence,std_variant_occurrence,activities_iqr,activities_kurtosis,activities_max,activities_mean,activities_median,activities_min,activities_q1,activities_q3,activities_skewness,activities_std,activities_variance,n_unique_activities,n_unique_start_activities,start_activities_iqr,start_activities_kurtosis,start_activities_max,start_activities_mean,start_activities_median,start_activities_min,start_activities_q1,start_activities_q3,start_activities_skewness,start_activities_std,start_activities_variance,end_activities_iqr,end_activities_kurtosis,end_activities_max,end_activities_mean,end_activities_median,end_activities_min,end_activities_q1,end_activities_q3,end_activities_skewness,end_activities_std,end_activities_variance,n_unique_end_activities,eventropy_global_block,eventropy_global_block_flattened,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,eventropy_lempel_ziv,eventropy_lempel_ziv_flattened,eventropy_prefix,eventropy_prefix_flattened,eventropy_trace,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
genELBPIC15f4_0604_0003,8616,4031,1.0086445672512825,8.700230419287818,8.516920996327995,2.1832133718212567,6.58111248846037,0.05713165933282198,1.682074468800883e-05,0.009932649738269211,0.0033136867035377378,0.0012279143622246447,0.0005214430853282738,0.00017661781922409254,0.0001093348404720574,3.364148937601766e-05,0.0,9.0,11.77613857723645,4.64306597180025,141,11.964136490250697,7.0,3,3,5.0,14.0,2.836323931248485,2.5294876299887217,12.067561272744191,145.6260350714354,1651.5545366193303,2.137434879682461,0.09099350046425256,0.5789229340761374,0.40401578458681525,0.6256963788300836,0.766016713091922,0.5258820798514392,0.883008356545961,36.276105773051086,15.574023282690577,2184.5,1.9085746306932307,34121,12885.375,8627.0,8584,8616.0,10800.5,1.8663249384138656,8507.416043333898,72376127.734375,8,2,2111.0,-2.0,6419,4308.0,4308.0,2197,3252.5,5363.5,0.0,2111.0,4456321.0,768.0,0.0021026107788850723,4895,1723.2,832.0,495,813.0,1581.0,1.331337855426617,1625.5283940922102,2642342.56,5,15.897,16.276,2.756,1.525,1.375,2.756,2.016,1.775,6.564,6.07,5.761,1.405,1.786,12.139,13.493,9.703,365917.06171394786,0.7166786736830569,651595.1462643282,0.5475971681938718,62016.045914910814,0.05211796208164211,266396.7627350506,0.22387845232743814,0.46785051067780875
genELHD_0254_0517,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
genELRTFMP_0112_0376,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
data/validation/genELexperiment1_04_02.json
CHANGED
@@ -1 +1 @@
- {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "
+ {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02", "target_similarity": 0.7418932364693804}
data/validation/genELexperiment3_04_nan.json
ADDED
@@ -0,0 +1 @@
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan", "target_similarity": 0.7418932612931086}
data/validation/genELexperiment4_nan_02.json
ADDED
@@ -0,0 +1 @@
+ {"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02", "target_similarity": 1.0}
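The target_similarity values stored in these validation fixtures are consistent with 1 / (1 + Euclidean distance) between the achieved feature values and the targets from data/test/grid_feat.csv, computed only over the targets that are not NaN (the single-target fixture genELexperiment4_nan_02 hits its 0.2 target exactly and gets 1.0). This is an inference from the numbers, not GEDI's documented definition; the sketch below merely reproduces that reading.

import math

def target_similarity(targets: dict, achieved: dict) -> float:
    # 1 / (1 + Euclidean distance) over the targets present on both sides
    # (inferred from the validation JSONs above, not an official definition).
    shared = [k for k in targets if k in achieved]
    dist = math.sqrt(sum((achieved[k] - targets[k]) ** 2 for k in shared))
    return 1.0 / (1.0 + dist)

# genELexperiment3_04_nan: single target 0.4, achieved ~0.0521 -> ~0.74189,
# matching the target_similarity stored in that fixture.
print(target_similarity(
    {"epa_normalized_sequence_entropy_linear_forgetting": 0.4},
    {"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734},
))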