Spaces:
Sleeping
Sleeping
Andrea Maldonado
committed on
Commit
·
85c7f4a
1
Parent(s):
4df7226
Updates run commands
Browse files- .github/workflows/test_gedi.yml +6 -6
- README.md +8 -8
- config_files/options/baseline.json +0 -9
- config_files/options/run_params.json +0 -9
- execute_grid_experiments.py +1 -1
- utils/config_fabric.py +1 -2
.github/workflows/test_gedi.yml
CHANGED
@@ -29,7 +29,7 @@ jobs:
|
|
29 |
|
30 |
- name: Run test
|
31 |
run:
|
32 |
-
python main.py -
|
33 |
|
34 |
- name: Compare output
|
35 |
run: diff data/validation/test_feat.csv data/test_feat.csv
|
@@ -59,7 +59,7 @@ jobs:
|
|
59 |
|
60 |
- name: Run test
|
61 |
run:
|
62 |
-
python main.py -
|
63 |
|
64 |
- name: Compare output 1
|
65 |
run:
|
@@ -97,7 +97,7 @@ jobs:
|
|
97 |
|
98 |
- name: Run test
|
99 |
run:
|
100 |
-
python main.py -
|
101 |
|
102 |
- name: Convert output and validation to same encoding
|
103 |
run: iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > data/validation/test_benchmark.csv
|
@@ -126,7 +126,7 @@ jobs:
|
|
126 |
|
127 |
- name: Run test
|
128 |
run:
|
129 |
-
python main.py -
|
130 |
|
131 |
test_evaluation-plotter:
|
132 |
runs-on: ubuntu-latest
|
@@ -153,7 +153,7 @@ jobs:
|
|
153 |
- name: Run test
|
154 |
run:
|
155 |
|
156 |
-
python main.py -
|
157 |
|
158 |
test_integration:
|
159 |
runs-on: ubuntu-latest
|
@@ -179,7 +179,7 @@ jobs:
|
|
179 |
|
180 |
- name: Run test
|
181 |
run:
|
182 |
-
python main.py -
|
183 |
|
184 |
test_grid_experiments_script:
|
185 |
runs-on: ubuntu-latest
|
|
|
29 |
|
30 |
- name: Run test
|
31 |
run:
|
32 |
+
python main.py -a config_files/algorithm/pipeline_steps/feature_extraction.json
|
33 |
|
34 |
- name: Compare output
|
35 |
run: diff data/validation/test_feat.csv data/test_feat.csv
|
|
|
59 |
|
60 |
- name: Run test
|
61 |
run:
|
62 |
+
python main.py -a config_files/algorithm/pipeline_steps/generation.json
|
63 |
|
64 |
- name: Compare output 1
|
65 |
run:
|
|
|
97 |
|
98 |
- name: Run test
|
99 |
run:
|
100 |
+
python main.py -a config_files/algorithm/pipeline_steps/benchmark.json
|
101 |
|
102 |
- name: Convert output and validation to same encoding
|
103 |
run: iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > data/validation/test_benchmark.csv
|
|
|
126 |
|
127 |
- name: Run test
|
128 |
run:
|
129 |
+
python main.py -a config_files/algorithm/pipeline_steps/augmentation.json
|
130 |
|
131 |
test_evaluation-plotter:
|
132 |
runs-on: ubuntu-latest
|
|
|
153 |
- name: Run test
|
154 |
run:
|
155 |
|
156 |
+
python main.py -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
|
157 |
|
158 |
test_integration:
|
159 |
runs-on: ubuntu-latest
|
|
|
179 |
|
180 |
- name: Run test
|
181 |
run:
|
182 |
+
python main.py -a config_files/algorithm/experiment_test.json
|
183 |
|
184 |
test_grid_experiments_script:
|
185 |
runs-on: ubuntu-latest
|
README.md
CHANGED
@@ -27,7 +27,7 @@ conda install pyrfr swig
|
|
27 |
### Startup
|
28 |
```console
|
29 |
conda activate gedi
|
30 |
-
python main.py -
|
31 |
```
|
32 |
The last step should take only a few minutes to run.
|
33 |
|
@@ -41,7 +41,7 @@ Our pipeline offers several pipeline steps, which can be run sequentially or par
|
|
41 |
To run different steps of the GEDI pipeline, please adapt the `.json` accordingly.
|
42 |
```console
|
43 |
conda activate gedi
|
44 |
-
python main.py -
|
45 |
```
|
46 |
For reference of possible keys and values for each step, please see `config_files/algorithm/experiment_test.json`.
|
47 |
To run the whole pipeline please create a new `.json` file, specifying all steps you want to run and specify desired keys and values for each step.
|
@@ -52,7 +52,7 @@ To reproduce results from our paper, please refer to [Experiments](#experiments)
|
|
52 |
To extract the features on the event-log level and use them for hyperparameter optimization, we employ the following script:
|
53 |
```console
|
54 |
conda activate gedi
|
55 |
-
python main.py -
|
56 |
```
|
57 |
The JSON file consists of the following key-value pairs:
|
58 |
|
@@ -74,7 +74,7 @@ The command to execute the generation step is given by a exemplarily generation.
|
|
74 |
|
75 |
```console
|
76 |
conda activate gedi
|
77 |
-
python main.py -
|
78 |
```
|
79 |
|
80 |
In the `generation.json`, we have the following key-value pairs:
|
@@ -106,7 +106,7 @@ The benchmarking defines the downstream task which is used for evaluating the go
|
|
106 |
|
107 |
```console
|
108 |
conda activate gedi
|
109 |
-
python main.py -
|
110 |
```
|
111 |
|
112 |
In the `benchmark.json`, we have the following key-value pairs:
|
@@ -124,7 +124,7 @@ The purpose of the evaluation plotting step is used just for visualization. Some
|
|
124 |
|
125 |
```console
|
126 |
conda activate gedi
|
127 |
-
python main.py -
|
128 |
```
|
129 |
|
130 |
Generally, in the `evaluation_plotter.json`, we have the following key-value pairs:
|
@@ -144,7 +144,7 @@ To execute the experiments with real targets, we employ the [experiment_real_tar
|
|
144 |
|
145 |
```console
|
146 |
conda activate gedi
|
147 |
-
python main.py -
|
148 |
```
|
149 |
|
150 |
### Generating data with grid targets
|
@@ -167,7 +167,7 @@ open "http://localhost:9000/"
|
|
167 |
To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter notebook`. In the following, we describe the `.ipynb`-files in the folder `notebooks/` to reproduce the figures from our paper.
|
168 |
|
169 |
#### [Fig. 4 and fig. 5 Representativeness](notebooks/gedi_figs4and5_representativeness.ipynb)
|
170 |
-
To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping.
|
171 |
|
172 |
#### [Fig. 6 Benchmark Boxplots](notebooks/gedi_fig6_benchmark_boxplots.ipynb)
|
173 |
This notebook is used to visualize the metric distribution of real event logs compared to the generated ones. It shows 5 different metrics on 3 different process discovery techniques. We use 'fitness', 'precision', 'fscore', 'size', and 'cfc' (control-flow complexity) as metrics, and 'heuristic miner', 'ilp' (integer linear programming), and 'imf' (inductive miner infrequent) as miners. The notebook outputs the visualization shown in Fig. 6 in the paper.
|
|
|
27 |
### Startup
|
28 |
```console
|
29 |
conda activate gedi
|
30 |
+
python main.py -a config_files/algorithm/experiment_test.json
|
31 |
```
|
32 |
The last step should take only a few minutes to run.
|
33 |
|
|
|
41 |
To run different steps of the GEDI pipeline, please adapt the `.json` accordingly.
|
42 |
```console
|
43 |
conda activate gedi
|
44 |
+
python main.py -a config_files/algorithm/pipeline_steps/<pipeline-step>.json
|
45 |
```
|
46 |
For reference of possible keys and values for each step, please see `config_files/algorithm/experiment_test.json`.
|
47 |
To run the whole pipeline please create a new `.json` file, specifying all steps you want to run and specify desired keys and values for each step.
|
|
|
52 |
To extract the features on the event-log level and use them for hyperparameter optimization, we employ the following script:
|
53 |
```console
|
54 |
conda activate gedi
|
55 |
+
python main.py -a config_files/algorithm/pipeline_steps/feature_extraction.json
|
56 |
```
|
57 |
The JSON file consists of the following key-value pairs:
|
58 |
|
|
|
74 |
|
75 |
```console
|
76 |
conda activate gedi
|
77 |
+
python main.py -a config_files/algorithm/pipeline_steps/generation.json
|
78 |
```
|
79 |
|
80 |
In the `generation.json`, we have the following key-value pairs:
|
|
|
106 |
|
107 |
```console
|
108 |
conda activate gedi
|
109 |
+
python main.py -a config_files/algorithm/pipeline_steps/benchmark.json
|
110 |
```
|
111 |
|
112 |
In the `benchmark.json`, we have the following key-value pairs:
|
|
|
124 |
|
125 |
```console
|
126 |
conda activate gedi
|
127 |
+
python main.py -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
|
128 |
```
|
129 |
|
130 |
Generally, in the `evaluation_plotter.json`, we have the following key-value pairs:
|
|
|
144 |
|
145 |
```console
|
146 |
conda activate gedi
|
147 |
+
python main.py -a config_files/algorithm/experiment_real_targets.json
|
148 |
```
|
149 |
|
150 |
### Generating data with grid targets
|
|
|
167 |
To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter notebook`. In the following, we describe the `.ipynb`-files in the folder `notebooks/` to reproduce the figures from our paper.
|
168 |
|
169 |
#### [Fig. 4 and fig. 5 Representativeness](notebooks/gedi_figs4and5_representativeness.ipynb)
|
170 |
+
To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping. Additionally, we visualize the distribution of each meta feature we used in the paper as a boxplot. Additional features can be extracted with FEEED. The notebook thus contains Figures 4 and 5 of the paper.
|
171 |
|
172 |
#### [Fig. 6 Benchmark Boxplots](notebooks/gedi_fig6_benchmark_boxplots.ipynb)
|
173 |
This notebook is used to visualize the metric distribution of real event logs compared to the generated ones. It shows 5 different metrics on 3 different process discovery techniques. We use 'fitness', 'precision', 'fscore', 'size', and 'cfc' (control-flow complexity) as metrics, and 'heuristic miner', 'ilp' (integer linear programming), and 'imf' (inductive miner infrequent) as miners. The notebook outputs the visualization shown in Fig. 6 in the paper.
|
config_files/options/baseline.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"run_option": "baseline",
|
3 |
-
"plot_type": "color_map",
|
4 |
-
"plot_tics": true,
|
5 |
-
"n_components": 2,
|
6 |
-
"input_name": "test",
|
7 |
-
"save_results": false,
|
8 |
-
"load_results": false
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config_files/options/run_params.json
DELETED
@@ -1,9 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"run_option": "compare",
|
3 |
-
"plot_type": "color_map",
|
4 |
-
"plot_tics": true,
|
5 |
-
"n_components": 2,
|
6 |
-
"input_name": "gen20",
|
7 |
-
"save_results": false,
|
8 |
-
"load_results": true
|
9 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
execute_grid_experiments.py
CHANGED
@@ -10,7 +10,7 @@ from tqdm import tqdm
|
|
10 |
def multi_experiment_wrapper(config_file, i=0):
|
11 |
print(f"=========================STARTING EXPERIMENT #{i+1}=======================")
|
12 |
print(f"INFO: Executing with {config_file}")
|
13 |
-
os.system(f"python -W ignore main.py -
|
14 |
print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
|
15 |
|
16 |
if __name__ == '__main__':
|
|
|
10 |
def multi_experiment_wrapper(config_file, i=0):
|
11 |
print(f"=========================STARTING EXPERIMENT #{i+1}=======================")
|
12 |
print(f"INFO: Executing with {config_file}")
|
13 |
+
os.system(f"python -W ignore main.py -a {config_file}")
|
14 |
print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
|
15 |
|
16 |
if __name__ == '__main__':
|
utils/config_fabric.py
CHANGED
@@ -243,8 +243,7 @@ if __name__ == '__main__':
|
|
243 |
st.write("Saved configuration in ", output_path, ". Run command:")
|
244 |
#if create_run_button:
|
245 |
if True:
|
246 |
-
|
247 |
-
var = f"python -W ignore main.py -o {options_path} -a {output_path}"
|
248 |
st.code(var, language='bash')
|
249 |
if False: #FIXME: Command fails when using multiprocessing
|
250 |
command = var.split()
|
|
|
243 |
st.write("Saved configuration in ", output_path, ". Run command:")
|
244 |
#if create_run_button:
|
245 |
if True:
|
246 |
+
var = f"python -W ignore main.py -a {output_path}"
|
|
|
247 |
st.code(var, language='bash')
|
248 |
if False: #FIXME: Command fails when using multiprocessing
|
249 |
command = var.split()
|