Spaces:
Sleeping
Sleeping
Andrea Maldonado
committed on
Commit
·
277c251
1
Parent(s):
1296e6b
WIP Adds config file fabric
Browse files- README.md +10 -2
- config_files/config_layout.json +52 -0
- utils/config_fabric.py +151 -0
README.md
CHANGED
@@ -63,7 +63,7 @@ The JSON file consists of the following key-value pairs:
|
|
63 |
- real_eventlog_path: defines the file with the features extracted from the real event logs
|
64 |
- plot_type: defines the style of the output plotting (possible values: violinplot, boxplot)
|
65 |
- font_size: label font size of the output plot
|
66 |
-
-
|
67 |
|
68 |
|
69 |
### Generation
|
@@ -153,8 +153,16 @@ To execute the experiments with grid targets, a single [configuration](config_fi
|
|
153 |
conda activate gedi
|
154 |
python execute_grid_experiments.py config_files/algorithm/grid_2obj
|
155 |
```
|
156 |
-
We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/algorithm/grid_2obj) and [objective](data/grid_2obj) files for this experiment.
|
|
|
|
|
|
|
|
|
157 |
|
|
|
|
|
|
|
|
|
158 |
### Visualizations
|
159 |
To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter notebook`. In the following, we describe the `.ipynb`-files in the folder `\notebooks` to reproduce the figures from our paper.
|
160 |
|
|
|
63 |
- real_eventlog_path: defines the file with the features extracted from the real event logs
|
64 |
- plot_type: defines the style of the output plotting (possible values: violinplot, boxplot)
|
65 |
- font_size: label font size of the output plot
|
66 |
+
- boxplot_width: width of the violinplot/boxplot
|
67 |
|
68 |
|
69 |
### Generation
|
|
|
153 |
conda activate gedi
|
154 |
python execute_grid_experiments.py config_files/algorithm/grid_2obj
|
155 |
```
|
156 |
+
We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/algorithm/grid_2obj) and [objective](data/grid_2obj) files for this experiment.
|
157 |
+
For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
|
158 |
+
To create configuration files for grid objectives interactively, you can start the following dashboard:
|
159 |
+
```
|
160 |
+
streamlit run utils/config_fabric.py # To tunnel to local machine add: --server.port 8501 --server.headless true
|
161 |
|
162 |
+
# In local machine (only in case you are tunneling):
|
163 |
+
ssh -N -f -L 9000:localhost:8501 <user@remote_machine.com>
|
164 |
+
open "http://localhost:9000/"
|
165 |
+
```
|
166 |
### Visualizations
|
167 |
To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter notebook`. In the following, we describe the `.ipynb`-files in the folder `\notebooks` to reproduce the figures from our paper.
|
168 |
|
config_files/config_layout.json
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"pipeline_step": "instance_augmentation",
|
4 |
+
"augmentation_params":{"method":"SMOTE", "no_samples":2,
|
5 |
+
"feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
|
6 |
+
"input_path": "data/test/bpic_features.csv",
|
7 |
+
"output_path": "output"
|
8 |
+
},
|
9 |
+
{
|
10 |
+
"pipeline_step": "event_logs_generation",
|
11 |
+
"output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
|
12 |
+
"output_path": "data/test",
|
13 |
+
"generator_params": {
|
14 |
+
"experiment": "data/grid_objectives.csv",
|
15 |
+
"experiment": {"input_path": "data/2_bpic_features.csv",
|
16 |
+
"objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
|
17 |
+
"experiment": [
|
18 |
+
{"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
|
19 |
+
{"epa_normalized_sequence_entropy_linear_forgetting": 0.4, "ratio_top_20_variants": 0.7}
|
20 |
+
],
|
21 |
+
"experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
|
22 |
+
"config_space": {
|
23 |
+
"mode": [5, 20],
|
24 |
+
"sequence": [0.01, 1],
|
25 |
+
"choice": [0.01, 1],
|
26 |
+
"parallel": [0.01, 1],
|
27 |
+
"loop": [0.01, 1],
|
28 |
+
"silent": [0.01, 1],
|
29 |
+
"lt_dependency": [0.01, 1],
|
30 |
+
"num_traces": [10, 100],
|
31 |
+
"duplicate": [0],
|
32 |
+
"or": [0]
|
33 |
+
},
|
34 |
+
"n_trials": 2
|
35 |
+
}
|
36 |
+
},
|
37 |
+
{
|
38 |
+
"pipeline_step": "feature_extraction",
|
39 |
+
"input_path": "data/test",
|
40 |
+
"feature_params": {"feature_set":["trace_length"]},
|
41 |
+
"output_path": "output/plots",
|
42 |
+
"real_eventlog_path": "data/BaselineED_feat.csv",
|
43 |
+
"plot_type": "boxplot"
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"pipeline_step": "benchmark_test",
|
47 |
+
"benchmark_task": "discovery",
|
48 |
+
"input_path":"data/test",
|
49 |
+
"output_path":"output",
|
50 |
+
"miners" : ["inductive", "heu", "imf", "ilp"]
|
51 |
+
}
|
52 |
+
]
|
utils/config_fabric.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from copy import deepcopy
|
2 |
+
from importlib import reload
|
3 |
+
from itertools import product
|
4 |
+
from pylab import *
|
5 |
+
import itertools
|
6 |
+
import json
|
7 |
+
import os
|
8 |
+
import pandas as pd
|
9 |
+
import pm4py
|
10 |
+
import random
|
11 |
+
import streamlit as st
|
12 |
+
|
13 |
+
# Streamlit page set-up: use the full browser width for the dashboard.
st.set_page_config(layout='wide')
# Scratch path for an intermediate XES event log.
# NOTE(review): not referenced anywhere else in this file (WIP) - confirm before removing.
INPUT_XES="output/inputlog_temp.xes"

# Bare top-level string literal: rendered as markdown by Streamlit's "magic" feature.
"""
# Configuration File fabric for
## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
"""
|
20 |
+
def double_switch(label_left, label_right):
    """Render a horizontal 'label_left | toggle | label_right' switch.

    Returns the toggle state: False (default) means *label_left*,
    True means *label_right*.
    """
    # The wide outer columns (weight 4) act as horizontal padding so the
    # labels and switch sit centered next to each other.
    _, left_col, switch_col, right_col, _ = st.columns([4, 1, 1, 1, 4])

    left_col.write(label_left)

    # Key is derived from the left label so several switches can coexist
    # on the same page without colliding.
    state = switch_col.toggle(" ", value=False,
                              key="toggle_switch_" + label_left,
                              )

    right_col.write(label_right)
    return state
|
37 |
+
|
38 |
+
def view(config_file):
    """Render the current (partial) configuration in the Streamlit app."""
    st.write(config_file)
|
40 |
+
|
41 |
+
def get_ranges(stats, tuple_values):
    """Build a comma-separated string of 'np.around(...)' expressions.

    stats: per-feature statistics DataFrame (one row per feature).
    tuple_values: statistic column names to extract (e.g. ['min', 'max']).
    Returns a string the caller evals inside itertools.product(...).
    """
    # Expression pulling the selected statistic columns out of a row as floats.
    col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
    # NOTE(review): eval on a constructed string; only safe while column
    # names come from the trusted stats DataFrame, never from user text.
    stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
    #tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    # Wrap each per-feature value tuple in an np.around(..., 2) expression;
    # the joined string is evaluated by create_objectives_grid.
    result = [f"np.around({x}, 2)" for x in stats['range']]
    result = ", ".join(result)
    return result
|
48 |
+
|
49 |
+
def create_objectives_grid(df, objectives, n_para_obj=2):
    """Create the list of experiment dicts spanning the objective grid.

    df: stats DataFrame indexed by feature name (one row per feature).
    objectives: statistic column names (e.g. ['min', 'max']) when
        n_para_obj == len(objectives); this is the only path that returns.
    Returns a list of {feature: value} dicts (cartesian product of the
    rounded per-feature value tuples).
    """
    parameters_o = "objectives, "
    sel_features = df.index.to_list()
    if n_para_obj==len(objectives):
        # get_ranges yields a string of np.around(...) expressions; eval it
        # inside itertools.product to obtain one value tuple per feature.
        parameters = get_ranges(df, sorted(objectives))
        tasks = eval(f"list(itertools.product({parameters}))")[0]
        cartesian_product = list(product(*tasks))
        # One experiment dict per grid point, keyed by feature name.
        experiments = [{key: value[idx] for idx, key in enumerate(sel_features)} for value in cartesian_product]
        return experiments
    else:
        # NOTE(review): legacy/WIP branch carried over from the notebook
        # version; it builds `experiments` and `tasks` but never returns
        # them (function falls through to None) - confirm intent.
        if n_para_obj==1:
            experiments = [[exp] for exp in objectives]
        else:
            # Unordered pairs of distinct objectives.
            experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
            experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
        parameters = "np.around(np.arange(0.0, 1.5,0.5),2), "
        tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
        print("TASKS", tasks, type(parameters), type(n_para_obj), parameters*n_para_obj)
        #print(len(experiments), experiments)

        print(len(tasks))

        for exp in experiments:
            df = pd.DataFrame(data=tasks, columns=["task", *exp])
            #experiment_path = os.path.join('..','data', f'grid_{n_para_obj}obj')
            #os.makedirs(experiment_path, exist_ok=True)
            #experiment_path = os.path.join(experiment_path, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv")
            #df.to_csv(experiment_path, index=False)
            #print(f"Saved experiment in {experiment_path}")
            #write_generator_experiment(experiment_path, objectives=exp)
|
79 |
+
|
80 |
+
def set_up(generator_params):
    """Interactively edit the 'generator_params' section of a config step.

    Renders Streamlit widgets to define experiment targets (manual,
    CSV-based, point- or grid-based), the generator config space and
    n_trials; returns the updated generator_params dict.
    """
    create_button = False
    experiments = []

    col1, col2 = st.columns(2)
    # NOTE(review): 'if True:' looks like a WIP placeholder (possibly for a
    # 'with col1:' layout) - confirm intended nesting.
    if True:
        grid_option = double_switch("Point-", "Grid-based")
        csv_option = double_switch("Manual", "From CSV")
    if csv_option:
        uploaded_file = st.file_uploader(f"Pick a csv-file containing feature values for features:", type="csv")
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
            sel_features = st.multiselect("Selected features", list(df.columns))
            df = df[sel_features]
            if grid_option:
                # Grid mode: derive objective value ranges from CSV statistics.
                add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
                stats = df.describe().transpose()
                stats[str(int(add_quantile))+"%"] = df.quantile(q=add_quantile/100)
                view(stats)
                # Columns [3:] skip count/mean/std, offering min/quantiles/max.
                tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
                experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values))
            else:
                # Point mode: every CSV row becomes one experiment.
                view(df)
                experiments = df.to_dict(orient='records')
    else:
        # Manual mode: type each selected objective's target value directly.
        sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
        for sel_feature in sel_features:
            generator_params['experiment'][sel_feature] = float(st.text_input(sel_feature, generator_params['experiment'][sel_feature]))
    # NOTE(review): this assignment replaces the manual edits above with
    # `experiments` (still [] in manual mode) - confirm whether intentional.
    generator_params['experiment'] = experiments
    st.write(f"...result in {len(generator_params['experiment'])} experiments")

    # Bare string literal: rendered as a markdown heading by Streamlit magic.
    """
    #### Configuration space
    """
    for key in generator_params['config_space'].keys():
        generator_params['config_space'][key] = st.text_input(key, generator_params['config_space'][key])

    #generator_params['config_space'] = st.text_input('config_space', generator_params['config_space'])
    generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
    return generator_params
|
120 |
+
|
121 |
+
if __name__ == '__main__':
    # Load the template config describing every supported pipeline step.
    # (Context manager fixes the leaked file handle from json.load(open(...)).)
    with open("config_files/config_layout.json") as layout_file:
        config_layout = json.load(layout_file)
    step_candidates = ["instance_augmentation","event_logs_generation","feature_extraction","benchmark_test"]
    pipeline_steps = st.multiselect(
        "Choose pipeline step",
        step_candidates,
        []
    )
    step_configs = []
    # Left column hosts the input widgets, right column the live preview.
    set_col, view_col = st.columns([3, 2])
    for pipeline_step in pipeline_steps:
        # The layout entry for this step serves as the editable template.
        step_config = [d for d in config_layout if d['pipeline_step'] == pipeline_step][0]
        with set_col:
            st.header(pipeline_step)
            for step_key in step_config.keys():
                if step_key == "generator_params":
                    # Nested generator settings get their own guided set-up.
                    st.subheader("Set-up experiments")
                    step_config[step_key] = set_up(step_config[step_key])
                elif step_key != "pipeline_step":
                    step_config[step_key] = st.text_input(step_key, step_config[step_key])
        with view_col:
            view(step_config)
        step_configs.append(step_config)
    config_file = json.dumps(step_configs, indent=4)
    output_path = st.text_input("Output file path", "config_files/experiment_config.json")
    # Guard: dirname is "" when the user enters a bare filename, and
    # os.makedirs("") raises FileNotFoundError.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    create_button = st.button("Save config file")
    if create_button:
        with open(output_path, "w") as f:
            f.write(config_file)
|