Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
a281f3a
1
Parent(s):
1539c36
Implements manual options in dashboard
Browse files- utils/config_fabric.py +81 -36
utils/config_fabric.py
CHANGED
@@ -4,6 +4,7 @@ from itertools import product, combinations
|
|
4 |
from pylab import *
|
5 |
import itertools
|
6 |
import json
|
|
|
7 |
import os
|
8 |
import pandas as pd
|
9 |
import pm4py
|
@@ -17,9 +18,13 @@ INPUT_XES="output/inputlog_temp.xes"
|
|
17 |
# Configuration File fabric for
|
18 |
## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
|
19 |
"""
|
20 |
-
def double_switch(label_left, label_right):
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Add labels to the columns
|
25 |
with col1:
|
@@ -33,12 +38,40 @@ def double_switch(label_left, label_right):
|
|
33 |
|
34 |
with col3:
|
35 |
st.write(label_right)
|
36 |
-
return toggle_option
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
|
43 |
stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
|
44 |
#tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
|
@@ -51,11 +84,10 @@ def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial")
|
|
51 |
#if n_para_obj==len(objectives):
|
52 |
sel_features = df.index.to_list()
|
53 |
parameters_o = "objectives, "
|
54 |
-
parameters =
|
55 |
tasks = f"list(itertools.product({parameters}))[0]"
|
56 |
|
57 |
-
|
58 |
-
sel_features = objectives
|
59 |
tasks = ""
|
60 |
for objective in objectives:
|
61 |
min_col, max_col, step_col = st.columns(3)
|
@@ -66,19 +98,26 @@ def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial")
|
|
66 |
with step_col:
|
67 |
step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/df[objective].min(), step=0.01, key=objective+"step")
|
68 |
tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
cartesian_product = list(product(*eval(tasks)))
|
71 |
-
experiments = [{key: value[idx] for idx, key in enumerate(
|
72 |
return experiments
|
73 |
|
74 |
-
def
|
75 |
create_button = False
|
76 |
experiments = []
|
77 |
|
78 |
-
|
79 |
-
if True:
|
80 |
-
grid_option = double_switch("Point-", "Grid-based")
|
81 |
-
csv_option = double_switch("Manual", "From CSV")
|
82 |
if csv_option:
|
83 |
uploaded_file = st.file_uploader(f"Pick a csv-file containing feature values for features:", type="csv")
|
84 |
if uploaded_file is not None:
|
@@ -92,7 +131,7 @@ def set_up(generator_params):
|
|
92 |
add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
|
93 |
stats = df.describe().transpose()
|
94 |
stats[str(int(add_quantile))+"%"] = df.quantile(q=add_quantile/100)
|
95 |
-
|
96 |
tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
|
97 |
triangular_option = double_switch("Square", "Triangular")
|
98 |
if triangular_option:
|
@@ -100,7 +139,7 @@ def set_up(generator_params):
|
|
100 |
# List to store all combinations
|
101 |
all_combinations = []
|
102 |
|
103 |
-
# Generate combinations of length 1, 2, and
|
104 |
for r in range(1, len(elements) + 1):
|
105 |
# Generate combinations of length r
|
106 |
combinations_r = list(combinations(elements, r))
|
@@ -109,29 +148,35 @@ def set_up(generator_params):
|
|
109 |
# Print or use the result as needed
|
110 |
for comb in all_combinations:
|
111 |
sel_stats = stats.loc[list(comb)]
|
112 |
-
experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values))
|
113 |
-
else:
|
114 |
-
experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values))
|
115 |
-
else:
|
116 |
-
experiments = create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range")
|
117 |
-
else:
|
118 |
-
|
119 |
experiments = df.to_dict(orient='records')
|
|
|
120 |
else:
|
121 |
sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
|
|
|
122 |
if sel_features != None:
|
123 |
-
|
124 |
-
generator_params['experiment']
|
|
|
|
|
|
|
|
|
|
|
125 |
generator_params['experiment'] = experiments
|
126 |
-
st.write(f"...result in {len(generator_params['experiment'])}
|
127 |
|
128 |
"""
|
129 |
#### Configuration space
|
130 |
"""
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
#generator_params['config_space'] = st.text_input('config_space', generator_params['config_space'])
|
135 |
generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
|
136 |
return generator_params
|
137 |
|
@@ -153,11 +198,11 @@ if __name__ == '__main__':
|
|
153 |
for step_key in step_config.keys():
|
154 |
if step_key == "generator_params":
|
155 |
st.subheader("Set-up experiments")
|
156 |
-
step_config[step_key] =
|
157 |
elif step_key != "pipeline_step":
|
158 |
step_config[step_key] = st.text_input(step_key, step_config[step_key])
|
159 |
with view_col:
|
160 |
-
|
161 |
step_configs.append(step_config)
|
162 |
config_file = json.dumps(step_configs, indent=4)
|
163 |
output_path = st.text_input("Output file path", "config_files/experiment_config.json")
|
|
|
4 |
from pylab import *
|
5 |
import itertools
|
6 |
import json
|
7 |
+
import math
|
8 |
import os
|
9 |
import pandas as pd
|
10 |
import pm4py
|
|
|
18 |
# Configuration File fabric for
|
19 |
## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
|
20 |
"""
|
21 |
+
def double_switch(label_left, label_right, third_label=None, fourth_label=None):
|
22 |
+
if third_label==None and fourth_label==None:
|
23 |
+
# Create two columns for the labels and toggle switch
|
24 |
+
col0, col1, col2, col3, col4 = st.columns([2,1,1,1,2])
|
25 |
+
else:
|
26 |
+
# Create two columns for the labels and toggle switch
|
27 |
+
col0, col1, col2, col3, col4, col5, col6, col7, col8 = st.columns([1,1,1,1,1,1,1,1,1])
|
28 |
|
29 |
# Add labels to the columns
|
30 |
with col1:
|
|
|
38 |
|
39 |
with col3:
|
40 |
st.write(label_right)
|
41 |
+
if third_label is None and fourth_label is None:return toggle_option
|
42 |
+
else:
|
43 |
+
with col5:
|
44 |
+
st.write(third_label)
|
45 |
+
|
46 |
+
with col6:
|
47 |
+
# Create the toggle switch
|
48 |
+
toggle_option_2 = st.toggle(" ",value=False,
|
49 |
+
key="toggle_switch_"+third_label,
|
50 |
+
)
|
51 |
+
|
52 |
+
with col7:
|
53 |
+
st.write(fourth_label)
|
54 |
+
return toggle_option, toggle_option_2
|
55 |
+
|
56 |
+
def input_multicolumn(labels, default_values, n_cols=5):
    """Render one text input per label, spread over ``n_cols`` columns.

    Inputs are placed into the Streamlit columns in round-robin order, so
    once every column has received a widget the next one wraps around to
    the first column again.

    Args:
        labels: Widget labels; each label is also part of the widget key.
        default_values: Initial values, paired positionally with ``labels``.
        n_cols: Number of columns to create.

    Returns:
        A dict values view holding what each text input returned, in
        first-seen label order (a repeated label keeps only its last value).
    """
    columns = st.columns(n_cols)
    # Repeat the column list often enough that zip() always runs out of
    # labels before it runs out of column slots.
    repeats = math.ceil(len(labels) / n_cols) + 1
    slots = columns * repeats

    entered = {}
    for label, default_value, slot in zip(labels, default_values, slots):
        with slot:
            # The default value is part of the key, so a changed default
            # yields a different widget key.
            entered[label] = slot.text_input(
                label,
                default_value,
                key="input_" + label + "_" + str(default_value),
            )
    return entered.values()
|
67 |
+
|
68 |
+
def split_list(input_list, n):
    """Split ``input_list`` into ``n`` contiguous chunks of near-equal size.

    The first ``len(input_list) % n`` chunks receive one extra element, so
    chunk lengths differ by at most one. When ``n`` exceeds the number of
    elements, the trailing chunks are empty.

    Args:
        input_list: Sliceable sequence to partition.
        n: Number of chunks to produce.

    Returns:
        A list of ``n`` slices of ``input_list``; an empty list when
        ``n <= 0``.
    """
    # A non-positive chunk count has no meaningful partition. Guarding here
    # avoids the ZeroDivisionError divmod() would raise for n == 0 and keeps
    # the result consistent with negative n, which already produced [].
    if n <= 0:
        return []

    # k is the base chunk size, m the number of chunks that get one extra.
    k, m = divmod(len(input_list), n)
    return [
        input_list[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]
        for i in range(n)
    ]
|
73 |
+
|
74 |
+
def get_ranges_from_stats(stats, tuple_values):
|
75 |
col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
|
76 |
stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
|
77 |
#tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
|
|
|
84 |
#if n_para_obj==len(objectives):
|
85 |
sel_features = df.index.to_list()
|
86 |
parameters_o = "objectives, "
|
87 |
+
parameters = get_ranges_from_stats(df, sorted(objectives))
|
88 |
tasks = f"list(itertools.product({parameters}))[0]"
|
89 |
|
90 |
+
elif method=="range-from-csv":
|
|
|
91 |
tasks = ""
|
92 |
for objective in objectives:
|
93 |
min_col, max_col, step_col = st.columns(3)
|
|
|
98 |
with step_col:
|
99 |
step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/df[objective].min(), step=0.01, key=objective+"step")
|
100 |
tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
|
101 |
+
else :#method=="range-manual":
|
102 |
+
experitments = []
|
103 |
+
tasks=""
|
104 |
+
if objectives != None:
|
105 |
+
cross_labels = [feature[0]+': '+feature[1] for feature in list(product(objectives,['min', 'max', 'step']))]
|
106 |
+
cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in list(product(list(df.values()), ['*1', '*2', '/3']))]
|
107 |
+
ranges = zip(objectives, split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=3)), n_para_obj))
|
108 |
+
for objective, range_value in ranges:
|
109 |
+
selcted_min, selcted_max, step_value = range_value
|
110 |
+
tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
|
111 |
|
112 |
cartesian_product = list(product(*eval(tasks)))
|
113 |
+
experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
|
114 |
return experiments
|
115 |
|
116 |
+
def set_generator_experiments(generator_params):
|
117 |
create_button = False
|
118 |
experiments = []
|
119 |
|
120 |
+
grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")
|
|
|
|
|
|
|
121 |
if csv_option:
|
122 |
uploaded_file = st.file_uploader(f"Pick a csv-file containing feature values for features:", type="csv")
|
123 |
if uploaded_file is not None:
|
|
|
131 |
add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
|
132 |
stats = df.describe().transpose()
|
133 |
stats[str(int(add_quantile))+"%"] = df.quantile(q=add_quantile/100)
|
134 |
+
st.write(stats)
|
135 |
tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
|
136 |
triangular_option = double_switch("Square", "Triangular")
|
137 |
if triangular_option:
|
|
|
139 |
# List to store all combinations
|
140 |
all_combinations = []
|
141 |
|
142 |
+
# Generate combinations of length 1, 2, ... and len(elements)
|
143 |
for r in range(1, len(elements) + 1):
|
144 |
# Generate combinations of length r
|
145 |
combinations_r = list(combinations(elements, r))
|
|
|
148 |
# Print or use the result as needed
|
149 |
for comb in all_combinations:
|
150 |
sel_stats = stats.loc[list(comb)]
|
151 |
+
experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
|
152 |
+
else: #Square
|
153 |
+
experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
|
154 |
+
else: #Range
|
155 |
+
experiments = create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
|
156 |
+
else: #Point
|
157 |
+
st.write(df)
|
158 |
experiments = df.to_dict(orient='records')
|
159 |
+
#Manual
|
160 |
else:
|
161 |
sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
|
162 |
+
experitments = []
|
163 |
if sel_features != None:
|
164 |
+
if grid_option:
|
165 |
+
experiments = create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")
|
166 |
+
else:
|
167 |
+
experiment = {}
|
168 |
+
for sel_feature in sel_features:
|
169 |
+
experiment[sel_feature] = float(st.text_input(sel_feature, generator_params['experiment'][sel_feature]))
|
170 |
+
experiments.append(experiment)
|
171 |
generator_params['experiment'] = experiments
|
172 |
+
st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")
|
173 |
|
174 |
"""
|
175 |
#### Configuration space
|
176 |
"""
|
177 |
+
updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
|
178 |
+
for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
|
179 |
+
generator_params['config_space'][key] = new_value
|
|
|
180 |
generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
|
181 |
return generator_params
|
182 |
|
|
|
198 |
for step_key in step_config.keys():
|
199 |
if step_key == "generator_params":
|
200 |
st.subheader("Set-up experiments")
|
201 |
+
step_config[step_key] = set_generator_experiments(step_config[step_key])
|
202 |
elif step_key != "pipeline_step":
|
203 |
step_config[step_key] = st.text_input(step_key, step_config[step_key])
|
204 |
with view_col:
|
205 |
+
st.write(step_config)
|
206 |
step_configs.append(step_config)
|
207 |
config_file = json.dumps(step_configs, indent=4)
|
208 |
output_path = st.text_input("Output file path", "config_files/experiment_config.json")
|