Spaces:
Sleeping
Sleeping
Andrea Maldonado
committed on
Commit
·
987eb53
1
Parent(s):
23787b5
HF changes
Browse files- README.md +1 -1
- app.py +0 -291
- utils/config_fabric.py +0 -1
README.md
CHANGED
@@ -5,7 +5,7 @@ colorFrom: indigo
|
|
5 |
colorTo: blue
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.37.1
|
8 |
-
app_file:
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
---
|
|
|
5 |
colorTo: blue
|
6 |
sdk: streamlit
|
7 |
sdk_version: 1.37.1
|
8 |
+
app_file: utils/config_fabric.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
---
|
app.py
DELETED
@@ -1,291 +0,0 @@
|
|
1 |
-
from copy import deepcopy
|
2 |
-
from importlib import reload
|
3 |
-
from itertools import product as cproduct
|
4 |
-
from itertools import combinations
|
5 |
-
from pylab import *
|
6 |
-
import itertools
|
7 |
-
import json
|
8 |
-
import math
|
9 |
-
import os
|
10 |
-
import pandas as pd
|
11 |
-
import pm4py
|
12 |
-
import random
|
13 |
-
import streamlit as st
|
14 |
-
import subprocess
|
15 |
-
|
16 |
-
# Page-level Streamlit configuration: request the wide layout.
st.set_page_config(layout="wide")

# NOTE(review): presumably the location of a temporary input event log; it is
# not referenced elsewhere in the visible part of this file -- confirm usage.
INPUT_XES = "output/inputlog_temp.xes"

# Bare string expression used as the page title (presumably rendered as
# markdown by Streamlit "magic" -- confirm); the text must stay unchanged.
"""
# Configuration File fabric for
## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
"""
|
23 |
-
def double_switch(label_left, label_right, third_label=None, fourth_label=None):
    """Render one or two labelled toggle switches on a single row.

    Each switch is drawn as ``<left label> [toggle] <right label>``.

    Args:
        label_left: Text shown to the left of the first toggle; also used to
            build the widget key of that toggle.
        label_right: Text shown to the right of the first toggle.
        third_label: Optional text shown to the left of a second toggle; also
            used to build the widget key of that toggle.
        fourth_label: Optional text shown to the right of the second toggle.

    Returns:
        The state of the first toggle when neither ``third_label`` nor
        ``fourth_label`` is given, otherwise the tuple
        ``(first_toggle_state, second_toggle_state)``.
    """
    # Decide the layout once so both branches below agree on it.
    single_switch = third_label is None and fourth_label is None

    if single_switch:
        # Five columns: padding, left label, toggle, right label, padding.
        columns = st.columns([2, 1, 1, 1, 2])
    else:
        # Nine equally wide columns; columns 1-3 hold the first switch and
        # columns 5-7 the second one.
        columns = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1])

    with columns[1]:
        st.write(label_left)

    with columns[2]:
        toggle_option = st.toggle(
            " ",
            value=False,
            key="toggle_switch_" + label_left,
        )

    with columns[3]:
        st.write(label_right)

    if single_switch:
        return toggle_option

    with columns[5]:
        st.write(third_label)

    with columns[6]:
        toggle_option_2 = st.toggle(
            " ",
            value=False,
            key="toggle_switch_" + third_label,
        )

    with columns[7]:
        st.write(fourth_label)

    return toggle_option, toggle_option_2
|
57 |
-
|
58 |
-
def multi_button(labels):
    """Place one button per label side by side and report their states.

    Args:
        labels: Sequence of button captions; one column is created per entry.

    Returns:
        A list with the return value of each ``button`` call, in label order.
    """
    button_columns = st.columns(len(labels))
    return [
        column.button(caption)
        for column, caption in zip(button_columns, labels)
    ]
|
64 |
-
|
65 |
-
def input_multicolumn(labels, default_values, n_cols=5):
    """Lay out one text input per label across ``n_cols`` columns.

    Labels are assigned to the columns in round-robin order, so the inputs
    wrap onto further rows once every column has been used.

    Args:
        labels: Captions of the text inputs; also used as result keys.
        default_values: Initial value for each input, paired with ``labels``
            by position.
        n_cols: Number of columns to spread the inputs over.

    Returns:
        A view of the entered values, keyed internally by label (a repeated
        label therefore keeps only its last value).
    """
    entered = {}
    columns = st.columns(n_cols)
    # Cycling through the columns pairs the k-th label with column k % n_cols.
    placements = zip(labels, default_values, itertools.cycle(columns))
    for caption, initial, column in placements:
        widget_key = "input_" + caption + "_" + str(initial)
        with column:
            entered[caption] = column.text_input(caption, initial, key=widget_key)
    return entered.values()
|
76 |
-
|
77 |
-
def split_list(input_list, n):
    """Split ``input_list`` into ``n`` consecutive chunks of near-equal size.

    When the length is not divisible by ``n``, the leading chunks each receive
    one extra element.

    Args:
        input_list: Sequence to split (anything supporting ``len`` and slicing).
        n: Number of chunks to produce.

    Returns:
        A list of ``n`` slices of ``input_list``.
    """
    base_size, remainder = divmod(len(input_list), n)
    chunks = []
    start = 0
    for position in range(n):
        # The first ``remainder`` chunks are one element longer.
        stop = start + base_size + (1 if position < remainder else 0)
        chunks.append(input_list[start:stop])
        start = stop
    return chunks
|
82 |
-
|
83 |
-
def get_ranges_from_stats(stats, tuple_values):
    """Turn selected statistic columns into ``np.around(...)`` source snippets.

    A ``'range'`` column is added to ``stats`` in place; it holds, for every
    row, a one-element tuple wrapping the float values of the columns named in
    ``tuple_values``.

    Args:
        stats: DataFrame with one row per feature and the statistic columns.
        tuple_values: Names of the columns to read from each row.

    Returns:
        A single string with one ``np.around(<range>, 2)`` expression per row,
        joined by ``", "``; the caller evaluates it later.
    """
    accessors = []
    for column in tuple_values:
        accessors.append("x['{}'].astype(float)".format(column))
    row_expression = ", ".join(accessors)

    def _row_to_range(x):
        # The evaluated expression refers to the row by the name ``x``.
        return tuple([eval(row_expression)])

    stats['range'] = stats.apply(_row_to_range, axis=1)
    return ", ".join("np.around({}, 2)".format(entry) for entry in stats['range'])
|
90 |
-
|
91 |
-
def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial"):
    """Build the list of experiments as a grid over the selected objectives.

    Args:
        df: Source of values. A statistics DataFrame indexed by feature for
            ``"combinatorial"``, a DataFrame with one column per feature for
            ``"range-from-csv"``, and a mapping ``feature -> default value``
            for any other method (manual ranges).
        objectives: For ``"combinatorial"`` the statistic columns to combine;
            otherwise the selected feature names.
        n_para_obj: Number of chunks the manual range inputs are split into
            (callers pass the number of selected features).
        method: ``"combinatorial"``, ``"range-from-csv"`` or anything else for
            manual ranges.

    Returns:
        A list of dicts, one per experiment, mapping feature name to value.

    Raises:
        SystemExit: After writing a hint to the page when the grid expression
            cannot be built (``SyntaxError``) or expanded (``TypeError``).
    """
    # Local import: the file never imports ``sys`` explicitly.
    import sys

    if "combinatorial" in method:
        # Ranges are read from the statistic columns named in ``objectives``;
        # afterwards ``objectives`` becomes the (sorted) feature names.
        parameters = get_ranges_from_stats(df, sorted(objectives))
        objectives = sorted(df.index.to_list())
        tasks = f"list(cproduct({parameters}))[0]"

    elif method == "range-from-csv":
        tasks = ""
        for objective in objectives:
            min_col, max_col, step_col = st.columns(3)
            with min_col:
                selected_min = st.slider(objective+': min', min_value=float(df[objective].min()), max_value=float(df[objective].max()), value=df[objective].quantile(0.1), step=0.1, key=objective+"min")
            with max_col:
                selected_max = st.slider('max', min_value=selected_min, max_value=float(df[objective].max()), value=df[objective].quantile(0.9), step=0.1, key=objective+"max")
            with step_col:
                step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/(df[objective].min()+0.0001), step=0.01, key=objective+"step")
            tasks += f"np.around(np.arange({selected_min}, {selected_max}+{step_value}, {step_value}),2), "

    else:  # method == "range-manual"
        tasks = ""
        # An empty selection leaves ``tasks`` empty, which is reported below
        # as "Please select valid features above." instead of crashing while
        # splitting the inputs into zero chunks.
        if objectives:
            cross_labels = [
                objective + ': ' + bound
                for objective, bound in cproduct(objectives, ['min', 'max', 'step'])
            ]
            # Defaults are derived from the value of the *selected* feature so
            # that every label is paired with its own feature's default.
            # NOTE(review): eval() is applied to configured default values;
            # make sure the layout file is trusted.
            cross_values = [
                round(eval(str(df[objective]) + factor), 2)
                for objective, factor in cproduct(objectives, ['*1', '*2', '/3'])
            ]
            entered = list(input_multicolumn(cross_labels, cross_values, n_cols=3))
            for objective, range_value in zip(objectives, split_list(entered, n_para_obj)):
                selected_min, selected_max, step_value = range_value
                tasks += f"np.around(np.arange({selected_min}, {selected_max}+{step_value}, {step_value}),2), "

    try:
        # ``tasks`` is source text describing one value array per objective.
        cartesian_product = list(cproduct(*eval(tasks)))
        experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
        return experiments
    except SyntaxError:
        st.write("Please select valid features above.")
        sys.exit(1)
    except TypeError:
        st.write("Please select at least 2 values to define.")
        sys.exit(1)
|
131 |
-
|
132 |
-
def set_generator_experiments(generator_params):
    """Let the user define the experiments and generator settings in the UI.

    The experiments are either single points or grids, entered manually or
    derived from an uploaded CSV file, depending on the two toggles shown at
    the top of the section.

    Args:
        generator_params: Mapping with the keys ``'experiment'`` (feature
            name -> default value), ``'config_space'`` and ``'n_trials'``.
            It is updated in place.

    Returns:
        The same ``generator_params`` mapping, with ``'experiment'`` replaced
        by the list of experiments and ``'config_space'`` / ``'n_trials'``
        replaced by the values entered in the UI.
    """
    def handle_csv_file(grid_option):
        # Returns (DataFrame restricted to the chosen columns, chosen columns)
        # or (None, None) while no file or no column has been selected.
        uploaded_file = st.file_uploader("Pick a csv-file containing feature values for features:", type="csv")
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
            sel_features = st.multiselect("Selected features", list(df.columns))
            if sel_features:
                df = df[sel_features]
                return df, sel_features
        return None, None

    def handle_combinatorial(sel_features, stats, tuple_values):
        # "Triangular" builds a grid for every non-empty subset of the
        # selected features; "Square" builds one grid over all of them.
        triangular_option = double_switch("Square", "Triangular")
        if triangular_option:
            experiments = []
            all_combinations = [combinations(sel_features, r) for r in range(1, len(sel_features) + 1)]
            all_combinations = [comb for sublist in all_combinations for comb in sublist]
            for comb in all_combinations:
                sel_stats = stats.loc[sorted(list(comb))]
                experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
        else:  # Square
            experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
        return experiments

    def handle_csv_option(grid_option, df, sel_features):
        if grid_option:
            combinatorial = double_switch("Range", "Combinatorial")
            if combinatorial:
                add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
                stats = df.describe().transpose().sort_index()
                stats[f"{int(add_quantile)}%"] = df.quantile(q=add_quantile / 100)
                st.write(stats)
                tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
                return handle_combinatorial(sel_features, stats, tuple_values)
            # Range
            return create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
        # Point: every CSV row is one experiment.
        st.write(df)
        return df.to_dict(orient='records')

    def feature_select():
        return st.multiselect("Selected features", list(generator_params['experiment'].keys()))

    def handle_manual_option(grid_option):
        if grid_option:
            combinatorial = double_switch("Range", "Combinatorial")
            if combinatorial:
                col1, col2 = st.columns([1, 4])
                with col1:
                    num_values = st.number_input('How many values to define?', min_value=2, step=1)
                with col2:
                    sel_features = feature_select()

                # Nothing selected yet: there is no grid to build, and
                # splitting the inputs into zero chunks would fail.
                if not sel_features:
                    return []

                values_indexes = ["value "+str(i+1) for i in range(num_values)]
                values_defaults = ['*(1+2*0.'+str(i)+')' for i in range(num_values)]
                cross_labels = [feature[0]+': '+feature[1] for feature in cproduct(sel_features, values_indexes)]
                # Defaults are derived from the value of the *selected*
                # feature so labels and defaults stay aligned.
                # NOTE(review): eval() is applied to configured default
                # values; make sure the layout file is trusted.
                cross_values = [
                    round(eval(str(generator_params['experiment'][feature]) + factor), 2)
                    for feature, factor in cproduct(sel_features, values_defaults)
                ]
                parameters = split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=num_values)), len(sel_features))

                tasks_df = pd.DataFrame(parameters, index=sel_features, columns=values_indexes)
                tasks_df = tasks_df.astype(float)
                return handle_combinatorial(sel_features, tasks_df, values_indexes)

            # Range
            sel_features = feature_select()
            return create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")

        # Point: a single experiment with one value per selected feature.
        sel_features = feature_select()
        experiment = {sel_feature: float(st.text_input(sel_feature, generator_params['experiment'][sel_feature])) for sel_feature in sel_features}
        return [experiment]

    grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")

    if csv_option:
        df, sel_features = handle_csv_file(grid_option)
        if df is not None and sel_features is not None:
            experiments = handle_csv_option(grid_option, df, sel_features)
        else:
            experiments = []
    else:  # Manual
        experiments = handle_manual_option(grid_option)

    generator_params['experiment'] = experiments
    st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")

    # Bare string expression kept as a section heading (presumably rendered
    # by Streamlit "magic" -- confirm); the text must stay unchanged.
    """
    #### Configuration space
    """
    updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
    for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
        # SECURITY NOTE(review): eval() runs on text typed into the page.
        # Acceptable for a local tool only; consider ast.literal_eval if the
        # app is exposed to untrusted users.
        generator_params['config_space'][key] = eval(new_value)
    generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))

    return generator_params
|
236 |
-
|
237 |
-
if __name__ == '__main__':
    # Load the layout describing the available pipeline steps; the context
    # manager closes the file handle once it has been parsed.
    with open("config_files/config_layout.json") as layout_file:
        config_layout = json.load(layout_file)
    # NOTE(review): bare expression kept from the original; it looks like a
    # debugging leftover -- confirm whether it should be removed.
    type(config_layout)
    step_candidates = ["instance_augmentation", "event_logs_generation", "feature_extraction", "benchmark_test"]
    pipeline_steps = st.multiselect(
        "Choose pipeline step",
        step_candidates,
        ["event_logs_generation"]
    )
    step_configs = []
    set_col, view_col = st.columns([3, 2])
    for pipeline_step in pipeline_steps:
        step_config = [d for d in config_layout if d['pipeline_step'] == pipeline_step][0]
        with set_col:
            st.header(pipeline_step)
            for step_key in step_config.keys():
                if step_key == "generator_params":
                    st.subheader("Set-up experiments")
                    step_config[step_key] = set_generator_experiments(step_config[step_key])
                elif step_key == "feature_params":
                    layout_features = list(step_config[step_key]['feature_set'])
                    step_config[step_key]["feature_set"] = st.multiselect(
                        "features to extract",
                        layout_features)
                elif step_key != "pipeline_step":
                    step_config[step_key] = st.text_input(step_key, step_config[step_key])
        with view_col:
            st.write(step_config)
        step_configs.append(step_config)

    config_file = json.dumps(step_configs, indent=4)
    output_path = st.text_input("Output file path", "config_files/experiment_config.json")
    save_labels = ["Save configuration file"]
    create_button = multi_button(save_labels)

    # multi_button returns a list of button states. A non-empty list is always
    # truthy, so the states themselves must be checked; otherwise the file is
    # rewritten on every rerun even when no button was pressed.
    if any(create_button):
        # Create the target directory only when saving, and only when the
        # path actually has a directory part.
        output_dir = os.path.dirname(output_path)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w") as f:
            f.write(config_file)
        st.write("Saved configuration in ", output_path, ". Run command:")
        var = f"python -W ignore main.py -a {output_path}"
        st.code(var, language='bash')
        # FIXME: running the command directly from the app (via subprocess)
        # was disabled because it fails when using multiprocessing.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/config_fabric.py
CHANGED
@@ -8,7 +8,6 @@ import json
|
|
8 |
import math
|
9 |
import os
|
10 |
import pandas as pd
|
11 |
-
import pm4py
|
12 |
import random
|
13 |
import streamlit as st
|
14 |
import subprocess
|
|
|
8 |
import math
|
9 |
import os
|
10 |
import pandas as pd
|
|
|
11 |
import random
|
12 |
import streamlit as st
|
13 |
import subprocess
|