Andrea Maldonado committed on
Commit
987eb53
·
1 Parent(s): 23787b5

HF changes

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +0 -291
  3. utils/config_fabric.py +0 -1
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: indigo
5
  colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.37.1
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
 
5
  colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.37.1
8
+ app_file: utils/config_fabric.py
9
  pinned: false
10
  license: mit
11
  ---
app.py DELETED
@@ -1,291 +0,0 @@
1
- from copy import deepcopy
2
- from importlib import reload
3
- from itertools import product as cproduct
4
- from itertools import combinations
5
- from pylab import *
6
- import itertools
7
- import json
8
- import math
9
- import os
10
- import pandas as pd
11
- import pm4py
12
- import random
13
- import streamlit as st
14
- import subprocess
15
-
16
- st.set_page_config(layout='wide')
17
- INPUT_XES="output/inputlog_temp.xes"
18
-
19
- """
20
- # Configuration File fabric for
21
- ## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
22
- """
23
- def double_switch(label_left, label_right, third_label=None, fourth_label=None):
24
- if third_label==None and fourth_label==None:
25
- # Create two columns for the labels and toggle switch
26
- col0, col1, col2, col3, col4 = st.columns([2,1,1,1,2])
27
- else:
28
- # Create two columns for the labels and toggle switch
29
- col0, col1, col2, col3, col4, col5, col6, col7, col8 = st.columns([1,1,1,1,1,1,1,1,1])
30
-
31
- # Add labels to the columns
32
- with col1:
33
- st.write(label_left)
34
-
35
- with col2:
36
- # Create the toggle switch
37
- toggle_option = st.toggle(" ",value=False,
38
- key="toggle_switch_"+label_left,
39
- )
40
-
41
- with col3:
42
- st.write(label_right)
43
- if third_label is None and fourth_label is None:return toggle_option
44
- else:
45
- with col5:
46
- st.write(third_label)
47
-
48
- with col6:
49
- # Create the toggle switch
50
- toggle_option_2 = st.toggle(" ",value=False,
51
- key="toggle_switch_"+third_label,
52
- )
53
-
54
- with col7:
55
- st.write(fourth_label)
56
- return toggle_option, toggle_option_2
57
-
58
- def multi_button(labels):
59
- cols = st.columns(len(labels))
60
- activations = []
61
- for col, label in zip(cols, labels):
62
- activations.append(col.button(label))
63
- return activations
64
-
65
- def input_multicolumn(labels, default_values, n_cols=5):
66
- result = {}
67
- cols = st.columns(n_cols)
68
- factor = math.ceil(len(labels)/n_cols)
69
- extended = cols.copy()
70
- for _ in range(factor):
71
- extended.extend(cols)
72
- for label, default_value, col in zip(labels, default_values, extended):
73
- with col:
74
- result[label] = col.text_input(label, default_value, key=f"input_"+label+'_'+str(default_value))
75
- return result.values()
76
-
77
- def split_list(input_list, n):
78
- # Calculate the size of each chunk
79
- k, m = divmod(len(input_list), n)
80
- # Use list comprehension to create n sublists
81
- return [input_list[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]
82
-
83
- def get_ranges_from_stats(stats, tuple_values):
84
- col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
85
- stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
86
- #tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
87
- result = [f"np.around({x}, 2)" for x in stats['range']]
88
- result = ", ".join(result)
89
- return result
90
-
91
- def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial"):
92
- if "combinatorial" in method:
93
- sel_features = df.index.to_list()
94
- parameters_o = "objectives, "
95
- parameters = get_ranges_from_stats(df, sorted(objectives))
96
- objectives = sorted(sel_features)
97
- tasks = f"list(cproduct({parameters}))[0]"
98
-
99
- elif method=="range-from-csv":
100
- tasks = ""
101
- for objective in objectives:
102
- min_col, max_col, step_col = st.columns(3)
103
- with min_col:
104
- selcted_min = st.slider(objective+': min', min_value=float(df[objective].min()), max_value=float(df[objective].max()), value=df[objective].quantile(0.1), step=0.1, key=objective+"min")
105
- with max_col:
106
- selcted_max = st.slider('max', min_value=selcted_min, max_value=float(df[objective].max()), value=df[objective].quantile(0.9), step=0.1, key=objective+"max")
107
- with step_col:
108
- step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/(df[objective].min()+0.0001), step=0.01, key=objective+"step")
109
- tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
110
- else :#method=="range-manual":
111
- experitments = []
112
- tasks=""
113
- if objectives != None:
114
- cross_labels = [feature[0]+': '+feature[1] for feature in list(cproduct(objectives,['min', 'max', 'step']))]
115
- cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in list(cproduct(list(df.values()), ['*1', '*2', '/3']))]
116
- ranges = zip(objectives, split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=3)), n_para_obj))
117
- for objective, range_value in ranges:
118
- selcted_min, selcted_max, step_value = range_value
119
- tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
120
-
121
- try:
122
- cartesian_product = list(cproduct(*eval(tasks)))
123
- experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
124
- return experiments
125
- except SyntaxError as e:
126
- st.write("Please select valid features above.")
127
- sys.exit(1)
128
- except TypeError as e:
129
- st.write("Please select at least 2 values to define.")
130
- sys.exit(1)
131
-
132
- def set_generator_experiments(generator_params):
133
- def handle_csv_file(grid_option):
134
- uploaded_file = st.file_uploader("Pick a csv-file containing feature values for features:", type="csv")
135
- if uploaded_file is not None:
136
- df = pd.read_csv(uploaded_file)
137
- sel_features = st.multiselect("Selected features", list(df.columns))
138
- if sel_features:
139
- df = df[sel_features]
140
- return df, sel_features
141
- return None, None
142
-
143
- def handle_combinatorial(sel_features, stats, tuple_values):
144
- triangular_option = double_switch("Square", "Triangular")
145
- if triangular_option:
146
- experiments = []
147
- elements = sel_features
148
- # List to store all combinations
149
- all_combinations = [combinations(sel_features, r) for r in range(1, len(sel_features) + 1)]
150
- all_combinations = [comb for sublist in all_combinations for comb in sublist]
151
-
152
- # Print or use the result as needed
153
- for comb in all_combinations:
154
- sel_stats = stats.loc[sorted(list(comb))]
155
- experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
156
- else: # Square
157
- experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
158
- return experiments
159
-
160
- def handle_csv_option(grid_option, df, sel_features):
161
- if grid_option:
162
- combinatorial = double_switch("Range", "Combinatorial")
163
- if combinatorial:
164
- add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
165
- stats = df.describe().transpose().sort_index()
166
- stats[f"{int(add_quantile)}%"] = df.quantile(q=add_quantile / 100)
167
- st.write(stats)
168
- tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
169
- return handle_combinatorial(sel_features, stats, tuple_values)
170
- else: # Range
171
- return create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
172
- else: # Point
173
- st.write(df)
174
- return df.to_dict(orient='records')
175
-
176
- def feature_select():
177
- return st.multiselect("Selected features", list(generator_params['experiment'].keys()))
178
-
179
- def handle_manual_option(grid_option):
180
- if grid_option:
181
- combinatorial = double_switch("Range", "Combinatorial")
182
- if combinatorial:
183
- col1, col2 = st.columns([1,4])
184
- with col1:
185
- num_values = st.number_input('How many values to define?', min_value=2, step=1)
186
- with col2:
187
- sel_features = feature_select()
188
-
189
- values_indexes = ["value "+str(i+1) for i in range(num_values)]
190
- values_defaults = ['*(1+2*0.'+str(i)+')' for i in range(num_values)]
191
- cross_labels = [feature[0]+': '+feature[1] for feature in list(cproduct(sel_features,values_indexes))]
192
- cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in list(cproduct(list(generator_params['experiment'].values()), values_defaults))]
193
- parameters = split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=num_values)), len(sel_features))
194
- tasks = f"list({parameters})"
195
-
196
- tasks_df = pd.DataFrame(eval(tasks), index=sel_features, columns=values_indexes)
197
- tasks_df = tasks_df.astype(float)
198
- return handle_combinatorial(sel_features, tasks_df, values_indexes)
199
-
200
- else: # Range
201
- sel_features = feature_select()
202
- return create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")
203
-
204
- else: # Point
205
- sel_features = feature_select()
206
- #sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
207
-
208
- experiment = {sel_feature: float(st.text_input(sel_feature, generator_params['experiment'][sel_feature])) for sel_feature in sel_features}
209
- return [experiment]
210
- return[]
211
-
212
-
213
- grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")
214
-
215
- if csv_option:
216
- df, sel_features = handle_csv_file(grid_option)
217
- if df is not None and sel_features is not None:
218
- experiments = handle_csv_option(grid_option, df, sel_features)
219
- else:
220
- experiments = []
221
- else: # Manual
222
- experiments = handle_manual_option(grid_option)
223
-
224
- generator_params['experiment'] = experiments
225
- st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")
226
-
227
- """
228
- #### Configuration space
229
- """
230
- updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
231
- for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
232
- generator_params['config_space'][key] = eval(new_value)
233
- generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
234
-
235
- return generator_params
236
-
237
- if __name__ == '__main__':
238
- config_layout = json.load(open("config_files/config_layout.json"))
239
- type(config_layout)
240
- step_candidates = ["instance_augmentation","event_logs_generation","feature_extraction","benchmark_test"]
241
- pipeline_steps = st.multiselect(
242
- "Choose pipeline step",
243
- step_candidates,
244
- ["event_logs_generation"]
245
- )
246
- step_configs = []
247
- set_col, view_col = st.columns([3, 2])
248
- for pipeline_step in pipeline_steps:
249
- step_config = [d for d in config_layout if d['pipeline_step'] == pipeline_step][0]
250
- with set_col:
251
- st.header(pipeline_step)
252
- for step_key in step_config.keys():
253
- if step_key == "generator_params":
254
- st.subheader("Set-up experiments")
255
- step_config[step_key] = set_generator_experiments(step_config[step_key])
256
- elif step_key == "feature_params":
257
- layout_features = list(step_config[step_key]['feature_set'])
258
- step_config[step_key]["feature_set"] = st.multiselect(
259
- "features to extract",
260
- layout_features)
261
- elif step_key != "pipeline_step":
262
- step_config[step_key] = st.text_input(step_key, step_config[step_key])
263
- with view_col:
264
- st.write(step_config)
265
- step_configs.append(step_config)
266
- config_file = json.dumps(step_configs, indent=4)
267
- output_path = st.text_input("Output file path", "config_files/experiment_config.json")
268
- os.makedirs(os.path.dirname(output_path), exist_ok=True)
269
- save_labels = ["Save config file", "Save and run config_file"]
270
- save_labels = ["Save configuration file"]
271
- #create_button, create_run_button = multi_button(save_labels)
272
- create_button = multi_button(save_labels)
273
- # FIXME: Bug: automatically updates the experiment_config.json file even without pressing the save button
274
- if create_button: # or create_run_button:
275
- with open(output_path, "w") as f:
276
- f.write(config_file)
277
- st.write("Saved configuration in ", output_path, ". Run command:")
278
- #if create_run_button:
279
- if True:
280
- var = f"python -W ignore main.py -a {output_path}"
281
- st.code(var, language='bash')
282
- if False: #FIXME: Command fails when using multiprocessing
283
- command = var.split()
284
-
285
- # Run the command
286
- result = subprocess.run(command, capture_output=True, text=True)
287
-
288
- if len(result.stderr)==0:
289
- st.write(result.stdout)
290
- else:
291
- st.write("ERROR: ", result.stderr)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/config_fabric.py CHANGED
@@ -8,7 +8,6 @@ import json
8
  import math
9
  import os
10
  import pandas as pd
11
- import pm4py
12
  import random
13
  import streamlit as st
14
  import subprocess
 
8
  import math
9
  import os
10
  import pandas as pd
 
11
  import random
12
  import streamlit as st
13
  import subprocess