Andrea Maldonado committed on
Commit
a281f3a
·
1 Parent(s): 1539c36

Implements manual options in dashboard

Browse files
Files changed (1) hide show
  1. utils/config_fabric.py +81 -36
utils/config_fabric.py CHANGED
@@ -4,6 +4,7 @@ from itertools import product, combinations
4
  from pylab import *
5
  import itertools
6
  import json
 
7
  import os
8
  import pandas as pd
9
  import pm4py
@@ -17,9 +18,13 @@ INPUT_XES="output/inputlog_temp.xes"
17
  # Configuration File fabric for
18
  ## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
19
  """
20
- def double_switch(label_left, label_right):
21
- # Create two columns for the labels and toggle switch
22
- col0, col1, col2, col3, col4 = st.columns([4,1, 1, 1,4])
 
 
 
 
23
 
24
  # Add labels to the columns
25
  with col1:
@@ -33,12 +38,40 @@ def double_switch(label_left, label_right):
33
 
34
  with col3:
35
  st.write(label_right)
36
- return toggle_option
37
-
38
- def view(config_file):
39
- st.write(config_file)
40
-
41
- def get_ranges(stats, tuple_values):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
43
  stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
44
  #tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
@@ -51,11 +84,10 @@ def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial")
51
  #if n_para_obj==len(objectives):
52
  sel_features = df.index.to_list()
53
  parameters_o = "objectives, "
54
- parameters = get_ranges(df, sorted(objectives))
55
  tasks = f"list(itertools.product({parameters}))[0]"
56
 
57
- else:
58
- sel_features = objectives
59
  tasks = ""
60
  for objective in objectives:
61
  min_col, max_col, step_col = st.columns(3)
@@ -66,19 +98,26 @@ def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial")
66
  with step_col:
67
  step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/df[objective].min(), step=0.01, key=objective+"step")
68
  tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
 
 
 
 
 
 
 
 
 
 
69
 
70
  cartesian_product = list(product(*eval(tasks)))
71
- experiments = [{key: value[idx] for idx, key in enumerate(sel_features)} for value in cartesian_product]
72
  return experiments
73
 
74
- def set_up(generator_params):
75
  create_button = False
76
  experiments = []
77
 
78
- col1, col2 = st.columns(2)
79
- if True:
80
- grid_option = double_switch("Point-", "Grid-based")
81
- csv_option = double_switch("Manual", "From CSV")
82
  if csv_option:
83
  uploaded_file = st.file_uploader(f"Pick a csv-file containing feature values for features:", type="csv")
84
  if uploaded_file is not None:
@@ -92,7 +131,7 @@ def set_up(generator_params):
92
  add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
93
  stats = df.describe().transpose()
94
  stats[str(int(add_quantile))+"%"] = df.quantile(q=add_quantile/100)
95
- view(stats)
96
  tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
97
  triangular_option = double_switch("Square", "Triangular")
98
  if triangular_option:
@@ -100,7 +139,7 @@ def set_up(generator_params):
100
  # List to store all combinations
101
  all_combinations = []
102
 
103
- # Generate combinations of length 1, 2, and 3
104
  for r in range(1, len(elements) + 1):
105
  # Generate combinations of length r
106
  combinations_r = list(combinations(elements, r))
@@ -109,29 +148,35 @@ def set_up(generator_params):
109
  # Print or use the result as needed
110
  for comb in all_combinations:
111
  sel_stats = stats.loc[list(comb)]
112
- experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values))
113
- else:
114
- experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values))
115
- else:
116
- experiments = create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range")
117
- else:
118
- view(df)
119
  experiments = df.to_dict(orient='records')
 
120
  else:
121
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
 
122
  if sel_features != None:
123
- for sel_feature in sel_features:
124
- generator_params['experiment'][sel_feature] = float(st.text_input(sel_feature, generator_params['experiment'][sel_feature]))
 
 
 
 
 
125
  generator_params['experiment'] = experiments
126
- st.write(f"...result in {len(generator_params['experiment'])} experiments")
127
 
128
  """
129
  #### Configuration space
130
  """
131
- for key in generator_params['config_space'].keys():
132
- generator_params['config_space'][key] = st.text_input(key, generator_params['config_space'][key])
133
-
134
- #generator_params['config_space'] = st.text_input('config_space', generator_params['config_space'])
135
  generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
136
  return generator_params
137
 
@@ -153,11 +198,11 @@ if __name__ == '__main__':
153
  for step_key in step_config.keys():
154
  if step_key == "generator_params":
155
  st.subheader("Set-up experiments")
156
- step_config[step_key] = set_up(step_config[step_key])
157
  elif step_key != "pipeline_step":
158
  step_config[step_key] = st.text_input(step_key, step_config[step_key])
159
  with view_col:
160
- view(step_config)
161
  step_configs.append(step_config)
162
  config_file = json.dumps(step_configs, indent=4)
163
  output_path = st.text_input("Output file path", "config_files/experiment_config.json")
 
4
  from pylab import *
5
  import itertools
6
  import json
7
+ import math
8
  import os
9
  import pandas as pd
10
  import pm4py
 
18
  # Configuration File fabric for
19
  ## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
20
  """
21
+ def double_switch(label_left, label_right, third_label=None, fourth_label=None):
22
+ if third_label==None and fourth_label==None:
23
+ # Create two columns for the labels and toggle switch
24
+ col0, col1, col2, col3, col4 = st.columns([2,1,1,1,2])
25
+ else:
26
+ # Create two columns for the labels and toggle switch
27
+ col0, col1, col2, col3, col4, col5, col6, col7, col8 = st.columns([1,1,1,1,1,1,1,1,1])
28
 
29
  # Add labels to the columns
30
  with col1:
 
38
 
39
  with col3:
40
  st.write(label_right)
41
+ if third_label is None and fourth_label is None:return toggle_option
42
+ else:
43
+ with col5:
44
+ st.write(third_label)
45
+
46
+ with col6:
47
+ # Create the toggle switch
48
+ toggle_option_2 = st.toggle(" ",value=False,
49
+ key="toggle_switch_"+third_label,
50
+ )
51
+
52
+ with col7:
53
+ st.write(fourth_label)
54
+ return toggle_option, toggle_option_2
55
+
56
def input_multicolumn(labels, default_values, n_cols=5):
    """Render one text input per label, spread over ``n_cols`` columns.

    Labels are paired positionally with ``default_values`` and assigned to
    the columns round-robin: once every column has received a widget, the
    next one goes back to the first column.

    Args:
        labels: Iterable of widget labels. Each label is concatenated into
            the widget key, so labels are expected to be strings.
        default_values: Iterable of initial values, one per label.
        n_cols: Number of columns requested from ``st.columns``.

    Returns:
        A ``dict`` values view holding what ``text_input`` returned for each
        label, in first-seen label order. Results are keyed by label, so a
        repeated label keeps only its last value.
    """
    result = {}
    cols = st.columns(n_cols)
    # itertools.cycle repeats the columns for as long as there are labels,
    # replacing the manually extended (and over-allocated) column list.
    for label, default_value, col in zip(labels, default_values, itertools.cycle(cols)):
        result[label] = col.text_input(
            label,
            default_value,
            key="input_" + label + '_' + str(default_value),
        )
    return result.values()
67
+
68
def split_list(input_list, n):
    """Split ``input_list`` into ``n`` contiguous chunks of near-equal size.

    The first ``len(input_list) % n`` chunks receive one extra element, so
    chunk lengths differ by at most one and the original order is preserved.

    Args:
        input_list: Sequence supporting ``len`` and slicing.
        n: Number of chunks to produce.

    Returns:
        A list with ``n`` slices of ``input_list`` (some possibly empty when
        ``n`` exceeds the input length). An empty list when ``n`` is zero or
        negative.
    """
    if n <= 0:
        # divmod() would raise ZeroDivisionError for n == 0 (e.g. a caller
        # passing the length of an empty selection); negative n already
        # yielded [] because range(n) is empty, so zero now matches that.
        return []
    base, extra = divmod(len(input_list), n)
    # Chunk i starts at i * base + min(i, extra); consecutive start offsets
    # double as the end offset of the preceding chunk.
    bounds = [i * base + min(i, extra) for i in range(n + 1)]
    return [input_list[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
73
+
74
+ def get_ranges_from_stats(stats, tuple_values):
75
  col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
76
  stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
77
  #tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
 
84
  #if n_para_obj==len(objectives):
85
  sel_features = df.index.to_list()
86
  parameters_o = "objectives, "
87
+ parameters = get_ranges_from_stats(df, sorted(objectives))
88
  tasks = f"list(itertools.product({parameters}))[0]"
89
 
90
+ elif method=="range-from-csv":
 
91
  tasks = ""
92
  for objective in objectives:
93
  min_col, max_col, step_col = st.columns(3)
 
98
  with step_col:
99
  step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/df[objective].min(), step=0.01, key=objective+"step")
100
  tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
101
+ else :#method=="range-manual":
102
+ experitments = []
103
+ tasks=""
104
+ if objectives != None:
105
+ cross_labels = [feature[0]+': '+feature[1] for feature in list(product(objectives,['min', 'max', 'step']))]
106
+ cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in list(product(list(df.values()), ['*1', '*2', '/3']))]
107
+ ranges = zip(objectives, split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=3)), n_para_obj))
108
+ for objective, range_value in ranges:
109
+ selcted_min, selcted_max, step_value = range_value
110
+ tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
111
 
112
  cartesian_product = list(product(*eval(tasks)))
113
+ experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
114
  return experiments
115
 
116
+ def set_generator_experiments(generator_params):
117
  create_button = False
118
  experiments = []
119
 
120
+ grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")
 
 
 
121
  if csv_option:
122
  uploaded_file = st.file_uploader(f"Pick a csv-file containing feature values for features:", type="csv")
123
  if uploaded_file is not None:
 
131
  add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
132
  stats = df.describe().transpose()
133
  stats[str(int(add_quantile))+"%"] = df.quantile(q=add_quantile/100)
134
+ st.write(stats)
135
  tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
136
  triangular_option = double_switch("Square", "Triangular")
137
  if triangular_option:
 
139
  # List to store all combinations
140
  all_combinations = []
141
 
142
+ # Generate combinations of length 1, 2, ... and len(elements)
143
  for r in range(1, len(elements) + 1):
144
  # Generate combinations of length r
145
  combinations_r = list(combinations(elements, r))
 
148
  # Print or use the result as needed
149
  for comb in all_combinations:
150
  sel_stats = stats.loc[list(comb)]
151
+ experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
152
+ else: #Square
153
+ experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
154
+ else: #Range
155
+ experiments = create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
156
+ else: #Point
157
+ st.write(df)
158
  experiments = df.to_dict(orient='records')
159
+ #Manual
160
  else:
161
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
162
+ experitments = []
163
  if sel_features != None:
164
+ if grid_option:
165
+ experiments = create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")
166
+ else:
167
+ experiment = {}
168
+ for sel_feature in sel_features:
169
+ experiment[sel_feature] = float(st.text_input(sel_feature, generator_params['experiment'][sel_feature]))
170
+ experiments.append(experiment)
171
  generator_params['experiment'] = experiments
172
+ st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")
173
 
174
  """
175
  #### Configuration space
176
  """
177
+ updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
178
+ for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
179
+ generator_params['config_space'][key] = new_value
 
180
  generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))
181
  return generator_params
182
 
 
198
  for step_key in step_config.keys():
199
  if step_key == "generator_params":
200
  st.subheader("Set-up experiments")
201
+ step_config[step_key] = set_generator_experiments(step_config[step_key])
202
  elif step_key != "pipeline_step":
203
  step_config[step_key] = st.text_input(step_key, step_config[step_key])
204
  with view_col:
205
+ st.write(step_config)
206
  step_configs.append(step_config)
207
  config_file = json.dumps(step_configs, indent=4)
208
  output_path = st.text_input("Output file path", "config_files/experiment_config.json")