Merge pull request #7 from andreamalhera/6-frontend-for-configfiles
Files changed:
- .conda.yml +22 -0
- .gitignore +7 -6
- README.md +10 -2
- config_files/config_layout.json +48 -0
- main.py +1 -1
- setup.py +1 -0
- utils/config_fabric.py +258 -0
- merge_csvs.py → utils/merge_csvs.py +0 -0
- merge_jsons.py → utils/merge_jsons.py +0 -0
.conda.yml
ADDED
@@ -0,0 +1,22 @@
```yaml
name: gedi
channels:
  - conda-forge
dependencies:
  - python=3.9
  - numpy=1.23.1
  - scikit-learn=1.2.2
  - scipy
  - pandas
  - matplotlib
  - pip
  - pip:
    - pm4py==2.7.2
    - imblearn
    - seaborn
    - feeed
    - smac
    - ConfigSpace
    - tqdm
    - Levenshtein
    - streamlit
    - streamlit-toggle-switch
```
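For reference, an environment from this new file can be created and activated with the standard conda workflow (a sketch; the environment name `gedi` comes from the file above):

```sh
conda env create -f .conda.yml   # builds the "gedi" environment defined above
conda activate gedi
```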
.gitignore
CHANGED
```diff
@@ -1,8 +1,9 @@
-
-data/
-output/
+*.pyc
 .ipynb_checkpoints/
-notebooks/.ipynb_checkpoints/*
-gedi.egg-info/
 build/
-
+config_files/algorithm/grid_*/*
+data/
+gedi.egg-info/
+notebooks/.ipynb_checkpoints/*
+output/
+smac3_output/
```
README.md
CHANGED
````diff
@@ -63,7 +63,7 @@ The JSON file consists of the following key-value pairs:
 - real_eventlog_path: defines the file with the features extracted from the real event logs
 - plot_type: defines the style of the output plotting (possible values: violinplot, boxplot)
 - font_size: label font size of the output plot
-
+- boxplot_width: width of the violinplot/boxplot
 
 
 ### Generation
@@ -153,8 +153,16 @@ To execute the experiments with grid targets, a single [configuration](config_fi
 conda activate gedi
 python execute_grid_experiments.py config_files/algorithm/grid_2obj
 ```
 We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/algorithm/grid_2obj) and [objective](data/grid_2obj) files for this experiment.
+For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
+To create configuration files for grid objectives interactively, you can start the following dashboard:
+```
+streamlit run utils/config_fabric.py # To tunnel to local machine add: --server.port 8501 --server.headless true
 
+# In local machine (only in case you are tunneling):
+ssh -N -f -L 9000:localhost:8501 <user@remote_machine.com>
+open "http://localhost:9000/"
+```
 ### Visualizations
 To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter notebook`. In the following, we describe the `.ipynb`-files in the folder `\notebooks` to reproduce the figures from our paper.
 
````
config_files/config_layout.json
ADDED
@@ -0,0 +1,48 @@
```json
[
    {
        "pipeline_step": "instance_augmentation",
        "augmentation_params": {"method":"SMOTE", "no_samples":2,
            "feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
        "input_path": "data/test/bpic_features.csv",
        "output_path": "output"
    },
    {
        "pipeline_step": "event_logs_generation",
        "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
        "output_path": "data/test",
        "generator_params": {
            "experiment": "data/grid_objectives.csv",
            "experiment": {"input_path": "data/2_bpic_features.csv",
                "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
            "experiment": {"n_traces":832, "n_unique_traces":828, "ratio_variants_per_number_of_traces":0.99, "trace_len_min":1, "trace_len_max":132, "trace_len_mean":53.31, "trace_len_median":54, "trace_len_mode":61, "trace_len_std":19.89, "trace_len_variance":395.81, "trace_len_q1":44, "trace_len_q3":62, "trace_len_iqr":18, "trace_len_geometric_mean":48.15, "trace_len_geometric_std":1.69, "trace_len_harmonic_mean":37.58, "trace_len_skewness":0.0541, "trace_len_kurtosis":0.81, "trace_len_coefficient_variation":0.37, "trace_len_entropy":6.65, "trace_len_hist1":0.004, "trace_len_hist2":0.005, "trace_len_hist3":0.005, "trace_len_hist4":0.024, "trace_len_hist5":0.024, "trace_len_hist6":0.008, "trace_len_hist7":0.005, "trace_len_hist8":0.001, "trace_len_hist9":0.0, "trace_len_hist10":0.00, "trace_len_skewness_hist":0.05, "trace_len_kurtosis_hist":0.8, "ratio_most_common_variant":0.0, "ratio_top_1_variants":0.01, "ratio_top_5_variants":0.05, "ratio_top_10_variants":0.10, "ratio_top_20_variants":0.2, "ratio_top_50_variants":0.5, "ratio_top_75_variants":0.75, "mean_variant_occurrence":1.0, "std_variant_occurrence":0.07, "skewness_variant_occurrence":14.28, "kurtosis_variant_occurrence":202.00, "n_unique_activities":410, "activities_min":1, "activities_max":830, "activities_mean":108.18, "activities_median":12, "activities_std":187.59, "activities_variance":35189, "activities_q1":3, "activities_q3":125, "activities_iqr":122, "activities_skewness":2.13, "activities_kurtosis":3.81, "n_unique_start_activities":14, "start_activities_min":1, "start_activities_max":731, "start_activities_mean":59.43, "start_activities_median":1, "start_activities_std":186.72, "start_activities_variance":34863, "start_activities_q1":1, "start_activities_q3":8, "start_activities_iqr":7, "start_activities_skewness":3, "start_activities_kurtosis":9.0, "n_unique_end_activities":82, "end_activities_min":1, "end_activities_max":216, "end_activities_mean":10, "end_activities_median":1, "end_activities_std":35, "end_activities_variance":1247, "end_activities_q1":1, "end_activities_q3":3, "end_activities_iqr":2, "end_activities_skewness":5, "end_activities_kurtosis":26, "eventropy_trace":10, "eventropy_prefix":15, "eventropy_global_block":19, "eventropy_lempel_ziv":4, "eventropy_k_block_diff_1":7.1, "eventropy_k_block_diff_3":7.1, "eventropy_k_block_diff_5":7.1, "eventropy_k_block_ratio_1":7.1, "eventropy_k_block_ratio_3":7.1, "eventropy_k_block_ratio_5":7.1, "eventropy_knn_3":5.54, "eventropy_knn_5":5.04, "eventropy_knn_7":4.72, "epa_variant_entropy":240512, "epa_normalized_variant_entropy":0.68, "epa_sequence_entropy":285876, "epa_normalized_sequence_entropy":0.60, "epa_sequence_entropy_linear_forgetting":150546, "epa_normalized_sequence_entropy_linear_forgetting":0.32, "epa_sequence_entropy_exponential_forgetting":185312, "epa_normalized_sequence_entropy_exponential_forgetting":0.39},
            "config_space": {
                "mode": [5, 20],
                "sequence": [0.01, 1],
                "choice": [0.01, 1],
                "parallel": [0.01, 1],
                "loop": [0.01, 1],
                "silent": [0.01, 1],
                "lt_dependency": [0.01, 1],
                "num_traces": [10, 100],
                "duplicate": [0],
                "or": [0]
            },
            "n_trials": 2
        }
    },
    {
        "pipeline_step": "feature_extraction",
        "input_path": "data/test",
        "feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", "eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
        "output_path": "output/plots",
        "real_eventlog_path": "data/BaselineED_feat.csv",
        "plot_type": "boxplot"
    },
    {
        "pipeline_step": "benchmark_test",
        "benchmark_task": "discovery",
        "input_path": "data/test",
        "output_path": "output",
        "miners": ["inductive", "heu", "imf", "ilp"]
    }
]
```
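Each entry in this layout serves as the default template for one pipeline step in the dashboard added below. A minimal sketch of the lookup that `utils/config_fabric.py` performs (mirroring the loop in its `__main__` block):

```python
import json

# Load the layout file added in this PR.
config_layout = json.load(open("config_files/config_layout.json"))

# Pick the template matching a selected pipeline step, as the dashboard does.
step_config = [d for d in config_layout if d["pipeline_step"] == "benchmark_test"][0]
print(step_config["miners"])  # ['inductive', 'heu', 'imf', 'ilp']
```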
main.py
CHANGED
```diff
@@ -35,7 +35,7 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
     elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
         gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
         #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
-        GenerationPlotter(gen, model_params, output_path="output/plots")
+        #GenerationPlotter(gen, model_params, output_path="output/plots")
     elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
         benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
         # BenchmarkPlotter(benchmark.features, output_path="output/plots")
```
setup.py
CHANGED
```diff
@@ -32,6 +32,7 @@ setup(
         'seaborn==0.13.2',
         'smac==2.0.2',
         'tqdm==4.65.0',
+        'streamlit-toggle-switch>=1.0.2'
     ],
     packages = ['gedi'],
     classifiers=[
```
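Because the new dependency is declared in `install_requires`, an editable install should pull it in along with the pinned packages (assuming the standard setuptools flow):

```sh
pip install -e .   # also installs streamlit-toggle-switch>=1.0.2
```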
utils/config_fabric.py
ADDED
@@ -0,0 +1,258 @@
```python
from copy import deepcopy
from importlib import reload
from itertools import product as cproduct
from itertools import combinations
from pylab import *
import itertools
import json
import math
import os
import pandas as pd
import pm4py
import random
import streamlit as st
import subprocess

st.set_page_config(layout='wide')
INPUT_XES="output/inputlog_temp.xes"

"""
# Configuration File fabric for
## GEDI: **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
"""
def double_switch(label_left, label_right, third_label=None, fourth_label=None):
    if third_label==None and fourth_label==None:
        # Create two columns for the labels and toggle switch
        col0, col1, col2, col3, col4 = st.columns([2,1,1,1,2])
    else:
        # Create two columns for the labels and toggle switch
        col0, col1, col2, col3, col4, col5, col6, col7, col8 = st.columns([1,1,1,1,1,1,1,1,1])

    # Add labels to the columns
    with col1:
        st.write(label_left)

    with col2:
        # Create the toggle switch
        toggle_option = st.toggle(" ",value=False,
            key="toggle_switch_"+label_left,
        )

    with col3:
        st.write(label_right)
    if third_label is None and fourth_label is None: return toggle_option
    else:
        with col5:
            st.write(third_label)

        with col6:
            # Create the toggle switch
            toggle_option_2 = st.toggle(" ",value=False,
                key="toggle_switch_"+third_label,
            )

        with col7:
            st.write(fourth_label)
        return toggle_option, toggle_option_2

def multi_button(labels):
    cols = st.columns(len(labels))
    activations = []
    for col, label in zip(cols, labels):
        activations.append(col.button(label))
    return activations

def input_multicolumn(labels, default_values, n_cols=5):
    result = {}
    cols = st.columns(n_cols)
    factor = math.ceil(len(labels)/n_cols)
    extended = cols.copy()
    for _ in range(factor):
        extended.extend(cols)
    for label, default_value, col in zip(labels, default_values, extended):
        with col:
            result[label] = col.text_input(label, default_value, key=f"input_"+label+'_'+str(default_value))
    return result.values()

def split_list(input_list, n):
    # Calculate the size of each chunk
    k, m = divmod(len(input_list), n)
    # Use list comprehension to create n sublists
    return [input_list[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n)]

def get_ranges_from_stats(stats, tuple_values):
    col_for_row = ", ".join([f"x[\'{i}\'].astype(float)" for i in tuple_values])
    stats['range'] = stats.apply(lambda x: tuple([eval(col_for_row)]), axis=1)
    #tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    result = [f"np.around({x}, 2)" for x in stats['range']]
    result = ", ".join(result)
    return result

def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial"):
    if method=="combinatorial":
        sel_features = df.index.to_list()
        parameters_o = "objectives, "
        parameters = get_ranges_from_stats(df, sorted(objectives))
        objectives = sorted(sel_features)
        tasks = f"list(cproduct({parameters}))[0]"

    elif method=="range-from-csv":
        tasks = ""
        for objective in objectives:
            min_col, max_col, step_col = st.columns(3)
            with min_col:
                selcted_min = st.slider(objective+': min', min_value=float(df[objective].min()), max_value=float(df[objective].max()), value=df[objective].quantile(0.1), step=0.1, key=objective+"min")
            with max_col:
                selcted_max = st.slider('max', min_value=selcted_min, max_value=float(df[objective].max()), value=df[objective].quantile(0.9), step=0.1, key=objective+"max")
            with step_col:
                step_value = st.slider('step', min_value=float(df[objective].min()), max_value=float(df[objective].quantile(0.9)), value=df[objective].median()/(df[objective].min()+0.0001), step=0.01, key=objective+"step")
            tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "
    else: #method=="range-manual":
        experitments = []
        tasks=""
        if objectives != None:
            cross_labels = [feature[0]+': '+feature[1] for feature in list(cproduct(objectives,['min', 'max', 'step']))]
            cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in list(cproduct(list(df.values()), ['*1', '*2', '/3']))]
            ranges = zip(objectives, split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=3)), n_para_obj))
            for objective, range_value in ranges:
                selcted_min, selcted_max, step_value = range_value
                tasks += f"np.around(np.arange({selcted_min}, {selcted_max}+{step_value}, {step_value}),2), "

    #import pdb; pdb.set_trace()
    cartesian_product = list(cproduct(*eval(tasks)))
    experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
    return experiments

def set_generator_experiments(generator_params):
    def handle_csv_file(grid_option):
        uploaded_file = st.file_uploader("Pick a csv-file containing feature values for features:", type="csv")
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
            sel_features = st.multiselect("Selected features", list(df.columns))
            if sel_features:
                df = df[sel_features]
                return df, sel_features
        return None, None

    def handle_combinatorial(sel_features, stats, tuple_values):
        triangular_option = double_switch("Square", "Triangular")
        if triangular_option:
            experiments = []
            elements = sel_features
            # List to store all combinations
            all_combinations = [combinations(sel_features, r) for r in range(1, len(sel_features) + 1)]
            all_combinations = [comb for sublist in all_combinations for comb in sublist]

            # Print or use the result as needed
            for comb in all_combinations:
                sel_stats = stats.loc[sorted(list(comb))]
                experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
        else:
            experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values))
        return experiments

    def handle_grid_option(grid_option, df, sel_features):
        if grid_option:
            combinatorial = double_switch("Range", "Combinatorial")
            if combinatorial:
                add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
                stats = df.describe().transpose().sort_index()
                stats[f"{int(add_quantile)}%"] = df.quantile(q=add_quantile / 100)
                st.write(stats)
                tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
                return handle_combinatorial(sel_features, stats, tuple_values)
            else: # Range
                return create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
        else: # Point
            st.write(df)
            return df.to_dict(orient='records')

    def handle_manual_option(sel_features, grid_option):
        if sel_features:
            if grid_option:
                return create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")
            else:
                experiment = {sel_feature: float(st.text_input(sel_feature, generator_params['experiment'][sel_feature])) for sel_feature in sel_features}
                return [experiment]
        return []

    grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")

    if csv_option:
        df, sel_features = handle_csv_file(grid_option)
        if df is not None and sel_features is not None:
            experiments = handle_grid_option(grid_option, df, sel_features)
        else:
            experiments = []
    else: # Manual
        sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
        experiments = handle_manual_option(sel_features, grid_option)

    generator_params['experiment'] = experiments
    st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")

    """
    #### Configuration space
    """
    updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
    for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
        generator_params['config_space'][key] = eval(new_value)
    generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))

    return generator_params

if __name__ == '__main__':
    config_layout = json.load(open("config_files/config_layout.json"))
    type(config_layout)
    step_candidates = ["instance_augmentation","event_logs_generation","feature_extraction","benchmark_test"]
    pipeline_steps = st.multiselect(
        "Choose pipeline step",
        step_candidates,
        []
    )
    step_configs = []
    set_col, view_col = st.columns([3, 2])
    for pipeline_step in pipeline_steps:
        step_config = [d for d in config_layout if d['pipeline_step'] == pipeline_step][0]
        with set_col:
            st.header(pipeline_step)
            for step_key in step_config.keys():
                if step_key == "generator_params":
                    st.subheader("Set-up experiments")
                    step_config[step_key] = set_generator_experiments(step_config[step_key])
                elif step_key == "feature_params":
                    layout_features = list(step_config[step_key]['feature_set'])
                    step_config[step_key]["feature_set"] = st.multiselect(
                        "features to extract",
                        layout_features)
                elif step_key != "pipeline_step":
                    step_config[step_key] = st.text_input(step_key, step_config[step_key])
        with view_col:
            st.write(step_config)
        step_configs.append(step_config)
    config_file = json.dumps(step_configs, indent=4)
    output_path = st.text_input("Output file path", "config_files/experiment_config.json")
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    save_labels = ["Save config file", "Save and run config_file"]
    save_labels = ["Save configuration file"]
    #create_button, create_run_button = multi_button(save_labels)
    create_button = multi_button(save_labels)
    if create_button: # or create_run_button:
        with open(output_path, "w") as f:
            f.write(config_file)
        st.write("Saved configuration in ", output_path, ". Run command:")
        #if create_run_button:
        if True:
            options_path = os.path.join("config_files", "options", "baseline.json")
            var = f"python -W ignore main.py -o {options_path} -a {output_path}"
            st.code(var, language='bash')
            if False: #FIXME: Command fails when using multiprocessing
                command = var.split()

                # Run the command
                result = subprocess.run(command, capture_output=True, text=True)

                if len(result.stderr)==0:
                    st.write(result.stdout)
                else:
                    st.write("ERROR: ", result.stderr)
```
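As a quick illustration of the `split_list` helper above, which redistributes the flat list of per-feature min/max/step inputs into one chunk per objective (a hypothetical check, not part of the PR):

```python
split_list(list(range(6)), 2)   # -> [[0, 1, 2], [3, 4, 5]]
split_list([1, 2, 3, 4, 5], 2)  # -> [[1, 2, 3], [4, 5]] (earlier chunks absorb the remainder)
```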
merge_csvs.py → utils/merge_csvs.py
RENAMED
File without changes

merge_jsons.py → utils/merge_jsons.py
RENAMED
File without changes