File size: 17,015 Bytes
2ea3878
 
1120efa
277c251
c9ea140
277c251
a281f3a
277c251
 
 
21a7e92
2b39384
da11610
277c251
 
 
b899c89
c9ea140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b899c89
 
e571083
 
 
c9ea140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277c251
a281f3a
 
 
 
 
 
 
277c251
 
 
 
 
 
 
 
 
 
 
 
 
a281f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21a7e92
 
 
 
 
 
 
a281f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277c251
 
 
 
 
 
 
1405a67
fb3f6c7
1405a67
 
 
 
 
277c251
a281f3a
1539c36
 
 
 
 
 
 
 
2ea3878
1539c36
a281f3a
 
 
 
2ea3878
 
a281f3a
 
 
 
277c251
1405a67
 
 
 
 
 
 
 
 
 
277c251
a281f3a
d614255
 
277c251
 
80803f1
1539c36
 
d614255
 
 
 
 
065d4e7
 
 
 
 
 
 
 
 
0a54e95
065d4e7
1405a67
 
d614255
 
1405a67
d614255
 
 
 
065d4e7
d614255
9f94d52
d614255
 
 
 
 
 
 
 
fb3f6c7
80803f1
 
d2d5696
fb3f6c7
 
 
 
 
 
1405a67
fb3f6c7
 
d2d5696
1120efa
1405a67
 
 
1120efa
1405a67
 
 
 
 
 
d2d5696
fb3f6c7
 
 
d2d5696
fb3f6c7
 
 
d2d5696
fb3f6c7
 
 
d614255
d2d5696
d614255
 
 
 
 
1405a67
d614255
 
 
fb3f6c7
d614255
277c251
a281f3a
277c251
 
 
 
a281f3a
 
813228e
277c251
d614255
277c251
 
 
c9ea140
6b51487
 
277c251
6b51487
 
 
 
 
277c251
 
 
1405a67
277c251
6b51487
277c251
 
6b51487
 
277c251
6b51487
 
277c251
 
6b51487
 
277c251
 
 
a281f3a
e374916
 
 
6b51487
 
 
277c251
 
6b51487
277c251
a281f3a
6b51487
277c251
6b51487
 
277c251
6b51487
 
277c251
6b51487
 
277c251
6b51487
 
21a7e92
1120efa
2b39384
1120efa
6b51487
277c251
 
6b51487
21a7e92
6b51487
1120efa
 
 
 
6b51487
21a7e92
6b51487
 
 
da11610
 
 
6b51487
 
 
 
 
defdb7f
 
 
21a7e92
2b39384
32b5982
 
 
1120efa
 
6b51487
 
 
 
 
 
 
 
 
 
 
 
 
 
1120efa
6b51487
aff5dca
 
 
 
 
 
 
 
 
 
 
 
 
6b51487
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
from itertools import product as cproduct
from itertools import combinations
from pathlib import Path
from pylab import *
import base64
import json
import math
import os
import pandas as pd
import streamlit as st
import subprocess
import time
import shutil

# Configure the Streamlit page to use the wide layout.
st.set_page_config(layout='wide')
# Presumably the location of a temporary XES input log; it is not referenced
# in the visible part of this file -- TODO confirm whether it is still needed.
INPUT_XES="output/inputlog_temp.xes"
# Image file that play_header() embeds into the page header.
LOGO_PATH="gedi/utils/logo.png"

def get_base64_image(image_path):
    """Return the content of the file at ``image_path`` as base64 text.

    The file is read in binary mode and the base64 bytes are decoded to a
    ``str`` so the result can be embedded directly into a data URI.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()

def play_header():
    """Render the page header: logo, app title and subtitle.

    The logo is placed in a narrow left column and the title in the wider
    column next to it; the subtitle is written below both columns.
    """
    # Convert local image to base64
    logo_base64 = get_base64_image(LOGO_PATH)

    # HTML and CSS for placing the logo at the top left corner
    head1, head2 = st.columns([1,8])
    # The image is inlined as a base64 data URI, so unsafe_allow_html is
    # required for the raw <style>/<div>/<img> markup to be rendered.
    head1.markdown(
        f"""
        <style>
        .header-logo {{
            display: flex;
            align-items: center;
            justify-content: flex-start;
        }}
        .header-logo img {{
            max-width: 100%; /* Adjust the size as needed */
            overflow: hidden;
            object-fit: contain;
            padding-top: 12px;
       }}
        </style>
        <div class="header-logo">
            <img src="data:image/png;base64,{logo_base64}" alt="Logo">
        </div>
        """,
        unsafe_allow_html=True
    )
    # NOTE: the bare string literals below are not dead code; they are
    # displayed through Streamlit's "magic" feature (as if passed to st.write).
    with head2:
        """
        # interactive GEDI
        """
    """
    ## **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining
    """
    return

def double_switch(label_left, label_right, third_label=None, fourth_label=None):
    """Render one or two toggle switches, each framed by a left and right label.

    Args:
        label_left: Text shown left of the first toggle; also part of its
            widget key, so it must be unique among switches on the page.
        label_right: Text shown right of the first toggle.
        third_label: Optional text shown left of a second toggle; also part
            of the second toggle's widget key.
        fourth_label: Optional text shown right of the second toggle.

    Returns:
        The state of the first toggle (``bool``) when neither ``third_label``
        nor ``fourth_label`` is given, otherwise a tuple with the states of
        both toggles.
    """
    single_switch = third_label is None and fourth_label is None

    if single_switch:
        # Five columns: outer columns act as padding around label/toggle/label.
        columns = st.columns([2, 1, 1, 1, 2])
    else:
        # Nine equal columns: two label/toggle/label groups with spacers.
        columns = st.columns([1, 1, 1, 1, 1, 1, 1, 1, 1])

    with columns[1]:
        st.write(label_left)

    with columns[2]:
        toggle_option = st.toggle(" ", value=False,
            key="toggle_switch_" + label_left,
        )

    with columns[3]:
        st.write(label_right)

    if single_switch:
        return toggle_option

    with columns[5]:
        # Only one of the two optional labels may have been supplied.
        if third_label is not None:
            st.write(third_label)

    with columns[6]:
        # Formatting (instead of "+") keeps the key valid when third_label is
        # None; for string labels the key is unchanged.
        toggle_option_2 = st.toggle(" ", value=False,
            key=f"toggle_switch_{third_label}",
        )

    with columns[7]:
        if fourth_label is not None:
            st.write(fourth_label)

    return toggle_option, toggle_option_2

def multi_button(labels):
    """Place one button per label side by side and report which were clicked.

    Returns a list with one entry per label, in label order, holding the
    return value of the corresponding ``button`` call.
    """
    columns = st.columns(len(labels))
    return [column.button(text) for column, text in zip(columns, labels)]

def input_multicolumn(labels, default_values, n_cols=5):
    """Lay out one text input per label across ``n_cols`` columns.

    Inputs are assigned to the columns round-robin (first label in the first
    column, and so on, wrapping around after the last column). Each widget key
    is derived from the label and its default value.

    Returns:
        A view of the entered values, keyed internally by label, so repeated
        labels collapse into a single entry.
    """
    columns = st.columns(n_cols)
    entered = {}
    for position, (label, default_value) in enumerate(zip(labels, default_values)):
        column = columns[position % len(columns)]
        widget_key = "input_" + label + '_' + str(default_value)
        with column:
            entered[label] = column.text_input(label, default_value, key=widget_key)
    return entered.values()

def split_list(input_list, n):
    """Split ``input_list`` into ``n`` consecutive chunks of near-equal size.

    When the length is not divisible by ``n``, the leading chunks each receive
    one extra element. Chunks may be empty if ``n`` exceeds the length.
    """
    base_size, remainder = divmod(len(input_list), n)
    chunks = []
    start = 0
    for chunk_index in range(n):
        # The first `remainder` chunks absorb the leftover elements.
        stop = start + base_size + (1 if chunk_index < remainder else 0)
        chunks.append(input_list[start:stop])
        start = stop
    return chunks

def get_ranges_from_stats(stats, tuple_values):
    """Build the source text of one ``np.around`` call per row of ``stats``.

    For every row, the values of the columns named in ``tuple_values`` are
    collected (as floats, in the given order) and rendered as
    ``np.around(<values>, 2)``. With more than one column the values are
    wrapped one level deeper (``((a, b),)``), with a single column they are
    rendered as ``(a,)``. The calls are joined with ``", "`` so that the
    caller can embed the text in a larger expression and ``eval`` it.

    Unlike the previous implementation this neither evaluates generated code
    (column names containing quotes are therefore safe) nor adds a ``range``
    column to the caller's ``stats`` frame.

    Args:
        stats: DataFrame with one row per feature and numeric columns.
        tuple_values: Names of the ``stats`` columns to include per row.

    Returns:
        A string with the comma-separated ``np.around(...)`` calls; empty if
        ``stats`` has no rows.

    Raises:
        ValueError: If ``tuple_values`` is empty.
        KeyError: If a requested column is missing from ``stats``.
    """
    columns = list(tuple_values)
    if not columns:
        raise ValueError("tuple_values must name at least one column of stats")

    calls = []
    for _, row in stats.iterrows():
        values = tuple(float(row[name]) for name in columns)
        # Several values are nested one level so the evaluated array has
        # shape (1, k); a single value stays a flat 1-tuple.
        row_range = (values,) if len(values) > 1 else values
        calls.append(f"np.around({row_range}, 2)")
    return ", ".join(calls)

def create_objectives_grid(df, objectives, n_para_obj=2, method="combinatorial"):
    """Build the list of experiments (objective -> target value) for a grid.

    Three ways of defining the value ranges are supported:

    * ``"combinatorial"`` (any ``method`` containing that word): ``df`` is a
      frame indexed by feature; the columns named in ``objectives`` provide
      the candidate values per feature. The experiment keys are the sorted
      index entries of ``df``.
    * ``"range-from-csv"``: ``df`` is a frame with one column per objective;
      min, max and step are chosen with sliders.
    * anything else (``"range-manual"``): ``df`` is a mapping from objective
      name to a default value; min, max and step are typed into text inputs.

    Args:
        df: Source of values/defaults, see above.
        objectives: Column names (combinatorial) or objective names (ranges).
        n_para_obj: Number of objectives; only used in the manual range mode
            to split the entered values into one (min, max, step) group per
            objective.
        method: Grid definition mode.

    Returns:
        A list of dicts, one per element of the cartesian product of all
        objective value ranges.

    Note:
        If the range expression cannot be evaluated, a hint is written to the
        page and the script exits via ``sys.exit(1)``.
    """
    # Imported explicitly: the file otherwise only sees ``sys`` if the
    # ``from pylab import *`` star import happens to re-export it.
    import sys

    if "combinatorial" in method:
        sel_features = df.index.to_list()
        parameters = get_ranges_from_stats(df, sorted(objectives))
        objectives = sorted(sel_features)
        tasks = f"list(cproduct({parameters}))[0]"

    elif method == "range-from-csv":
        tasks = ""
        for objective in objectives:
            column = df[objective]
            min_col, max_col, step_col = st.columns(3)
            with min_col:
                selected_min = st.slider(objective+': min', min_value=float(column.min()), max_value=float(column.max()), value=column.quantile(0.1), step=0.1, key=objective+"min")
            with max_col:
                selected_max = st.slider('max', min_value=selected_min, max_value=float(column.max()), value=column.quantile(0.9), step=0.1, key=objective+"max")
            with step_col:
                step_value = st.slider('step', min_value=float(column.min()), max_value=float(column.quantile(0.9)), value=column.median()/(column.min()+0.0001), step=0.01, key=objective+"step")
            tasks += f"np.around(np.arange({selected_min}, {selected_max}+{step_value}, {step_value}),2), "

    else:  # method == "range-manual"
        tasks = ""
        # An empty selection falls through with an empty expression and is
        # reported below instead of failing while splitting into 0 groups.
        if objectives:
            cross_labels = [name+': '+bound for name, bound in cproduct(objectives, ['min', 'max', 'step'])]
            # Defaults must come from the *selected* objectives so that every
            # label is paired with the value of its own feature.
            selected_defaults = [df[objective] for objective in objectives]
            cross_values = [round(eval(str(default)+operation), 2) for default, operation in cproduct(selected_defaults, ['*1', '*2', '/3'])]
            entered = list(input_multicolumn(cross_labels, cross_values, n_cols=3))
            for objective, range_value in zip(objectives, split_list(entered, n_para_obj)):
                selected_min, selected_max, step_value = range_value
                tasks += f"np.around(np.arange({selected_min}, {selected_max}+{step_value}, {step_value}),2), "

    try:
        # NOTE(security): `tasks` embeds values typed into the page's widgets
        # and is evaluated as Python; only run this app for trusted users.
        cartesian_product = list(cproduct(*eval(tasks)))
        experiments = [{key: value[idx] for idx, key in enumerate(objectives)} for value in cartesian_product]
        return experiments
    except SyntaxError:
        # Raised for an empty expression, i.e. nothing was selected.
        st.write("Please select valid features above.")
        sys.exit(1)
    except TypeError:
        # Raised when a range holds a single scalar instead of a sequence.
        st.write("Please select at least 2 values to define.")
        sys.exit(1)

def set_generator_experiments(generator_params):
    """Render the experiment set-up widgets and update ``generator_params``.

    Two toggles select how experiments are defined: point vs. grid based and
    manual vs. loaded from a CSV file. The resulting list of experiments
    replaces ``generator_params['experiment']``; afterwards the entries of
    ``generator_params['config_space']`` and ``generator_params['n_trials']``
    are overwritten with the values entered on the page.

    Args:
        generator_params: Dict with the keys ``'experiment'`` (mapping of
            feature name to default value), ``'config_space'`` and
            ``'n_trials'``. It is modified in place.

    Returns:
        The same ``generator_params`` dict, updated.
    """
    def handle_csv_file(grid_option):
        """Let the user upload a CSV and choose columns; (None, None) if unset."""
        uploaded_file = st.file_uploader("Pick a csv-file containing feature values for features:", type="csv")
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
            sel_features = st.multiselect("Selected features", list(df.columns), list(df.columns)[-1])
            if sel_features:
                df = df[sel_features]
                return df, sel_features
        return None, None

    def handle_combinatorial(sel_features, stats, tuple_values):
        """Create combinatorial experiments for all features or all subsets."""
        triangular_option = double_switch("Square", "Triangular")
        if triangular_option:
            experiments = []
            # Every non-empty subset of the selected features, smallest first.
            all_combinations = [
                comb
                for size in range(1, len(sel_features) + 1)
                for comb in combinations(sel_features, size)
            ]

            for comb in all_combinations:
                sel_stats = stats.loc[sorted(comb)]
                experiments += create_objectives_grid(sel_stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
        else: # Square
            experiments = create_objectives_grid(stats, tuple_values, n_para_obj=len(tuple_values), method="combinatorial")
        return experiments

    def handle_csv_option(grid_option, df, sel_features):
        """Derive experiments from the uploaded CSV data."""
        if grid_option:
            combinatorial = double_switch("Range", "Combinatorial")
            if combinatorial:
                add_quantile = st.slider('Add %-quantile', min_value=0.0, max_value=100.0, value=50.0, step=5.0)
                stats = df.describe().transpose().sort_index()
                stats[f"{int(add_quantile)}%"] = df.quantile(q=add_quantile / 100)
                st.write(stats)
                # The first three describe() columns are skipped as candidates.
                tuple_values = st.multiselect("Tuples including", list(stats.columns)[3:], default=['min', 'max'])
                return handle_combinatorial(sel_features, stats, tuple_values)
            else:  # Range
                return create_objectives_grid(df, sel_features, n_para_obj=len(sel_features), method="range-from-csv")
        else:  # Point
            st.write(df)
            return df.to_dict(orient='records')

    def feature_select():
        """Multiselect over the configured features; the last one is preselected."""
        return st.multiselect("Selected features", list(generator_params['experiment'].keys()),
                                                        list(generator_params['experiment'].keys())[-1])

    def handle_manual_option(grid_option):
        """Derive experiments from values typed into the page."""
        if grid_option:
            combinatorial = double_switch("Range", "Combinatorial")
            if combinatorial:
                col1, col2 = st.columns([1,4])
                with col1:
                    num_values = st.number_input('How many values to define?', min_value=2, step=1)
                with col2:
                    sel_features = feature_select()

                # Nothing selected: no experiments (splitting the inputs into
                # zero groups would otherwise divide by zero).
                if not sel_features:
                    return []

                filtered_dict = {key: generator_params['experiment'][key] for key in sel_features if key in generator_params['experiment']}
                values_indexes = ["value "+str(i+1) for i in range(num_values)]
                # Default suggestions scale each feature's configured value.
                values_defaults = ['*(1+2*0.'+str(i)+')' for i in range(num_values)]
                cross_labels =  [feature[0]+': '+feature[1] for feature in cproduct(sel_features, values_indexes)]
                cross_values = [round(eval(str(combination[0])+combination[1]), 2) for combination in cproduct(list(filtered_dict.values()), values_defaults)]
                # One row of entered values per selected feature.
                parameters = split_list(list(input_multicolumn(cross_labels, cross_values, n_cols=num_values)), len(sel_features))

                tasks_df = pd.DataFrame(parameters, index=sel_features, columns=values_indexes)
                tasks_df = tasks_df.astype(float)
                return handle_combinatorial(sel_features, tasks_df, values_indexes)

            else: # Range
                sel_features = feature_select()
                return create_objectives_grid(generator_params['experiment'], sel_features, n_para_obj=len(sel_features), method="range-manual")

        else: # Point
            sel_features = feature_select()

            experiment = {sel_feature: float(st.text_input(sel_feature, generator_params['experiment'][sel_feature])) for sel_feature in sel_features}
            return [experiment]


    grid_option, csv_option = double_switch("Point-", "Grid-based", third_label="Manual", fourth_label="From CSV")

    if csv_option:
        df, sel_features = handle_csv_file(grid_option)
        if df is not None and sel_features is not None:
            experiments = handle_csv_option(grid_option, df, sel_features)
        else:
            experiments = []
    else:  # Manual
        experiments = handle_manual_option(grid_option)

    generator_params['experiment'] = experiments
    st.write(f"...result in {len(generator_params['experiment'])} experiment(s)")

    # NOTE: this bare string is rendered on the page by Streamlit "magic".
    """
    #### Configuration space
    """
    updated_values = input_multicolumn(generator_params['config_space'].keys(), generator_params['config_space'].values())
    for key, new_value in zip(generator_params['config_space'].keys(), updated_values):
        # NOTE(security): evaluates the text-box content as Python; only run
        # this app for trusted users.
        generator_params['config_space'][key] = eval(new_value)
    generator_params['n_trials'] = int(st.text_input('n_trials', generator_params['n_trials']))

    return generator_params

if __name__ == '__main__':
    # Page flow: header -> choose pipeline steps -> edit each step's
    # configuration -> save the configuration as JSON -> optionally run the
    # pipeline and show the results.
    play_header()

    # Load the configuration layout from a JSON file (handle closed right away)
    with open("config_files/config_layout.json") as layout_file:
        config_layout = json.load(layout_file)

    # Define available pipeline steps
    step_candidates = ["instance_augmentation", "event_logs_generation", "feature_extraction", "benchmark_test"]

    # Streamlit multi-select for pipeline steps
    pipeline_steps = st.multiselect(
        "Choose pipeline step",
        step_candidates,
        ["event_logs_generation"]
    )

    step_configs = []
    set_col, view_col = st.columns([3, 2])

    # Iterate through selected pipeline steps
    for pipeline_step in pipeline_steps:
        step_config = next(d for d in config_layout if d['pipeline_step'] == pipeline_step)

        with set_col:
            st.header(pipeline_step)

            # Iterate over a snapshot of the keys; values are replaced below
            for step_key in list(step_config):
                if step_key == "generator_params":
                    st.subheader("Set-up experiments")
                    step_config[step_key] = set_generator_experiments(step_config[step_key])
                elif step_key == "feature_params":
                    layout_features = list(step_config[step_key]['feature_set'])
                    step_config[step_key]["feature_set"] = st.multiselect(
                        "features to extract",
                        layout_features
                    )
                elif step_key != "pipeline_step":
                    step_config[step_key] = st.text_input(step_key, step_config[step_key])

        with view_col:
            st.write(step_config)

        step_configs.append(step_config)

    # Convert step configurations to JSON
    config_file = json.dumps(step_configs, indent=4)

    # Streamlit input for output file path
    output_path = st.text_input("Output file path", "config_files/experiment_config.json")

    # Ensure output directory exists; a bare file name has no directory part
    # and os.makedirs("") would raise.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Streamlit multi-button for saving options
    save_labels = ["Save config file", "Save and run config_file"]
    create_button, create_run_button = multi_button(save_labels)

    if create_button or create_run_button:
        # Save configuration to the specified output path
        with open(output_path, "w") as f:
            f.write(config_file)

        st.write("Saved configuration in ", output_path, ". Run command:")

        var = f"python -W ignore main.py -a {output_path}"
        st.code(var, language='bash')

        if create_run_button and not step_configs:
            # Without a selected step there is no output path to collect from.
            st.write("Please choose at least one pipeline step before running.")
        elif create_run_button:
            # Build the argument list directly so that an output path
            # containing spaces stays a single argument.
            command = ["python", "-W", "ignore", "main.py", "-a", output_path]
            progress_bar = st.progress(0)

            # Results are read from "<first part>/features/<remaining parts>"
            # of the last selected step's output path.
            directory = Path(step_configs[-1]['output_path']).parts
            path = os.path.join(directory[0], 'features', *directory[1:])

            # Clean existing output path if it exists
            if os.path.exists(path):
                shutil.rmtree(path)

            # Run the actual command
            result = subprocess.run(command, capture_output=True, text=True)
            st.write("bash results:",result.stdout, result.stderr)
            st.write("## Results")

            # Collect all file paths from the output directory
            file_paths = [os.path.join(root, file)
                          for root, _, files in os.walk(path)
                          for file in files]

            if not file_paths:
                # E.g. the command failed; pd.concat would raise on no input.
                st.write("No result files were found in ", path)
            else:
                # Read and concatenate all JSON files into a DataFrame
                dataframes = pd.concat([pd.read_json(file, lines=True) for file in file_paths], ignore_index=True)

                # Reorder columns with 'target_similarity' as the last column
                columns = [col for col in dataframes.columns if col != 'target_similarity'] + ['target_similarity']
                dataframes = dataframes[columns]

                # Set 'log' as the index
                dataframes.set_index('log', inplace=True)

                col1, col2 = st.columns([2, 3])

                with col1:
                    st.dataframe(dataframes)

                with col2:
                    fig = plt.figure(figsize=(4, 2))
                    plt.plot(dataframes.index, dataframes['target_similarity'], 'o-')
                    plt.xlabel('log', fontsize=5)
                    plt.ylabel('target_similarity', fontsize=5)
                    plt.xticks(rotation=45, ha='right', fontsize=5)
                    plt.tight_layout()
                    # Hand over the figure itself and release it afterwards so
                    # figures do not pile up across script reruns.
                    st.pyplot(fig)
                    plt.close(fig)

            # Update progress bar to indicate completion
            progress_bar.progress(100)