## Grid Objectives
Each objective (column) is iterated over a grid of values between its minimum and maximum.

### Glossary
- **task**: Refers to the set of values (row) and corresponding keys to be aimed at sequentially.
- **objective**: Refers to one key (column) and respective value to be aimed at simultaneously during a task.
- **experiment**: Refers to one file containing a fixed number of objectives (columns) and a fixed number of tasks (rows).

In [1]:
import itertools
import json
import numpy as np
import os
import pandas as pd

In [2]:
#Features between 0 and 1: 
normalized_feature_names = ['ratio_unique_traces_per_trace', 'trace_len_hist1', 'trace_len_hist2',
                            'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',
                            'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', 
                            'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', 
                            'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants', 
                            'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']

normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 
                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']

def abbrev_obj_keys(obj_keys):
    abbreviated_keys = []
    for obj_key in obj_keys:
        key_slices = obj_key.split("_")
        chars = []
        for key_slice in key_slices:
            for idx, single_char in enumerate(key_slice):
                if idx == 0 or single_char.isdigit():
                    chars.append(single_char)
        abbreviated_key = ''.join(chars)
        abbreviated_keys.append(abbreviated_key)
    return '_'.join(abbreviated_keys) 

In [6]:
def write_generator_experiment(experiment_path, objectives=["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]):
    first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
    second_dir = first_dir.replace("grid_","").replace("objectives","")

    experiment = [
      {
        'pipeline_step': 'event_logs_generation',
        'output_path':'output/generated/grid_2obj',
        'generator_params': {
          "experiment": {"input_path": experiment_path[3:],
            "objectives": objectives},
          'config_space': {
            'mode': [5, 20],
            'sequence': [0.01, 1],
            'choice': [0.01, 1],
            'parallel': [0.01, 1],
            'loop': [0.01, 1],
            'silent': [0.01, 1],
            'lt_dependency': [0.01, 1],
            'num_traces': [10, 10001],
            'duplicate': [0],
            'or': [0]
          },
          'n_trials': 200
        }
      },
      {
        'pipeline_step': 'feature_extraction',
        'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),
        'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
        'output_path': 'output/plots',
        'real_eventlog_path': 'data/34_bpic_features.csv',
        'plot_type': 'boxplot'
  }
    ]

    #print("EXPERIMENT:", experiment[1]['input_path'])
    output_path = os.path.join('..', 'config_files','algorithm','grid_2obj')
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 
    with open(output_path, 'w') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")
    
    return experiment

def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    if n_para_obj==1:
        experiments = [[exp] for exp in objectives]
    else:
        experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments), experiments)
    
    parameters = "np.around(np.arange(0, 1.1,0.1),2), "
    tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]
    print(len(tasks))
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        experiment_path = os.path.join('..','data', 'grid_2obj')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv") 
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_generator_experiment(experiment_path, objectives=exp)
    #df.to_csv(f"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv" ,index=False)
        
# Generate the task grids and generator configs for every pair of the selected features.
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        
# Show whatever create_objectives_grid returned.
print(exp_test)

21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy',

### Helper prototypes

In [11]:
# Empty frame with a log-name column and the two objective columns of the prototype grid.
df = pd.DataFrame(columns=["log","ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"])    

In [12]:
k=0
for i in np.arange(0, 1.1,0.2):
    for j in np.arange(0,0.55,0.1):
        k+=1
        new_entry = pd.Series({'log':f"objective_{k}", "ratio_top_20_variants":round(i,1),
                   "epa_normalized_sequence_entropy_linear_forgetting":round(j,1)})
        df = pd.concat([
                df, 
                pd.DataFrame([new_entry], columns=new_entry.index)]
           ).reset_index(drop=True)
    

In [13]:
# Persist the prototype grid without the row index; the ../data directory must already exist.
df.to_csv("../data/grid_objectives.csv" ,index=False)

In [6]:
# Display the prototype grid held in `df`.
df

Unnamed: 0,log,ratio_top_20_variants,normalized_sequence_entropy_linear_forgetting
0,objective_1,0.0,0.0
1,objective_2,0.0,0.1
2,objective_3,0.0,0.2
3,objective_4,0.0,0.3
4,objective_5,0.0,0.4
5,objective_6,0.0,0.5
6,objective_7,0.2,0.0
7,objective_8,0.2,0.1
8,objective_9,0.2,0.2
9,objective_10,0.2,0.3


## Objectives from real logs
(Feature selection)

In [20]:
# Load the feature table of the real event logs (one row per log, identified by the 'log' column).
bpic_features = pd.read_csv("../data/34_bpic_features.csv", index_col=None)
# Alternative source file, currently disabled:
#bpic_features = pd.read_csv("../gedi/output/features/real_event_logs.csv", index_col=None)

#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)
# Sanity check: table dimensions and the sorted list of distinct log names.
print(bpic_features.shape)
print(len(bpic_features), " Event-Logs: ", bpic_features.sort_values('log')['log'].unique())

# Disabled one-off migration that would prefix the entropy feature columns with "epa_":
#bpic_features.rename(columns={"variant_entropy":"epa_variant_entropy", "normalized_variant_entropy":"epa_normalized_variant_entropy", "sequence_entropy":"epa_sequence_entropy", "normalized_sequence_entropy":"epa_normalized_sequence_entropy", "sequence_entropy_linear_forgetting":"epa_sequence_entropy_linear_forgetting", "normalized_sequence_entropy_linear_forgetting":"epa_normalized_sequence_entropy_linear_forgetting", "sequence_entropy_exponential_forgetting":"epa_sequence_entropy_exponential_forgetting", "normalized_sequence_entropy_exponential_forgetting":"epa_normalized_sequence_entropy_exponential_forgetting"},
#          errors="raise", inplace=True)

# Preview the first rows as the cell output.
bpic_features.head()
#bpic_features.to_csv("../data/34_bpic_features.csv", index=False)

(34, 178)
34  Event-Logs:  ['BPI2016_Complaints' 'BPI2016_Questions' 'BPI2016_Werkmap_Messages'
 'BPIC15_1' 'BPIC15_2' 'BPIC15_3' 'BPIC15_4' 'BPIC15_5'
 'BPI_Challenge_2012' 'BPI_Challenge_2013_closed_problems'
 'BPI_Challenge_2013_incidents' 'BPI_Challenge_2013_open_problems'
 'BPI_Challenge_2017' 'BPI_Challenge_2017_Offer_log' 'BPI_Challenge_2018'
 'BPI_Challenge_2019' 'CoSeLoG_WABO_1' 'CoSeLoG_WABO_2' 'CoSeLoG_WABO_3'
 'CoSeLoG_WABO_4' 'CoSeLoG_WABO_5' 'Detail_Change'
 'Detail_Incident_Activity' 'Detail_Interaction' 'DomesticDeclarations'
 'Hospital_log' 'InternationalDeclarations' 'PermitLog'
 'PrepaidTravelCost' 'Receipt_WABO_CoSeLoG' 'RequestForPayment'
 'Road_Traffic_Fine_Management_Process' 'Sepsis_Cases_Event_Log' 'finale']


Unnamed: 0,log,n_traces,n_unique_traces,ratio_unique_traces_per_trace,trace_len_min,trace_len_max,trace_len_mean,trace_len_median,trace_len_mode,trace_len_std,trace_len_variance,trace_len_q1,trace_len_q3,trace_len_iqr,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_skewness,trace_len_kurtosis,trace_len_coefficient_variation,trace_len_entropy,trace_len_hist1,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_hist10,trace_len_skewness_hist,trace_len_kurtosis_hist,ratio_most_common_variant,ratio_top_1_variants,ratio_top_5_variants,ratio_top_10_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_75_variants,mean_variant_occurrence,std_variant_occurrence,skewness_variant_occurrence,kurtosis_variant_occurrence,n_unique_activities,activities_min,activities_max,activities_mean,activities_median,activities_std,activities_variance,activities_q1,activities_q3,activities_iqr,activities_skewness,activities_kurtosis,n_unique_start_activities,start_activities_min,start_activities_max,start_activities_mean,start_activities_median,start_activities_std,start_activities_variance,start_activities_q1,start_activities_q3,start_activities_iqr,start_activities_skewness,start_activities_kurtosis,n_unique_end_activities,end_activities_min,end_activities_max,end_activities_mean,end_activities_median,end_activities_std,end_activities_variance,end_activities_q1,end_activities_q3,end_activities_iqr,end_activities_skewness,end_activities_kurtosis,entropy_trace,entropy_prefix,entropy_global_block,entropy_lempel_ziv,entropy_k_block_diff_1,entropy_k_block_diff_3,entropy_k_block_diff_5,entropy_k_block_ratio_1,entropy_k_block_ratio_3,entropy_k_block_ratio_5,entropy_knn_3,entropy_knn_5,entropy_knn_7,Log 
Nature,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,accumulated_time_time_min,accumulated_time_time_max,accumulated_time_time_mean,accumulated_time_time_median,accumulated_time_time_mode,accumulated_time_time_std,accumulated_time_time_variance,accumulated_time_time_q1,accumulated_time_time_q3,accumulated_time_time_iqr,accumulated_time_time_geometric_mean,accumulated_time_time_geometric_std,accumulated_time_time_harmonic_mean,accumulated_time_time_skewness,accumulated_time_time_kurtosis,accumulated_time_time_coefficient_variation,accumulated_time_time_entropy,accumulated_time_time_skewness_hist,accumulated_time_time_kurtosis_hist,execution_time_time_min,execution_time_time_max,execution_time_time_mean,execution_time_time_median,execution_time_time_mode,execution_time_time_std,execution_time_time_variance,execution_time_time_q1,execution_time_time_q3,execution_time_time_iqr,execution_time_time_geometric_mean,execution_time_time_geometric_std,execution_time_time_harmonic_mean,execution_time_time_skewness,execution_time_time_kurtosis,execution_time_time_coefficient_variation,execution_time_time_entropy,execution_time_time_skewness_hist,execution_time_time_kurtosis_hist,remaining_time_time_min,remaining_time_time_max,remaining_time_time_mean,remaining_time_time_median,remaining_time_time_mode,remaining_time_time_std,remaining_time_time_variance,remaining_time_time_q1,remaining_time_time_q3,remaining_time_time_iqr,remaining_time_time_geometric_mean,remaining_time_time_geometric_std,remaining_time_time_harmonic_mean,remaining_time_time_skewness,remaining_time_time_kurtosis,remaining_time_time_coefficient_variation,remaining_time_time_entropy,remaining_time_time_skewness_hist,remaining_time_time_kurtosis_hist,within_day_time_min,wi
thin_day_time_max,within_day_time_mean,within_day_time_median,within_day_time_mode,within_day_time_std,within_day_time_variance,within_day_time_q1,within_day_time_q3,within_day_time_iqr,within_day_time_geometric_mean,within_day_time_geometric_std,within_day_time_harmonic_mean,within_day_time_skewness,within_day_time_kurtosis,within_day_time_coefficient_variation,within_day_time_entropy,within_day_time_skewness_hist,within_day_time_kurtosis_hist
0,BPIC15_2,832,828,0.995192,1,132,53.310096,54.0,61,19.894977,395.81009,44.0,62.0,18.0,48.150111,1.695311,37.583741,0.054138,0.804992,0.373193,6.646715,0.003853,0.004863,0.004679243,0.023947,0.02376321,0.008257487,0.004771,0.001376248,0.000642249,0.0001834997,0.054138,0.804992,0.002404,0.014423,0.054087,0.103365,0.203125,0.502404,0.751202,1.004831,0.069337,14.283027,202.004854,410,1,830,108.180488,12.0,187.588162,35189.32,3.0,125.5,122.5,2.129412,3.808278,14,1,731,59.428571,1.0,186.717401,34863.39,1.0,8.25,7.25,3.300411,8.960767,82,1,216,10.146341,1.0,35.3188,1247.418,1.0,3.0,2.0,5.098791,25.861991,9.691,14.524,19.448,3.859,7.105,7.105,7.105,7.105,7.105,7.105,5.545,5.039,4.721,Real,240512.2,0.627973,285876.9,0.602371,150546.6,0.317217,185312.9,0.390473,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,BPI_Challenge_2018,43809,28457,0.64957,24,2973,57.391541,49.0,49,34.872131,1216.065487,44.0,59.0,15.0,53.775008,1.367397,51.651502,26.126459,1720.399665,0.607618,10.598758,0.003385,5e-06,9.288448e-07,0.0,0.0,0.0,0.0,0.0,7.740373e-08,7.740373e-08,26.126459,1720.399665,0.026981,0.290374,0.373006,0.415371,0.480335,0.675204,0.83759,1.539481,12.487438,64.62568,5083.455806,41,17,466141,61323.560976,7530.0,120522.247417,14525610000.0,902.0,45907.0,45005.0,2.444007,4.773254,4,2,38623,10952.25,2592.0,16111.407548,259577500.0,36.5,13507.75,13471.25,1.098736,-0.7148,21,1,34830,2086.142857,13.0,7431.744981,55230830.0,2.0,193.0,191.0,4.062387,14.952824,13.191,16.272,20.972,1.023,-0.01,1.855,0.511,1.403,3.572,2.001,7.849,7.371,7.067,Real,11563840.0,0.712079,21146260.0,0.570688,14140230.0,0.381612,15576080.0,0.420362,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,Receipt_WABO_CoSeLoG,1434,116,0.080893,1,25,5.981172,6.0,6,2.166129,4.692114,6.0,6.0,0.0,5.414708,1.704965,4.356445,1.276525,12.296006,0.362158,7.197193,0.03603,0.008136,0.3411204,0.023536,0.003777313,0.001743375,0.000291,0.001452813,0.0,0.0005811251,1.276525,12.296006,0.497211,0.497211,0.796374,0.887029,0.930265,0.959554,0.979777,12.362069,68.360277,9.380687,92.281919,27,1,1434,317.666667,27.0,553.389823,306240.3,8.0,50.0,42.0,1.342951,-0.178094,1,1434,1434,1434.0,1434.0,0.0,0.0,1434.0,1434.0,0.0,,,14,1,828,102.428571,6.0,225.871555,51017.96,1.25,33.25,32.0,2.471765,4.846541,3.209,4.746,7.019,0.385,2.672,2.966,0.804,1.484,2.966,2.966,3.26,2.845,2.584,Real,2382.326,0.689363,18296.27,0.235532,7814.868,0.100603,10728.7,0.138113,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,BPIC15_3,1409,1349,0.957417,3,124,42.356991,42.0,44,16.138406,260.448143,38.0,47.0,9.0,37.637731,1.786726,29.092933,-0.009541,1.543369,0.381009,7.167153,0.006921,0.00434,0.01630604,0.036953,0.01173096,0.004105837,0.001584,0.0005278933,0.0001173096,5.865481e-05,-0.009541,1.543369,0.010646,0.049681,0.090135,0.137686,0.233499,0.520937,0.760114,1.044477,0.592348,17.96413,358.019511,383,1,1409,155.825065,16.0,306.310544,93826.15,5.0,108.5,103.5,2.446349,5.280931,9,1,1348,156.555556,8.0,421.270858,177469.1,3.0,14.0,11.0,2.474158,4.122971,119,1,342,11.840336,2.0,39.55721,1564.773,1.0,7.0,6.0,6.217217,43.335525,10.317,14.226,18.743,3.182,-0.007,6.78,6.78,6.78,6.78,6.78,5.701,5.212,4.9,Real,298146.4,0.661781,397504.3,0.605676,224139.3,0.341521,265757.1,0.404934,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,BPI_Challenge_2019,251734,11973,0.047562,1,990,6.33972,5.0,5,13.057417,170.496137,5.0,6.0,1.0,5.173569,1.635822,4.592844,22.132989,753.772202,2.059621,12.044057,0.010078,2e-05,9.559579e-06,3e-06,3.614967e-07,1.606652e-07,0.0,4.01663e-08,8.03326e-08,8.03326e-08,22.132989,753.772202,0.199758,0.871424,0.92999,0.946368,0.959767,0.976217,0.988106,21.02514,594.255619,64.772702,4917.319751,42,2,314097,37998.166667,1628.0,80833.669206,6534082000.0,202.0,11536.0,11334.0,2.169648,3.263594,8,2,199867,31466.75,869.0,65387.493286,4275524000.0,97.0,14224.25,14127.25,2.059742,2.535789,32,1,181328,7866.6875,64.5,31658.428996,1002256000.0,9.0,1027.25,1018.25,5.135607,25.170543,6.243,8.811,19.447,0.346,-0.041,1.53,0.84,0.62,3.244,1.913,7.333,6.882,6.601,Real,1690369.0,0.64553,7477256.0,0.328029,7298458.0,0.320185,7300663.0,0.320282,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [21]:
bpic_stats = bpic_features.describe().transpose()
normalized_feature_names = bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() 
normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 
                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 
                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']
print(normalized_feature_names)
bpic_features[['log']+normalized_feature_names]

['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']


Unnamed: 0,log,ratio_unique_traces_per_trace,ratio_most_common_variant,ratio_top_10_variants,epa_normalized_variant_entropy,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_exponential_forgetting
0,BPIC15_2,0.995192,0.002404,0.103365,0.627973,0.602371,0.317217,0.390473
1,BPI_Challenge_2018,0.64957,0.026981,0.415371,0.712079,0.570688,0.381612,0.420362
2,Receipt_WABO_CoSeLoG,0.080893,0.497211,0.887029,0.689363,0.235532,0.100603,0.138113
3,BPIC15_3,0.957417,0.010646,0.137686,0.661781,0.605676,0.341521,0.404934
4,BPI_Challenge_2019,0.047562,0.199758,0.946368,0.64553,0.328029,0.320185,0.320282
5,RequestForPayment,0.012925,0.437264,0.933488,0.703735,0.189048,0.097572,0.118744
6,PrepaidTravelCost,0.096236,0.271081,0.822773,0.723785,0.317044,0.184879,0.214387
7,DomesticDeclarations,0.009429,0.43981,0.950095,0.696474,0.164758,0.085439,0.104389
8,BPIC15_4,0.996201,0.002849,0.102564,0.652985,0.603866,0.355927,0.412835
9,BPI_Challenge_2012,0.333614,0.262016,0.686254,0.70828,0.423074,0.226133,0.275551


In [29]:
#Features between 0 and 1: 
def write_generator_bpic_experiment(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    for exp in experiments:
        experiment_path = os.path.join('..','data', '34_bpic_features')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv") 


        first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
        second_dir = first_dir.replace("grid_","").replace("objectives","")

        experiment = [
          {
            'pipeline_step': 'event_logs_generation',
            'output_path':'output/generated',
            'generator_params': {
              "experiment": {"input_path": "data/34_bpic_features.csv",
                "objectives": exp},
              'config_space': {
                'mode': [5, 20],
                'sequence': [0.01, 1],
                'choice': [0.01, 1],
                'parallel': [0.01, 1],
                'loop': [0.01, 1],
                'silent': [0.01, 1],
                'lt_dependency': [0.01, 1],
                'num_traces': [10, 10001],
                'duplicate': [0],
                'or': [0]
              },
              'n_trials': 200
            }
          },
          {
            'pipeline_step': 'feature_extraction',
            'input_path': os.path.join('output', 'features', 'generated', '34_bpic_features', second_dir),
            'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
            'output_path': 'output/plots',
            'real_eventlog_path': 'data/34_bpic_features.csv',
            'plot_type': 'boxplot'
      }
        ]

        output_path = os.path.join('..', 'config_files','algorithm','34_bpic_features')
        os.makedirs(output_path, exist_ok=True)
        output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 

        with open(output_path, 'w') as f:
            json.dump(experiment, f, ensure_ascii=False)
        print(f"Saved experiment config in {output_path}")
        return experiment


def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments))
    
    for exp in experiments:
        write_generator_bpic_experiment(objectives=exp)
        
# Write generator configs for every pair of the selected features.
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        
# Show whatever create_objectives_grid returned.
print(exp_test)

21
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rt10v.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rmcv.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enself.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rt10v.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rt10v.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enseef.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rmcv.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rutpt.json
Saved experiment config in ../config_files/algorithm/34_bpic_features/g

## Single objective from real logs
(Feature selection)

In [3]:
def write_single_objective_experiment(experiment_path, objectives=["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]):
    first_dir = os.path.split(experiment_path[3:])[-1].replace(".csv","")
    second_dir = first_dir.replace("grid_","").replace("objectives","")

    experiment = [
      {
        'pipeline_step': 'event_logs_generation',
        'output_path':os.path.join('output','generated', 'grid_1obj'),
        'generator_params': {
          "experiment": {"input_path": experiment_path[3:],
            "objectives": objectives},
          'config_space': {
            'mode': [5, 20],
            'sequence': [0.01, 1],
            'choice': [0.01, 1],
            'parallel': [0.01, 1],
            'loop': [0.01, 1],
            'silent': [0.01, 1],
            'lt_dependency': [0.01, 1],
            'num_traces': [10, 10001],
            'duplicate': [0],
            'or': [0]
          },
          'n_trials': 200
        }
      },
      {
        'pipeline_step': 'feature_extraction',
        'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),
        'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
        'output_path': 'output/plots',
        'real_eventlog_path': 'data/34_bpic_features.csv',
        'plot_type': 'boxplot'
  }
    ]

    #print("EXPERIMENT:", experiment)
    output_path = os.path.join('..', 'config_files','algorithm','grid_experiments')
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json') 
    with open(output_path, 'w') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")
    
    return experiment

def create_objectives_grid(objectives, n_para_obj=2):
    parameters_o = "objectives, "
    if n_para_obj==1:
        experiments = [[exp] for exp in objectives]
    else:
        experiments = eval(f"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]")
    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))
    print(len(experiments), "experiments: ", experiments)
    
    parameters = "np.around(np.arange(0, 1.1,0.1),2), "
    tasks = eval(f"list(itertools.product({(parameters*n_para_obj)[:-2]}))")
    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]
    print(len(tasks))
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        experiment_path = os.path.join('..','data', 'grid_experiments')
        os.makedirs(experiment_path, exist_ok=True)
        experiment_path = os.path.join(experiment_path, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv") 
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_single_objective_experiment(experiment_path, objectives=exp)
    #df.to_csv(f"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv" ,index=False)
        
# Generate one task grid and one generator config per single selected feature.
exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1)        
# Show whatever create_objectives_grid returned.
print(exp_test)

7 experiments:  [('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_variant_entropy',), ('ratio_top_10_variants',), ('epa_normalized_sequence_entropy',), ('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('ratio_unique_traces_per_trace',)]
11
Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv
Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json
Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv
Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json
Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv
Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json
Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv
Saved experiment config in ../config_files/algorithm/grid_experiments/generator_g