{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "08ee6ee0",
   "metadata": {},
   "source": [
    "## Grid Objectives\n",
    "Iterating between min and max for each column\n",
    "\n",
    "### Glossary\n",
    "- **task**: Refers to the set of values (row) and corresponding keys to be aimed at sequentially.\n",
    "- **objective**: Refers to one key (column) and respective value to be aimed at simultaneously during a task.\n",
    "- **experiment**: Refers to one file containing a multiple of objectives and tasks for a fixed number of each, respectively. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "e5aa7223",
   "metadata": {},
   "outputs": [],
   "source": [
    "import itertools\n",
    "import json\n",
    "import numpy as np\n",
    "import os\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "472fd031",
   "metadata": {},
   "outputs": [],
   "source": [
    "#Features between 0 and 1: \n",
    "\"\"\"\n",
    "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_hist1', 'trace_len_hist2',\n",
    "                            'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',\n",
    "                            'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', \n",
    "                            'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', \n",
    "                            'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants', \n",
    "                            'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
    "                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
    "\"\"\"\n",
    "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n",
    "                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
    "                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
    "def abbrev_obj_keys(obj_keys):\n",
    "    abbreviated_keys = []\n",
    "    for obj_key in obj_keys:\n",
    "        key_slices = obj_key.split(\"_\")\n",
    "        chars = []\n",
    "        for key_slice in key_slices:\n",
    "            for idx, single_char in enumerate(key_slice):\n",
    "                if idx == 0 or single_char.isdigit():\n",
    "                    chars.append(single_char)\n",
    "        abbreviated_key = ''.join(chars)\n",
    "        abbreviated_keys.append(abbreviated_key)\n",
    "    return '_'.join(abbreviated_keys) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "2be119c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TASKS <class 'str'> <class 'int'> np.around(np.arange(0.0, 1.5,0.5),2), np.around(np.arange(0.0, 1.5,0.5),2), \n",
      "21 [('mean_variant_occurrence', 'trace_len_coefficient_variation'), ('activities_std', 'eventropy_trace'), ('epa_normalized_variant_entropy', 'ratio_variants_per_number_of_traces'), ('activities_std', 'epa_normalized_variant_entropy'), ('eventropy_trace', 'trace_len_coefficient_variation'), ('ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation'), ('activities_std', 'trace_len_coefficient_variation'), ('eventropy_trace', 'mean_variant_occurrence'), ('activities_std', 'mean_variant_occurrence'), ('epa_normalized_variant_entropy', 'eventropy_trace'), ('mean_variant_occurrence', 'start_activities_median'), ('ratio_variants_per_number_of_traces', 'start_activities_median'), ('eventropy_trace', 'start_activities_median'), ('activities_std', 'start_activities_median'), ('epa_normalized_variant_entropy', 'trace_len_coefficient_variation'), ('epa_normalized_variant_entropy', 'mean_variant_occurrence'), ('mean_variant_occurrence', 'ratio_variants_per_number_of_traces'), ('eventropy_trace', 'ratio_variants_per_number_of_traces'), ('start_activities_median', 'trace_len_coefficient_variation'), ('activities_std', 'ratio_variants_per_number_of_traces'), ('epa_normalized_variant_entropy', 'start_activities_median')]\n",
      "9\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_et.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_et.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rvpnot.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_enve.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_enve.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_et_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_rvpnot_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rvpnot_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_et_mvo.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_mvo.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_mvo.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_mvo.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_et.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_et.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_sam.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_sam.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_rvpnot_sam.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rvpnot_sam.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_et_sam.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_sam.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_sam.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_sam.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_mvo.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_mvo.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_rvpnot.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_rvpnot.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_et_rvpnot.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_rvpnot.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_sam_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_sam_tlcv.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_as_rvpnot.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_rvpnot.json\n",
      "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_sam.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_sam.json\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "def write_generator_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n",
    "    first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n",
    "    second_dir = first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n",
    "\n",
    "    experiment = [\n",
    "      {\n",
    "        'pipeline_step': 'event_logs_generation',\n",
    "        'output_path':'output/generated/grid_2obj',\n",
    "        'generator_params': {\n",
    "          \"experiment\": {\"input_path\": experiment_path[3:],\n",
    "            \"objectives\": objectives},\n",
    "          'config_space': {\n",
    "            'mode': [5, 20],\n",
    "            'sequence': [0.01, 1],\n",
    "            'choice': [0.01, 1],\n",
    "            'parallel': [0.01, 1],\n",
    "            'loop': [0.01, 1],\n",
    "            'silent': [0.01, 1],\n",
    "            'lt_dependency': [0.01, 1],\n",
    "            'num_traces': [10, 10001],\n",
    "            'duplicate': [0],\n",
    "            'or': [0]\n",
    "          },\n",
    "          'n_trials': 200\n",
    "        }\n",
    "      },\n",
    "      {\n",
    "        'pipeline_step': 'feature_extraction',\n",
    "        'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),\n",
    "        \"feature_params\": {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n",
    "        'output_path': 'output/plots',\n",
    "        'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
    "        'plot_type': 'boxplot'\n",
    "      },\n",
    "      {\n",
    "        \"pipeline_step\": \"benchmark_test\",\n",
    "        \"benchmark_test\": \"discovery\",\n",
    "        \"input_path\": os.path.join('output', 'generated', 'grid_2obj', first_dir, second_dir),\n",
    "        \"output_path\":\"output\",\n",
    "        \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n",
    "      }\n",
    "    ]\n",
    "\n",
    "    #print(\"EXPERIMENT:\", experiment[1]['input_path'])\n",
    "    output_path = os.path.join('..', 'config_files','algorithm',f'grid_{len(objectives)}obj')\n",
    "    os.makedirs(output_path, exist_ok=True)\n",
    "    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
    "    with open(output_path, 'w') as f:\n",
    "        json.dump(experiment, f, ensure_ascii=False)\n",
    "    print(f\"Saved experiment config in {output_path}\")\n",
    "    \n",
    "    return experiment\n",
    "\n",
    "def create_objectives_grid(objectives, n_para_obj=2):\n",
    "    parameters_o = \"objectives, \"\n",
    "    if n_para_obj==len(objectives):\n",
    "        experiments = [tuple(sorted(objectives))]\n",
    "        print(len(experiments), experiments)\n",
    "        parameters = get_ranges_from_data(sorted(objectives))\n",
    "        tasks = eval(f\"list(itertools.product({parameters}))\")\n",
    "        #tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n",
    "    else: \n",
    "        if n_para_obj==1:\n",
    "            experiments = [[exp] for exp in objectives]\n",
    "        else:\n",
    "            experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n",
    "        experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n",
    "        parameters = \"np.around(np.arange(0.0, 1.5,0.5),2), \"\n",
    "        tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n",
    "    print(\"TASKS\", type(parameters), type(n_para_obj), parameters*n_para_obj)\n",
    "    print(len(experiments), experiments)\n",
    "\n",
    "    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]\n",
    "    print(len(tasks))\n",
    "    for exp in experiments:\n",
    "        df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n",
    "        experiment_path = os.path.join('..','data', f'grid_{n_para_obj}obj')\n",
    "        os.makedirs(experiment_path, exist_ok=True)\n",
    "        experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n",
    "        df.to_csv(experiment_path, index=False)\n",
    "        print(f\"Saved experiment in {experiment_path}\")\n",
    "        write_generator_experiment(experiment_path, objectives=exp)\n",
    "    #df.to_csv(f\"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv\" ,index=False)\n",
    "\n",
    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        \n",
    "print(exp_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9cc84ef2",
   "metadata": {},
   "source": [
    "## Grid Objectives\n",
    "Based on real ED ranges."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "ae86005f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation', 'mean_variant_occurrence', 'activities_std', 'start_activities_median', 'eventropy_trace', 'epa_normalized_variant_entropy']\n",
      "ratio_variants_per_number_of_traces    (4.081521591249218e-05, 0.4659094439111451, 0....\n",
      "trace_len_coefficient_variation             (0.0, 0.6838390025070027, 4.744080106525514)\n",
      "mean_variant_occurrence                (1.001552795031056, 838.6048767068644, 24500.6...\n",
      "activities_std                             (0.0, 12982.056069959535, 120522.24741658216)\n",
      "start_activities_median                               (1.0, 7975.705882352941, 150370.0)\n",
      "eventropy_trace                                        (0.0, 6.2416470588235295, 13.362)\n",
      "epa_normalized_variant_entropy              (0.0, 0.6773545645863115, 0.899497456838069)\n",
      "Name: range, dtype: object\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2)'"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "DF_PATH = \"../../shampu/data/bench_baseline_feat.csv\"\n",
    "def get_ranges_from_data(objectives, df_path = DF_PATH):\n",
    "    #print(objectives)\n",
    "    dmf = pd.read_csv(DF_PATH, index_col=None)\n",
    "    dmf = dmf[objectives].describe()\n",
    "    dmf = dmf.transpose()[['min', 'mean','max']]\n",
    "    dmf['range'] = dmf.apply(lambda x: tuple([x['min'], x['mean'], x['max']]), axis=1)\n",
    "    print(dmf['range'])\n",
    "    #tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n",
    "    result = [f\"np.around({x}, 2)\" for x in dmf['range']]\n",
    "    result = \", \".join(result)\n",
    "    return result\n",
    "\n",
    "print(normalized_feature_names)\n",
    "get_ranges_from_data(normalized_feature_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "a7a4c864",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1 [('activities_std', 'epa_normalized_variant_entropy', 'eventropy_trace', 'mean_variant_occurrence', 'ratio_variants_per_number_of_traces', 'start_activities_median', 'trace_len_coefficient_variation')]\n",
      "activities_std                             (0.0, 12982.056069959535, 120522.24741658216)\n",
      "epa_normalized_variant_entropy              (0.0, 0.6773545645863115, 0.899497456838069)\n",
      "eventropy_trace                                        (0.0, 6.2416470588235295, 13.362)\n",
      "mean_variant_occurrence                (1.001552795031056, 838.6048767068644, 24500.6...\n",
      "ratio_variants_per_number_of_traces    (4.081521591249218e-05, 0.4659094439111451, 0....\n",
      "start_activities_median                               (1.0, 7975.705882352941, 150370.0)\n",
      "trace_len_coefficient_variation             (0.0, 0.6838390025070027, 4.744080106525514)\n",
      "Name: range, dtype: object\n",
      "TASKS <class 'str'> <class 'int'> np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)\n",
      "1 [('activities_std', 'epa_normalized_variant_entropy', 'eventropy_trace', 'mean_variant_occurrence', 'ratio_variants_per_number_of_traces', 'start_activities_median', 'trace_len_coefficient_variation')]\n",
      "2187\n",
      "Saved experiment in ../data/grid_7obj/grid_7objectives_as_enve_et_mvo_rvpnot_sam_tlcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_7obj/generator_grid_7objectives_as_enve_et_mvo_rvpnot_sam_tlcv.json\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation', 'mean_variant_occurrence', 'activities_std', 'start_activities_median', 'eventropy_trace', 'epa_normalized_variant_entropy']\n",
    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=len(normalized_feature_names))        \n",
    "print(exp_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "56ab613b",
   "metadata": {},
   "source": [
    "### Helper prototypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "dfd1a302",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"])    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "218946b7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/d0/btmbyskx4t106_l2zghzln2w0000gn/T/ipykernel_12596/3751377549.py:7: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n",
      "  df = pd.concat([\n"
     ]
    }
   ],
   "source": [
    "k=0\n",
    "for i in np.arange(0, 1.1,0.5):\n",
    "    for j in np.arange(0,0.55,0.5):\n",
    "        k+=1\n",
    "        new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
    "                   \"epa_normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
    "        df = pd.concat([\n",
    "                df, \n",
    "                pd.DataFrame([new_entry], columns=new_entry.index)]\n",
    "           ).reset_index(drop=True)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "b1e3bb5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c12bc19d",
   "metadata": {},
   "source": [
    "## Objectives from real logs\n",
    "(Feature selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "39ac74bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(26, 8)\n",
      "26  Event-Logs:  ['BPIC12' 'BPIC13cp' 'BPIC13inc' 'BPIC13op' 'BPIC14dc_p' 'BPIC14di_p'\n",
      " 'BPIC14dia_p' 'BPIC15f1' 'BPIC15f2' 'BPIC15f3' 'BPIC15f4' 'BPIC15f5'\n",
      " 'BPIC16c_p' 'BPIC16wm_p' 'BPIC17' 'BPIC17ol' 'BPIC19' 'BPIC20a' 'BPIC20b'\n",
      " 'BPIC20c' 'BPIC20d' 'BPIC20e' 'HD' 'RTFMP' 'RWABOCSL' 'SEPSIS']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>log</th>\n",
       "      <th>ratio_variants_per_number_of_traces</th>\n",
       "      <th>ratio_most_common_variant</th>\n",
       "      <th>ratio_top_10_variants</th>\n",
       "      <th>epa_normalized_variant_entropy</th>\n",
       "      <th>epa_normalized_sequence_entropy</th>\n",
       "      <th>epa_normalized_sequence_entropy_linear_forgetting</th>\n",
       "      <th>epa_normalized_sequence_entropy_exponential_forgetting</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BPIC16wm_p</td>\n",
       "      <td>0.002882</td>\n",
       "      <td>0.295803</td>\n",
       "      <td>0.714106</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BPIC15f5</td>\n",
       "      <td>0.997405</td>\n",
       "      <td>0.001730</td>\n",
       "      <td>0.102076</td>\n",
       "      <td>0.648702</td>\n",
       "      <td>0.603260</td>\n",
       "      <td>0.342410</td>\n",
       "      <td>0.404580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BPIC15f1</td>\n",
       "      <td>0.975813</td>\n",
       "      <td>0.006672</td>\n",
       "      <td>0.121768</td>\n",
       "      <td>0.652855</td>\n",
       "      <td>0.610294</td>\n",
       "      <td>0.270241</td>\n",
       "      <td>0.363928</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BPIC19</td>\n",
       "      <td>0.047562</td>\n",
       "      <td>0.199758</td>\n",
       "      <td>0.946368</td>\n",
       "      <td>0.645530</td>\n",
       "      <td>0.328029</td>\n",
       "      <td>0.320185</td>\n",
       "      <td>0.320282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BPIC14dia_p</td>\n",
       "      <td>0.496847</td>\n",
       "      <td>0.037455</td>\n",
       "      <td>0.552836</td>\n",
       "      <td>0.774743</td>\n",
       "      <td>0.608350</td>\n",
       "      <td>0.305614</td>\n",
       "      <td>0.377416</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           log  ratio_variants_per_number_of_traces  \\\n",
       "0   BPIC16wm_p                             0.002882   \n",
       "1     BPIC15f5                             0.997405   \n",
       "2     BPIC15f1                             0.975813   \n",
       "3       BPIC19                             0.047562   \n",
       "4  BPIC14dia_p                             0.496847   \n",
       "\n",
       "   ratio_most_common_variant  ratio_top_10_variants  \\\n",
       "0                   0.295803               0.714106   \n",
       "1                   0.001730               0.102076   \n",
       "2                   0.006672               0.121768   \n",
       "3                   0.199758               0.946368   \n",
       "4                   0.037455               0.552836   \n",
       "\n",
       "   epa_normalized_variant_entropy  epa_normalized_sequence_entropy  \\\n",
       "0                        0.000000                         0.000000   \n",
       "1                        0.648702                         0.603260   \n",
       "2                        0.652855                         0.610294   \n",
       "3                        0.645530                         0.328029   \n",
       "4                        0.774743                         0.608350   \n",
       "\n",
       "   epa_normalized_sequence_entropy_linear_forgetting  \\\n",
       "0                                           0.000000   \n",
       "1                                           0.342410   \n",
       "2                                           0.270241   \n",
       "3                                           0.320185   \n",
       "4                                           0.305614   \n",
       "\n",
       "   epa_normalized_sequence_entropy_exponential_forgetting  \n",
       "0                                           0.000000       \n",
       "1                                           0.404580       \n",
       "2                                           0.363928       \n",
       "3                                           0.320282       \n",
       "4                                           0.377416       "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpic_features = pd.read_csv(\"../data/BaselineED_feat.csv\", index_col=None)\n",
    "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
    "\n",
    "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
    "print(bpic_features.shape)\n",
    "print(len(bpic_features), \" Event-Logs: \", bpic_features.sort_values('log')['log'].unique())\n",
    "\n",
    "#bpic_features.rename(columns={\"variant_entropy\":\"epa_variant_entropy\", \"normalized_variant_entropy\":\"epa_normalized_variant_entropy\", \"sequence_entropy\":\"epa_sequence_entropy\", \"normalized_sequence_entropy\":\"epa_normalized_sequence_entropy\", \"sequence_entropy_linear_forgetting\":\"epa_sequence_entropy_linear_forgetting\", \"normalized_sequence_entropy_linear_forgetting\":\"epa_normalized_sequence_entropy_linear_forgetting\", \"sequence_entropy_exponential_forgetting\":\"epa_sequence_entropy_exponential_forgetting\", \"normalized_sequence_entropy_exponential_forgetting\":\"epa_normalized_sequence_entropy_exponential_forgetting\"},\n",
    "#          errors=\"raise\", inplace=True)\n",
    "\n",
    "bpic_features.head()\n",
    "#bpic_features.to_csv(\"../data/BaselineED_feat.csv\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "ef0df0b9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>log</th>\n",
       "      <th>ratio_variants_per_number_of_traces</th>\n",
       "      <th>ratio_most_common_variant</th>\n",
       "      <th>ratio_top_10_variants</th>\n",
       "      <th>epa_normalized_variant_entropy</th>\n",
       "      <th>epa_normalized_sequence_entropy</th>\n",
       "      <th>epa_normalized_sequence_entropy_linear_forgetting</th>\n",
       "      <th>epa_normalized_sequence_entropy_exponential_forgetting</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BPIC16wm_p</td>\n",
       "      <td>0.002882</td>\n",
       "      <td>0.295803</td>\n",
       "      <td>0.714106</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BPIC15f5</td>\n",
       "      <td>0.997405</td>\n",
       "      <td>0.001730</td>\n",
       "      <td>0.102076</td>\n",
       "      <td>0.648702</td>\n",
       "      <td>0.603260</td>\n",
       "      <td>0.342410</td>\n",
       "      <td>0.404580</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BPIC15f1</td>\n",
       "      <td>0.975813</td>\n",
       "      <td>0.006672</td>\n",
       "      <td>0.121768</td>\n",
       "      <td>0.652855</td>\n",
       "      <td>0.610294</td>\n",
       "      <td>0.270241</td>\n",
       "      <td>0.363928</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BPIC19</td>\n",
       "      <td>0.047562</td>\n",
       "      <td>0.199758</td>\n",
       "      <td>0.946368</td>\n",
       "      <td>0.645530</td>\n",
       "      <td>0.328029</td>\n",
       "      <td>0.320185</td>\n",
       "      <td>0.320282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BPIC14dia_p</td>\n",
       "      <td>0.496847</td>\n",
       "      <td>0.037455</td>\n",
       "      <td>0.552836</td>\n",
       "      <td>0.774743</td>\n",
       "      <td>0.608350</td>\n",
       "      <td>0.305614</td>\n",
       "      <td>0.377416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>BPIC15f2</td>\n",
       "      <td>0.995192</td>\n",
       "      <td>0.002404</td>\n",
       "      <td>0.103365</td>\n",
       "      <td>0.627973</td>\n",
       "      <td>0.602371</td>\n",
       "      <td>0.317217</td>\n",
       "      <td>0.390473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>BPIC15f3</td>\n",
       "      <td>0.957417</td>\n",
       "      <td>0.010646</td>\n",
       "      <td>0.137686</td>\n",
       "      <td>0.661781</td>\n",
       "      <td>0.605676</td>\n",
       "      <td>0.341521</td>\n",
       "      <td>0.404934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>BPIC13cp</td>\n",
       "      <td>0.123067</td>\n",
       "      <td>0.331540</td>\n",
       "      <td>0.840619</td>\n",
       "      <td>0.705383</td>\n",
       "      <td>0.310940</td>\n",
       "      <td>0.286515</td>\n",
       "      <td>0.288383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>BPIC14dc_p</td>\n",
       "      <td>0.048444</td>\n",
       "      <td>0.074944</td>\n",
       "      <td>0.765056</td>\n",
       "      <td>0.470758</td>\n",
       "      <td>0.419266</td>\n",
       "      <td>0.312599</td>\n",
       "      <td>0.326719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>BPIC20a</td>\n",
       "      <td>0.009429</td>\n",
       "      <td>0.439810</td>\n",
       "      <td>0.950095</td>\n",
       "      <td>0.696474</td>\n",
       "      <td>0.164758</td>\n",
       "      <td>0.085439</td>\n",
       "      <td>0.104389</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>BPIC14di_p</td>\n",
       "      <td>0.000041</td>\n",
       "      <td>0.787081</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.044018</td>\n",
       "      <td>0.033322</td>\n",
       "      <td>0.034685</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>BPIC17ol</td>\n",
       "      <td>0.000372</td>\n",
       "      <td>0.380626</td>\n",
       "      <td>0.380626</td>\n",
       "      <td>0.813479</td>\n",
       "      <td>0.105130</td>\n",
       "      <td>0.052672</td>\n",
       "      <td>0.066000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>BPIC13op</td>\n",
       "      <td>0.131868</td>\n",
       "      <td>0.217338</td>\n",
       "      <td>0.769231</td>\n",
       "      <td>0.702960</td>\n",
       "      <td>0.276771</td>\n",
       "      <td>0.262094</td>\n",
       "      <td>0.263029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>RTFMP</td>\n",
       "      <td>0.001536</td>\n",
       "      <td>0.375620</td>\n",
       "      <td>0.993104</td>\n",
       "      <td>0.769353</td>\n",
       "      <td>0.111932</td>\n",
       "      <td>0.052586</td>\n",
       "      <td>0.068442</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>BPIC20d</td>\n",
       "      <td>0.096236</td>\n",
       "      <td>0.271081</td>\n",
       "      <td>0.822773</td>\n",
       "      <td>0.723785</td>\n",
       "      <td>0.317044</td>\n",
       "      <td>0.184879</td>\n",
       "      <td>0.214387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>BPIC12</td>\n",
       "      <td>0.333614</td>\n",
       "      <td>0.262016</td>\n",
       "      <td>0.686254</td>\n",
       "      <td>0.708280</td>\n",
       "      <td>0.423074</td>\n",
       "      <td>0.226133</td>\n",
       "      <td>0.275551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>RWABOCSL</td>\n",
       "      <td>0.080893</td>\n",
       "      <td>0.497211</td>\n",
       "      <td>0.887029</td>\n",
       "      <td>0.689363</td>\n",
       "      <td>0.235532</td>\n",
       "      <td>0.100603</td>\n",
       "      <td>0.138113</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>BPIC20e</td>\n",
       "      <td>0.012925</td>\n",
       "      <td>0.437264</td>\n",
       "      <td>0.933488</td>\n",
       "      <td>0.703735</td>\n",
       "      <td>0.189048</td>\n",
       "      <td>0.097572</td>\n",
       "      <td>0.118744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>BPIC16c_p</td>\n",
       "      <td>0.438053</td>\n",
       "      <td>0.101770</td>\n",
       "      <td>0.424779</td>\n",
       "      <td>0.899497</td>\n",
       "      <td>0.683796</td>\n",
       "      <td>0.404685</td>\n",
       "      <td>0.470116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>BPIC13inc</td>\n",
       "      <td>0.200026</td>\n",
       "      <td>0.232195</td>\n",
       "      <td>0.794414</td>\n",
       "      <td>0.717846</td>\n",
       "      <td>0.404651</td>\n",
       "      <td>0.391097</td>\n",
       "      <td>0.391625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>BPIC15f4</td>\n",
       "      <td>0.996201</td>\n",
       "      <td>0.002849</td>\n",
       "      <td>0.102564</td>\n",
       "      <td>0.652985</td>\n",
       "      <td>0.603866</td>\n",
       "      <td>0.355927</td>\n",
       "      <td>0.412835</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>BPIC17</td>\n",
       "      <td>0.505570</td>\n",
       "      <td>0.033514</td>\n",
       "      <td>0.531340</td>\n",
       "      <td>0.741706</td>\n",
       "      <td>0.461565</td>\n",
       "      <td>0.231922</td>\n",
       "      <td>0.290464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>BPIC20c</td>\n",
       "      <td>0.209200</td>\n",
       "      <td>0.135315</td>\n",
       "      <td>0.757537</td>\n",
       "      <td>0.733653</td>\n",
       "      <td>0.420150</td>\n",
       "      <td>0.137287</td>\n",
       "      <td>0.215490</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>BPIC20b</td>\n",
       "      <td>0.116762</td>\n",
       "      <td>0.212281</td>\n",
       "      <td>0.811289</td>\n",
       "      <td>0.758268</td>\n",
       "      <td>0.339380</td>\n",
       "      <td>0.145611</td>\n",
       "      <td>0.193753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>HD</td>\n",
       "      <td>0.049345</td>\n",
       "      <td>0.516594</td>\n",
       "      <td>0.906332</td>\n",
       "      <td>0.799120</td>\n",
       "      <td>0.254066</td>\n",
       "      <td>0.118478</td>\n",
       "      <td>0.154576</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>SEPSIS</td>\n",
       "      <td>0.805714</td>\n",
       "      <td>0.033333</td>\n",
       "      <td>0.274286</td>\n",
       "      <td>0.695759</td>\n",
       "      <td>0.522343</td>\n",
       "      <td>0.219365</td>\n",
       "      <td>0.299505</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            log  ratio_variants_per_number_of_traces  \\\n",
       "0    BPIC16wm_p                             0.002882   \n",
       "1      BPIC15f5                             0.997405   \n",
       "2      BPIC15f1                             0.975813   \n",
       "3        BPIC19                             0.047562   \n",
       "4   BPIC14dia_p                             0.496847   \n",
       "5      BPIC15f2                             0.995192   \n",
       "6      BPIC15f3                             0.957417   \n",
       "7      BPIC13cp                             0.123067   \n",
       "8    BPIC14dc_p                             0.048444   \n",
       "9       BPIC20a                             0.009429   \n",
       "10   BPIC14di_p                             0.000041   \n",
       "11     BPIC17ol                             0.000372   \n",
       "12     BPIC13op                             0.131868   \n",
       "13        RTFMP                             0.001536   \n",
       "14      BPIC20d                             0.096236   \n",
       "15       BPIC12                             0.333614   \n",
       "16     RWABOCSL                             0.080893   \n",
       "17      BPIC20e                             0.012925   \n",
       "18    BPIC16c_p                             0.438053   \n",
       "19    BPIC13inc                             0.200026   \n",
       "20     BPIC15f4                             0.996201   \n",
       "21       BPIC17                             0.505570   \n",
       "22      BPIC20c                             0.209200   \n",
       "23      BPIC20b                             0.116762   \n",
       "24           HD                             0.049345   \n",
       "25       SEPSIS                             0.805714   \n",
       "\n",
       "    ratio_most_common_variant  ratio_top_10_variants  \\\n",
       "0                    0.295803               0.714106   \n",
       "1                    0.001730               0.102076   \n",
       "2                    0.006672               0.121768   \n",
       "3                    0.199758               0.946368   \n",
       "4                    0.037455               0.552836   \n",
       "5                    0.002404               0.103365   \n",
       "6                    0.010646               0.137686   \n",
       "7                    0.331540               0.840619   \n",
       "8                    0.074944               0.765056   \n",
       "9                    0.439810               0.950095   \n",
       "10                   0.787081               0.000000   \n",
       "11                   0.380626               0.380626   \n",
       "12                   0.217338               0.769231   \n",
       "13                   0.375620               0.993104   \n",
       "14                   0.271081               0.822773   \n",
       "15                   0.262016               0.686254   \n",
       "16                   0.497211               0.887029   \n",
       "17                   0.437264               0.933488   \n",
       "18                   0.101770               0.424779   \n",
       "19                   0.232195               0.794414   \n",
       "20                   0.002849               0.102564   \n",
       "21                   0.033514               0.531340   \n",
       "22                   0.135315               0.757537   \n",
       "23                   0.212281               0.811289   \n",
       "24                   0.516594               0.906332   \n",
       "25                   0.033333               0.274286   \n",
       "\n",
       "    epa_normalized_variant_entropy  epa_normalized_sequence_entropy  \\\n",
       "0                         0.000000                         0.000000   \n",
       "1                         0.648702                         0.603260   \n",
       "2                         0.652855                         0.610294   \n",
       "3                         0.645530                         0.328029   \n",
       "4                         0.774743                         0.608350   \n",
       "5                         0.627973                         0.602371   \n",
       "6                         0.661781                         0.605676   \n",
       "7                         0.705383                         0.310940   \n",
       "8                         0.470758                         0.419266   \n",
       "9                         0.696474                         0.164758   \n",
       "10                        1.000000                         0.044018   \n",
       "11                        0.813479                         0.105130   \n",
       "12                        0.702960                         0.276771   \n",
       "13                        0.769353                         0.111932   \n",
       "14                        0.723785                         0.317044   \n",
       "15                        0.708280                         0.423074   \n",
       "16                        0.689363                         0.235532   \n",
       "17                        0.703735                         0.189048   \n",
       "18                        0.899497                         0.683796   \n",
       "19                        0.717846                         0.404651   \n",
       "20                        0.652985                         0.603866   \n",
       "21                        0.741706                         0.461565   \n",
       "22                        0.733653                         0.420150   \n",
       "23                        0.758268                         0.339380   \n",
       "24                        0.799120                         0.254066   \n",
       "25                        0.695759                         0.522343   \n",
       "\n",
       "    epa_normalized_sequence_entropy_linear_forgetting  \\\n",
       "0                                            0.000000   \n",
       "1                                            0.342410   \n",
       "2                                            0.270241   \n",
       "3                                            0.320185   \n",
       "4                                            0.305614   \n",
       "5                                            0.317217   \n",
       "6                                            0.341521   \n",
       "7                                            0.286515   \n",
       "8                                            0.312599   \n",
       "9                                            0.085439   \n",
       "10                                           0.033322   \n",
       "11                                           0.052672   \n",
       "12                                           0.262094   \n",
       "13                                           0.052586   \n",
       "14                                           0.184879   \n",
       "15                                           0.226133   \n",
       "16                                           0.100603   \n",
       "17                                           0.097572   \n",
       "18                                           0.404685   \n",
       "19                                           0.391097   \n",
       "20                                           0.355927   \n",
       "21                                           0.231922   \n",
       "22                                           0.137287   \n",
       "23                                           0.145611   \n",
       "24                                           0.118478   \n",
       "25                                           0.219365   \n",
       "\n",
       "    epa_normalized_sequence_entropy_exponential_forgetting  \n",
       "0                                            0.000000       \n",
       "1                                            0.404580       \n",
       "2                                            0.363928       \n",
       "3                                            0.320282       \n",
       "4                                            0.377416       \n",
       "5                                            0.390473       \n",
       "6                                            0.404934       \n",
       "7                                            0.288383       \n",
       "8                                            0.326719       \n",
       "9                                            0.104389       \n",
       "10                                           0.034685       \n",
       "11                                           0.066000       \n",
       "12                                           0.263029       \n",
       "13                                           0.068442       \n",
       "14                                           0.214387       \n",
       "15                                           0.275551       \n",
       "16                                           0.138113       \n",
       "17                                           0.118744       \n",
       "18                                           0.470116       \n",
       "19                                           0.391625       \n",
       "20                                           0.412835       \n",
       "21                                           0.290464       \n",
       "22                                           0.215490       \n",
       "23                                           0.193753       \n",
       "24                                           0.154576       \n",
       "25                                           0.299505       "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bpic_stats = bpic_features.describe().transpose()\n",
    "normalized_feature_names = bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() \n",
    "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n",
    "                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n",
    "                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
    "print(normalized_feature_names)\n",
    "bpic_features[['log']+normalized_feature_names]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "44909860",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "21\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rvpnot.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rmcv_rvpnot.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enself.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enseef.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rvpnot.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rt10v.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rt10v.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_enve.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rmcv_rt10v.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_enve.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rvpnot.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rt10v.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rmcv.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rmcv.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rmcv.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rt10v.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_enself.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enve.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rt10v_rvpnot.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rmcv.json\n",
      "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rvpnot.json\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "#Features between 0 and 1: \n",
    "def write_generator_bpic_experiment(objectives, n_para_obj=2):\n",
    "    parameters_o = \"objectives, \"\n",
    "    experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n",
    "    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n",
    "    for exp in experiments:\n",
    "        experiment_path = os.path.join('..','data', 'BaselineED_feat')\n",
    "        os.makedirs(experiment_path, exist_ok=True)\n",
    "        experiment_path = os.path.join(experiment_path, f\"{len(exp)}_{abbrev_obj_keys(exp)}.csv\") \n",
    "\n",
    "\n",
    "        first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n",
    "        second_dir = first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n",
    "\n",
    "        experiment = [\n",
    "          {\n",
    "            'pipeline_step': 'event_logs_generation',\n",
    "            'output_path':'output/generated',\n",
    "            'generator_params': {\n",
    "              \"experiment\": {\"input_path\": \"data/BaselineED_feat.csv\",\n",
    "                \"objectives\": exp},\n",
    "              'config_space': {\n",
    "                'mode': [5, 20],\n",
    "                'sequence': [0.01, 1],\n",
    "                'choice': [0.01, 1],\n",
    "                'parallel': [0.01, 1],\n",
    "                'loop': [0.01, 1],\n",
    "                'silent': [0.01, 1],\n",
    "                'lt_dependency': [0.01, 1],\n",
    "                'num_traces': [10, 10001],\n",
    "                'duplicate': [0],\n",
    "                'or': [0]\n",
    "              },\n",
    "              'n_trials': 200\n",
    "            }\n",
    "          },\n",
    "          {\n",
    "            'pipeline_step': 'feature_extraction',\n",
    "            'input_path': os.path.join('output', 'features', 'generated', 'BaselineED_feat', first_dir),\n",
    "            'input_path': os.path.join('output', 'generated', 'BaselineED_feat', first_dir),\n",
    "            'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
    "            'feature_params': {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n",
    "            'output_path': 'output/plots',\n",
    "            'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
    "            'plot_type': 'boxplot'\n",
    "          },\n",
    "          {\n",
    "            \"pipeline_step\": \"benchmark_test\",\n",
    "            \"benchmark_test\": \"discovery\",\n",
    "            \"input_path\": os.path.join('output', 'generated', 'BaselineED_feat', first_dir),\n",
    "            \"output_path\":\"output\",\n",
    "            \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n",
    "          }\n",
    "        ]\n",
    "\n",
    "        output_path = os.path.join('..', 'config_files','algorithm','BaselineED_feat')\n",
    "        os.makedirs(output_path, exist_ok=True)\n",
    "        output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
    "\n",
    "        with open(output_path, 'w') as f:\n",
    "            json.dump(experiment, f, ensure_ascii=False)\n",
    "        print(f\"Saved experiment config in {output_path}\")\n",
    "        return experiment\n",
    "\n",
    "\n",
    "def create_objectives_grid(objectives, n_para_obj=2):\n",
    "    parameters_o = \"objectives, \"\n",
    "    experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n",
    "    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n",
    "    print(len(experiments))\n",
    "    \n",
    "    for exp in experiments:\n",
    "        write_generator_bpic_experiment(objectives=exp)\n",
    "        \n",
    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)        \n",
    "print(exp_test)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b07e9753",
   "metadata": {},
   "source": [
    "## Single objective from real logs\n",
    "(Feature selection)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "d759a677",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7 experiments:  [('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_sequence_entropy',), ('ratio_top_10_variants',), ('ratio_variants_per_number_of_traces',), ('epa_normalized_variant_entropy',)]\n",
      "11\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_enself.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enself.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_rmcv.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rmcv.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_ense.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_rvpnot.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rvpnot.json\n",
      "Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv\n",
      "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json\n",
      "None\n"
     ]
    }
   ],
   "source": [
    "def write_single_objective_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n",
    "    first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n",
    "    second_dir = first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n",
    "\n",
    "    experiment = [\n",
    "      {\n",
    "        'pipeline_step': 'event_logs_generation',\n",
    "        'output_path':os.path.join('output','generated', 'grid_1obj'),\n",
    "        'generator_params': {\n",
    "          \"experiment\": {\"input_path\": experiment_path[3:],\n",
    "            \"objectives\": objectives},\n",
    "          'config_space': {\n",
    "            'mode': [5, 20],\n",
    "            'sequence': [0.01, 1],\n",
    "            'choice': [0.01, 1],\n",
    "            'parallel': [0.01, 1],\n",
    "            'loop': [0.01, 1],\n",
    "            'silent': [0.01, 1],\n",
    "            'lt_dependency': [0.01, 1],\n",
    "            'num_traces': [10, 10001],\n",
    "            'duplicate': [0],\n",
    "            'or': [0]\n",
    "          },\n",
    "          'n_trials': 200\n",
    "        }\n",
    "      },\n",
    "      {\n",
    "        'pipeline_step': 'feature_extraction',\n",
    "        'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),\n",
    "        'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
    "        'feature_params': {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n",
    "        'output_path': 'output/plots',\n",
    "        'real_eventlog_path': 'data/BaselineED_feat.csv',\n",
    "        'plot_type': 'boxplot'\n",
    "      },\n",
    "      {\n",
    "        \"pipeline_step\": \"benchmark_test\",\n",
    "        \"benchmark_test\": \"discovery\",\n",
    "        \"input_path\": os.path.join('output', 'generated', 'grid_1obj', first_dir, second_dir),\n",
    "        \"output_path\":\"output\",\n",
    "        \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n",
    "      }\n",
    "    ]\n",
    "\n",
    "    #print(\"EXPERIMENT:\", experiment)\n",
    "    output_path = os.path.join('..', 'config_files','algorithm','grid_experiments')\n",
    "    os.makedirs(output_path, exist_ok=True)\n",
    "    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
    "    with open(output_path, 'w') as f:\n",
    "        json.dump(experiment, f, ensure_ascii=False)\n",
    "    print(f\"Saved experiment config in {output_path}\")\n",
    "    \n",
    "    return experiment\n",
    "\n",
    "def create_objectives_grid(objectives, n_para_obj=2):\n",
    "    parameters_o = \"objectives, \"\n",
    "    if n_para_obj==1:\n",
    "        experiments = [[exp] for exp in objectives]\n",
    "    else:\n",
    "        experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n",
    "    experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n",
    "    print(len(experiments), \"experiments: \", experiments)\n",
    "    \n",
    "    parameters = \"np.around(np.arange(0, 1.1,0.1),2), \"\n",
    "    tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n",
    "    tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]\n",
    "    print(len(tasks))\n",
    "    for exp in experiments:\n",
    "        df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n",
    "        experiment_path = os.path.join('..','data', 'grid_experiments')\n",
    "        os.makedirs(experiment_path, exist_ok=True)\n",
    "        experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n",
    "        df.to_csv(experiment_path, index=False)\n",
    "        print(f\"Saved experiment in {experiment_path}\")\n",
    "        write_single_objective_experiment(experiment_path, objectives=exp)\n",
    "    #df.to_csv(f\"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv\" ,index=False)\n",
    "        \n",
    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1)        \n",
    "print(exp_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f9886f44",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "shampu",
   "language": "python",
   "name": "shampu"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}