{ "cells": [ { "cell_type": "markdown", "id": "08ee6ee0", "metadata": {}, "source": [ "## Grid Objectives\n", "Iterating between min and max for each column\n", "\n", "### Glossary\n", "- **task**: Refers to the set of values (row) and corresponding keys to be aimed at sequentially.\n", "- **objective**: Refers to one key (column) and respective value to be aimed at simultaneously during a task.\n", "- **experiment**: Refers to one file containing a multiple of objectives and tasks for a fixed number of each, respectively. " ] }, { "cell_type": "code", "execution_count": 1, "id": "e5aa7223", "metadata": {}, "outputs": [], "source": [ "import itertools\n", "import json\n", "import numpy as np\n", "import os\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "472fd031", "metadata": {}, "outputs": [], "source": [ "#Features between 0 and 1: \n", "\"\"\"\n", "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_hist1', 'trace_len_hist2',\n", " 'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',\n", " 'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant', \n", " 'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants', \n", " 'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants', \n", " 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "\"\"\"\n", "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n", " 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "def abbrev_obj_keys(obj_keys):\n", " abbreviated_keys = []\n", " for obj_key in obj_keys:\n", " key_slices = obj_key.split(\"_\")\n", " chars = []\n", " for key_slice in key_slices:\n", " for idx, single_char in enumerate(key_slice):\n", " if idx == 0 or single_char.isdigit():\n", " chars.append(single_char)\n", " abbreviated_key = ''.join(chars)\n", " abbreviated_keys.append(abbreviated_key)\n", " return '_'.join(abbreviated_keys) " ] }, { "cell_type": "code", "execution_count": 16, "id": "2be119c8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "TASKS np.around(np.arange(0.0, 1.5,0.5),2), np.around(np.arange(0.0, 1.5,0.5),2), \n", "21 [('mean_variant_occurrence', 'trace_len_coefficient_variation'), ('activities_std', 'eventropy_trace'), ('epa_normalized_variant_entropy', 'ratio_variants_per_number_of_traces'), ('activities_std', 'epa_normalized_variant_entropy'), ('eventropy_trace', 'trace_len_coefficient_variation'), ('ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation'), ('activities_std', 'trace_len_coefficient_variation'), ('eventropy_trace', 'mean_variant_occurrence'), ('activities_std', 'mean_variant_occurrence'), ('epa_normalized_variant_entropy', 'eventropy_trace'), ('mean_variant_occurrence', 'start_activities_median'), ('ratio_variants_per_number_of_traces', 'start_activities_median'), ('eventropy_trace', 'start_activities_median'), ('activities_std', 'start_activities_median'), ('epa_normalized_variant_entropy', 'trace_len_coefficient_variation'), ('epa_normalized_variant_entropy', 'mean_variant_occurrence'), ('mean_variant_occurrence', 'ratio_variants_per_number_of_traces'), ('eventropy_trace', 
'ratio_variants_per_number_of_traces'), ('start_activities_median', 'trace_len_coefficient_variation'), ('activities_std', 'ratio_variants_per_number_of_traces'), ('epa_normalized_variant_entropy', 'start_activities_median')]\n", "9\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_et.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_et.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rvpnot.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_enve.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_enve.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_et_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_rvpnot_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rvpnot_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_et_mvo.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_mvo.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_mvo.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_mvo.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_et.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_et.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_sam.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_sam.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_rvpnot_sam.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rvpnot_sam.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_et_sam.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_sam.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_sam.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_sam.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_mvo.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_mvo.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_mvo_rvpnot.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_mvo_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_et_rvpnot.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_et_rvpnot.json\n", "Saved experiment in 
../data/grid_2obj/grid_2objectives_sam_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_sam_tlcv.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_as_rvpnot.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_as_rvpnot.json\n", "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_sam.csv\n", "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_sam.json\n", "None\n" ] } ], "source": [ "def write_generator_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n", " first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n", " second_dir = first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n", "\n", " experiment = [\n", " {\n", " 'pipeline_step': 'event_logs_generation',\n", " 'output_path':'output/generated/grid_2obj',\n", " 'generator_params': {\n", " \"experiment\": {\"input_path\": experiment_path[3:],\n", " \"objectives\": objectives},\n", " 'config_space': {\n", " 'mode': [5, 20],\n", " 'sequence': [0.01, 1],\n", " 'choice': [0.01, 1],\n", " 'parallel': [0.01, 1],\n", " 'loop': [0.01, 1],\n", " 'silent': [0.01, 1],\n", " 'lt_dependency': [0.01, 1],\n", " 'num_traces': [10, 10001],\n", " 'duplicate': [0],\n", " 'or': [0]\n", " },\n", " 'n_trials': 200\n", " }\n", " },\n", " {\n", " 'pipeline_step': 'feature_extraction',\n", " 'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),\n", " \"feature_params\": {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n", " 'output_path': 'output/plots',\n", " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " },\n", " {\n", " \"pipeline_step\": \"benchmark_test\",\n", " \"benchmark_test\": \"discovery\",\n", " \"input_path\": os.path.join('output', 'generated', 'grid_2obj', first_dir, second_dir),\n", " \"output_path\":\"output\",\n", " \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n", " }\n", " ]\n", "\n", " #print(\"EXPERIMENT:\", experiment[1]['input_path'])\n", " output_path = os.path.join('..', 'config_files','algorithm',f'grid_{len(objectives)}obj')\n", " os.makedirs(output_path, exist_ok=True)\n", " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n", " with open(output_path, 'w') as f:\n", " json.dump(experiment, f, ensure_ascii=False)\n", " print(f\"Saved experiment config in {output_path}\")\n", " \n", " return experiment\n", "\n", "def create_objectives_grid(objectives, n_para_obj=2):\n", " parameters_o = \"objectives, \"\n", " if n_para_obj==len(objectives):\n", " experiments = [tuple(sorted(objectives))]\n", " print(len(experiments), experiments)\n", " parameters = get_ranges_from_data(sorted(objectives))\n", " tasks = eval(f\"list(itertools.product({parameters}))\")\n", " #tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n", " else: \n", " if n_para_obj==1:\n", " experiments = [[exp] for exp in objectives]\n", " else:\n", " experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n", " experiments = 
list(set([tuple(sorted(exp)) for exp in experiments]))\n", " parameters = \"np.around(np.arange(0.0, 1.5,0.5),2), \"\n", " tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n", " print(\"TASKS\", type(parameters), type(n_para_obj), parameters*n_para_obj)\n", " print(len(experiments), experiments)\n", "\n", " tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]\n", " print(len(tasks))\n", " for exp in experiments:\n", " df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n", " experiment_path = os.path.join('..','data', f'grid_{n_para_obj}obj')\n", " os.makedirs(experiment_path, exist_ok=True)\n", " experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n", " df.to_csv(experiment_path, index=False)\n", " print(f\"Saved experiment in {experiment_path}\")\n", " write_generator_experiment(experiment_path, objectives=exp)\n", " #df.to_csv(f\"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv\" ,index=False)\n", "\n", "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2) \n", "print(exp_test)" ] }, { "cell_type": "markdown", "id": "9cc84ef2", "metadata": {}, "source": [ "## Grid Objectives\n", "Based on real ED ranges." ] }, { "cell_type": "code", "execution_count": 17, "id": "ae86005f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation', 'mean_variant_occurrence', 'activities_std', 'start_activities_median', 'eventropy_trace', 'epa_normalized_variant_entropy']\n", "ratio_variants_per_number_of_traces (4.081521591249218e-05, 0.4659094439111451, 0....\n", "trace_len_coefficient_variation (0.0, 0.6838390025070027, 4.744080106525514)\n", "mean_variant_occurrence (1.001552795031056, 838.6048767068644, 24500.6...\n", "activities_std (0.0, 12982.056069959535, 120522.24741658216)\n", "start_activities_median (1.0, 7975.705882352941, 150370.0)\n", "eventropy_trace (0.0, 6.2416470588235295, 13.362)\n", "epa_normalized_variant_entropy (0.0, 0.6773545645863115, 0.899497456838069)\n", "Name: range, dtype: object\n" ] }, { "data": { "text/plain": [ "'np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2)'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "DF_PATH = \"../../shampu/data/bench_baseline_feat.csv\"\n", "def get_ranges_from_data(objectives, df_path = DF_PATH):\n", " #print(objectives)\n", " dmf = pd.read_csv(DF_PATH, index_col=None)\n", " dmf = dmf[objectives].describe()\n", " dmf = dmf.transpose()[['min', 'mean','max']]\n", " dmf['range'] = dmf.apply(lambda x: tuple([x['min'], x['mean'], x['max']]), axis=1)\n", " print(dmf['range'])\n", " #tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n", " result = [f\"np.around({x}, 2)\" for x in dmf['range']]\n", " result = \", \".join(result)\n", " return result\n", "\n", "print(normalized_feature_names)\n", "get_ranges_from_data(normalized_feature_names)" ] }, { "cell_type": "code", "execution_count": 18, "id": "a7a4c864", "metadata": {}, "outputs": [ { "name": "stdout", 
"output_type": "stream", "text": [ "1 [('activities_std', 'epa_normalized_variant_entropy', 'eventropy_trace', 'mean_variant_occurrence', 'ratio_variants_per_number_of_traces', 'start_activities_median', 'trace_len_coefficient_variation')]\n", "activities_std (0.0, 12982.056069959535, 120522.24741658216)\n", "epa_normalized_variant_entropy (0.0, 0.6773545645863115, 0.899497456838069)\n", "eventropy_trace (0.0, 6.2416470588235295, 13.362)\n", "mean_variant_occurrence (1.001552795031056, 838.6048767068644, 24500.6...\n", "ratio_variants_per_number_of_traces (4.081521591249218e-05, 0.4659094439111451, 0....\n", "start_activities_median (1.0, 7975.705882352941, 150370.0)\n", "trace_len_coefficient_variation (0.0, 0.6838390025070027, 4.744080106525514)\n", "Name: range, dtype: object\n", "TASKS np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)np.around((0.0, 12982.056069959535, 120522.24741658216), 2), np.around((0.0, 0.6773545645863115, 0.899497456838069), 2), np.around((0.0, 6.2416470588235295, 13.362), 
2), np.around((1.001552795031056, 838.6048767068644, 24500.666666666668), 2), np.around((4.081521591249218e-05, 0.4659094439111451, 0.9984496124031008), 2), np.around((1.0, 7975.705882352941, 150370.0), 2), np.around((0.0, 0.6838390025070027, 4.744080106525514), 2)\n", "1 [('activities_std', 'epa_normalized_variant_entropy', 'eventropy_trace', 'mean_variant_occurrence', 'ratio_variants_per_number_of_traces', 'start_activities_median', 'trace_len_coefficient_variation')]\n", "2187\n", "Saved experiment in ../data/grid_7obj/grid_7objectives_as_enve_et_mvo_rvpnot_sam_tlcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_7obj/generator_grid_7objectives_as_enve_et_mvo_rvpnot_sam_tlcv.json\n", "None\n" ] } ], "source": [ "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'trace_len_coefficient_variation', 'mean_variant_occurrence', 'activities_std', 'start_activities_median', 'eventropy_trace', 'epa_normalized_variant_entropy']\n", "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=len(normalized_feature_names)) \n", "print(exp_test)" ] }, { "cell_type": "markdown", "id": "56ab613b", "metadata": {}, "source": [ "### Helper prototypes" ] }, { "cell_type": "code", "execution_count": 6, "id": "dfd1a302", "metadata": {}, "outputs": [], "source": [ "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]) " ] }, { "cell_type": "code", "execution_count": 7, "id": "218946b7", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/d0/btmbyskx4t106_l2zghzln2w0000gn/T/ipykernel_12596/3751377549.py:7: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", " df = pd.concat([\n" ] } ], "source": [ "k=0\n", "for i in np.arange(0, 1.1,0.5):\n", " for j in np.arange(0,0.55,0.5):\n", " k+=1\n", " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n", " \"epa_normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n", " df = pd.concat([\n", " df, \n", " pd.DataFrame([new_entry], columns=new_entry.index)]\n", " ).reset_index(drop=True)\n", " " ] }, { "cell_type": "code", "execution_count": 8, "id": "b1e3bb5a", "metadata": {}, "outputs": [], "source": [ "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)" ] }, { "cell_type": "markdown", "id": "c12bc19d", "metadata": {}, "source": [ "## Objectives from real logs\n", "(Feature selection)" ] }, { "cell_type": "code", "execution_count": 9, "id": "39ac74bb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(26, 8)\n", "26 Event-Logs: ['BPIC12' 'BPIC13cp' 'BPIC13inc' 'BPIC13op' 'BPIC14dc_p' 'BPIC14di_p'\n", " 'BPIC14dia_p' 'BPIC15f1' 'BPIC15f2' 'BPIC15f3' 'BPIC15f4' 'BPIC15f5'\n", " 'BPIC16c_p' 'BPIC16wm_p' 'BPIC17' 'BPIC17ol' 'BPIC19' 'BPIC20a' 'BPIC20b'\n", " 'BPIC20c' 'BPIC20d' 'BPIC20e' 'HD' 'RTFMP' 'RWABOCSL' 'SEPSIS']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
logratio_variants_per_number_of_tracesratio_most_common_variantratio_top_10_variantsepa_normalized_variant_entropyepa_normalized_sequence_entropyepa_normalized_sequence_entropy_linear_forgettingepa_normalized_sequence_entropy_exponential_forgetting
0BPIC16wm_p0.0028820.2958030.7141060.0000000.0000000.0000000.000000
1BPIC15f50.9974050.0017300.1020760.6487020.6032600.3424100.404580
2BPIC15f10.9758130.0066720.1217680.6528550.6102940.2702410.363928
3BPIC190.0475620.1997580.9463680.6455300.3280290.3201850.320282
4BPIC14dia_p0.4968470.0374550.5528360.7747430.6083500.3056140.377416
\n", "
" ], "text/plain": [ " log ratio_variants_per_number_of_traces \\\n", "0 BPIC16wm_p 0.002882 \n", "1 BPIC15f5 0.997405 \n", "2 BPIC15f1 0.975813 \n", "3 BPIC19 0.047562 \n", "4 BPIC14dia_p 0.496847 \n", "\n", " ratio_most_common_variant ratio_top_10_variants \\\n", "0 0.295803 0.714106 \n", "1 0.001730 0.102076 \n", "2 0.006672 0.121768 \n", "3 0.199758 0.946368 \n", "4 0.037455 0.552836 \n", "\n", " epa_normalized_variant_entropy epa_normalized_sequence_entropy \\\n", "0 0.000000 0.000000 \n", "1 0.648702 0.603260 \n", "2 0.652855 0.610294 \n", "3 0.645530 0.328029 \n", "4 0.774743 0.608350 \n", "\n", " epa_normalized_sequence_entropy_linear_forgetting \\\n", "0 0.000000 \n", "1 0.342410 \n", "2 0.270241 \n", "3 0.320185 \n", "4 0.305614 \n", "\n", " epa_normalized_sequence_entropy_exponential_forgetting \n", "0 0.000000 \n", "1 0.404580 \n", "2 0.363928 \n", "3 0.320282 \n", "4 0.377416 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bpic_features = pd.read_csv(\"../data/BaselineED_feat.csv\", index_col=None)\n", "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n", "\n", "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n", "print(bpic_features.shape)\n", "print(len(bpic_features), \" Event-Logs: \", bpic_features.sort_values('log')['log'].unique())\n", "\n", "#bpic_features.rename(columns={\"variant_entropy\":\"epa_variant_entropy\", \"normalized_variant_entropy\":\"epa_normalized_variant_entropy\", \"sequence_entropy\":\"epa_sequence_entropy\", \"normalized_sequence_entropy\":\"epa_normalized_sequence_entropy\", \"sequence_entropy_linear_forgetting\":\"epa_sequence_entropy_linear_forgetting\", \"normalized_sequence_entropy_linear_forgetting\":\"epa_normalized_sequence_entropy_linear_forgetting\", \"sequence_entropy_exponential_forgetting\":\"epa_sequence_entropy_exponential_forgetting\", \"normalized_sequence_entropy_exponential_forgetting\":\"epa_normalized_sequence_entropy_exponential_forgetting\"},\n", "# errors=\"raise\", inplace=True)\n", "\n", "bpic_features.head()\n", "#bpic_features.to_csv(\"../data/BaselineED_feat.csv\", index=False)" ] }, { "cell_type": "code", "execution_count": 10, "id": "ef0df0b9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
logratio_variants_per_number_of_tracesratio_most_common_variantratio_top_10_variantsepa_normalized_variant_entropyepa_normalized_sequence_entropyepa_normalized_sequence_entropy_linear_forgettingepa_normalized_sequence_entropy_exponential_forgetting
0BPIC16wm_p0.0028820.2958030.7141060.0000000.0000000.0000000.000000
1BPIC15f50.9974050.0017300.1020760.6487020.6032600.3424100.404580
2BPIC15f10.9758130.0066720.1217680.6528550.6102940.2702410.363928
3BPIC190.0475620.1997580.9463680.6455300.3280290.3201850.320282
4BPIC14dia_p0.4968470.0374550.5528360.7747430.6083500.3056140.377416
5BPIC15f20.9951920.0024040.1033650.6279730.6023710.3172170.390473
6BPIC15f30.9574170.0106460.1376860.6617810.6056760.3415210.404934
7BPIC13cp0.1230670.3315400.8406190.7053830.3109400.2865150.288383
8BPIC14dc_p0.0484440.0749440.7650560.4707580.4192660.3125990.326719
9BPIC20a0.0094290.4398100.9500950.6964740.1647580.0854390.104389
10BPIC14di_p0.0000410.7870810.0000001.0000000.0440180.0333220.034685
11BPIC17ol0.0003720.3806260.3806260.8134790.1051300.0526720.066000
12BPIC13op0.1318680.2173380.7692310.7029600.2767710.2620940.263029
13RTFMP0.0015360.3756200.9931040.7693530.1119320.0525860.068442
14BPIC20d0.0962360.2710810.8227730.7237850.3170440.1848790.214387
15BPIC120.3336140.2620160.6862540.7082800.4230740.2261330.275551
16RWABOCSL0.0808930.4972110.8870290.6893630.2355320.1006030.138113
17BPIC20e0.0129250.4372640.9334880.7037350.1890480.0975720.118744
18BPIC16c_p0.4380530.1017700.4247790.8994970.6837960.4046850.470116
19BPIC13inc0.2000260.2321950.7944140.7178460.4046510.3910970.391625
20BPIC15f40.9962010.0028490.1025640.6529850.6038660.3559270.412835
21BPIC170.5055700.0335140.5313400.7417060.4615650.2319220.290464
22BPIC20c0.2092000.1353150.7575370.7336530.4201500.1372870.215490
23BPIC20b0.1167620.2122810.8112890.7582680.3393800.1456110.193753
24HD0.0493450.5165940.9063320.7991200.2540660.1184780.154576
25SEPSIS0.8057140.0333330.2742860.6957590.5223430.2193650.299505
\n", "
" ], "text/plain": [ " log ratio_variants_per_number_of_traces \\\n", "0 BPIC16wm_p 0.002882 \n", "1 BPIC15f5 0.997405 \n", "2 BPIC15f1 0.975813 \n", "3 BPIC19 0.047562 \n", "4 BPIC14dia_p 0.496847 \n", "5 BPIC15f2 0.995192 \n", "6 BPIC15f3 0.957417 \n", "7 BPIC13cp 0.123067 \n", "8 BPIC14dc_p 0.048444 \n", "9 BPIC20a 0.009429 \n", "10 BPIC14di_p 0.000041 \n", "11 BPIC17ol 0.000372 \n", "12 BPIC13op 0.131868 \n", "13 RTFMP 0.001536 \n", "14 BPIC20d 0.096236 \n", "15 BPIC12 0.333614 \n", "16 RWABOCSL 0.080893 \n", "17 BPIC20e 0.012925 \n", "18 BPIC16c_p 0.438053 \n", "19 BPIC13inc 0.200026 \n", "20 BPIC15f4 0.996201 \n", "21 BPIC17 0.505570 \n", "22 BPIC20c 0.209200 \n", "23 BPIC20b 0.116762 \n", "24 HD 0.049345 \n", "25 SEPSIS 0.805714 \n", "\n", " ratio_most_common_variant ratio_top_10_variants \\\n", "0 0.295803 0.714106 \n", "1 0.001730 0.102076 \n", "2 0.006672 0.121768 \n", "3 0.199758 0.946368 \n", "4 0.037455 0.552836 \n", "5 0.002404 0.103365 \n", "6 0.010646 0.137686 \n", "7 0.331540 0.840619 \n", "8 0.074944 0.765056 \n", "9 0.439810 0.950095 \n", "10 0.787081 0.000000 \n", "11 0.380626 0.380626 \n", "12 0.217338 0.769231 \n", "13 0.375620 0.993104 \n", "14 0.271081 0.822773 \n", "15 0.262016 0.686254 \n", "16 0.497211 0.887029 \n", "17 0.437264 0.933488 \n", "18 0.101770 0.424779 \n", "19 0.232195 0.794414 \n", "20 0.002849 0.102564 \n", "21 0.033514 0.531340 \n", "22 0.135315 0.757537 \n", "23 0.212281 0.811289 \n", "24 0.516594 0.906332 \n", "25 0.033333 0.274286 \n", "\n", " epa_normalized_variant_entropy epa_normalized_sequence_entropy \\\n", "0 0.000000 0.000000 \n", "1 0.648702 0.603260 \n", "2 0.652855 0.610294 \n", "3 0.645530 0.328029 \n", "4 0.774743 0.608350 \n", "5 0.627973 0.602371 \n", "6 0.661781 0.605676 \n", "7 0.705383 0.310940 \n", "8 0.470758 0.419266 \n", "9 0.696474 0.164758 \n", "10 1.000000 0.044018 \n", "11 0.813479 0.105130 \n", "12 0.702960 0.276771 \n", "13 0.769353 0.111932 \n", "14 0.723785 0.317044 \n", "15 0.708280 0.423074 \n", "16 0.689363 0.235532 \n", "17 0.703735 0.189048 \n", "18 0.899497 0.683796 \n", "19 0.717846 0.404651 \n", "20 0.652985 0.603866 \n", "21 0.741706 0.461565 \n", "22 0.733653 0.420150 \n", "23 0.758268 0.339380 \n", "24 0.799120 0.254066 \n", "25 0.695759 0.522343 \n", "\n", " epa_normalized_sequence_entropy_linear_forgetting \\\n", "0 0.000000 \n", "1 0.342410 \n", "2 0.270241 \n", "3 0.320185 \n", "4 0.305614 \n", "5 0.317217 \n", "6 0.341521 \n", "7 0.286515 \n", "8 0.312599 \n", "9 0.085439 \n", "10 0.033322 \n", "11 0.052672 \n", "12 0.262094 \n", "13 0.052586 \n", "14 0.184879 \n", "15 0.226133 \n", "16 0.100603 \n", "17 0.097572 \n", "18 0.404685 \n", "19 0.391097 \n", "20 0.355927 \n", "21 0.231922 \n", "22 0.137287 \n", "23 0.145611 \n", "24 0.118478 \n", "25 0.219365 \n", "\n", " epa_normalized_sequence_entropy_exponential_forgetting \n", "0 0.000000 \n", "1 0.404580 \n", "2 0.363928 \n", "3 0.320282 \n", "4 0.377416 \n", "5 0.390473 \n", "6 0.404934 \n", "7 0.288383 \n", "8 0.326719 \n", "9 0.104389 \n", "10 0.034685 \n", "11 0.066000 \n", "12 0.263029 \n", "13 0.068442 \n", "14 0.214387 \n", "15 0.275551 \n", "16 0.138113 \n", "17 0.118744 \n", "18 0.470116 \n", "19 0.391625 \n", "20 0.412835 \n", "21 0.290464 \n", "22 0.215490 \n", "23 0.193753 \n", "24 0.154576 \n", "25 0.299505 " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bpic_stats = bpic_features.describe().transpose()\n", "normalized_feature_names = 
bpic_stats[(bpic_stats['min']>=0)&(bpic_stats['max']<=1)].index.to_list() \n", "normalized_feature_names = ['ratio_variants_per_number_of_traces', 'ratio_most_common_variant', \n", " 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', \n", " 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n", "print(normalized_feature_names)\n", "bpic_features[['log']+normalized_feature_names]" ] }, { "cell_type": "code", "execution_count": 11, "id": "44909860", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "21\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rvpnot.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rmcv_rvpnot.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enself.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enseef.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rvpnot.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rt10v.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rt10v.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_enve.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rmcv_rt10v.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_enve.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rvpnot.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rt10v.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rmcv.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enself_rmcv.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enve_rmcv.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rt10v.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_enself.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_enve.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_rt10v_rvpnot.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_ense_rmcv.json\n", "Saved experiment config in ../config_files/algorithm/BaselineED_feat/generator_2_enseef_rvpnot.json\n", "None\n" ] } ], "source": [ "#Features between 0 and 1: \n", "def write_generator_bpic_experiment(objectives, n_para_obj=2):\n", " parameters_o = \"objectives, \"\n", " experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n", " experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n", " for exp in experiments:\n", " experiment_path = os.path.join('..','data', 'BaselineED_feat')\n", " os.makedirs(experiment_path, exist_ok=True)\n", " experiment_path = os.path.join(experiment_path, f\"{len(exp)}_{abbrev_obj_keys(exp)}.csv\") \n", "\n", "\n", " first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n", " second_dir = 
first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n", "\n", " experiment = [\n", " {\n", " 'pipeline_step': 'event_logs_generation',\n", " 'output_path':'output/generated',\n", " 'generator_params': {\n", " \"experiment\": {\"input_path\": \"data/BaselineED_feat.csv\",\n", " \"objectives\": exp},\n", " 'config_space': {\n", " 'mode': [5, 20],\n", " 'sequence': [0.01, 1],\n", " 'choice': [0.01, 1],\n", " 'parallel': [0.01, 1],\n", " 'loop': [0.01, 1],\n", " 'silent': [0.01, 1],\n", " 'lt_dependency': [0.01, 1],\n", " 'num_traces': [10, 10001],\n", " 'duplicate': [0],\n", " 'or': [0]\n", " },\n", " 'n_trials': 200\n", " }\n", " },\n", " {\n", " 'pipeline_step': 'feature_extraction',\n", " 'input_path': os.path.join('output', 'features', 'generated', 'BaselineED_feat', first_dir),\n", " 'input_path': os.path.join('output', 'generated', 'BaselineED_feat', first_dir),\n", " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n", " 'feature_params': {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n", " 'output_path': 'output/plots',\n", " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " },\n", " {\n", " \"pipeline_step\": \"benchmark_test\",\n", " \"benchmark_test\": \"discovery\",\n", " \"input_path\": os.path.join('output', 'generated', 'BaselineED_feat', first_dir),\n", " \"output_path\":\"output\",\n", " \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n", " }\n", " ]\n", "\n", " output_path = os.path.join('..', 'config_files','algorithm','BaselineED_feat')\n", " os.makedirs(output_path, exist_ok=True)\n", " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n", "\n", " with open(output_path, 'w') as f:\n", " json.dump(experiment, f, ensure_ascii=False)\n", " print(f\"Saved experiment config in {output_path}\")\n", " return experiment\n", "\n", "\n", "def create_objectives_grid(objectives, n_para_obj=2):\n", " parameters_o = \"objectives, \"\n", " experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n", " experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n", " print(len(experiments))\n", " \n", " for exp in experiments:\n", " write_generator_bpic_experiment(objectives=exp)\n", " \n", "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2) \n", "print(exp_test)" ] }, { "cell_type": "markdown", "id": "b07e9753", "metadata": {}, "source": [ "## Single objective from real logs\n", "(Feature selection)" ] }, { "cell_type": "code", "execution_count": 12, "id": "d759a677", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "7 experiments: [('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_sequence_entropy',), ('ratio_top_10_variants',), ('ratio_variants_per_number_of_traces',), ('epa_normalized_variant_entropy',)]\n", "11\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_enself.csv\n", "Saved experiment config in 
../config_files/algorithm/grid_experiments/generator_grid_1objectives_enself.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_rmcv.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rmcv.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_ense.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_rvpnot.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rvpnot.json\n", "Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv\n", "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json\n", "None\n" ] } ], "source": [ "def write_single_objective_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n", " first_dir = os.path.split(experiment_path[3:])[-1].replace(\".csv\",\"\")\n", " second_dir = first_dir.replace(\"grid_\",\"\").replace(\"objectives\",\"\")\n", "\n", " experiment = [\n", " {\n", " 'pipeline_step': 'event_logs_generation',\n", " 'output_path':os.path.join('output','generated', 'grid_1obj'),\n", " 'generator_params': {\n", " \"experiment\": {\"input_path\": experiment_path[3:],\n", " \"objectives\": objectives},\n", " 'config_space': {\n", " 'mode': [5, 20],\n", " 'sequence': [0.01, 1],\n", " 'choice': [0.01, 1],\n", " 'parallel': [0.01, 1],\n", " 'loop': [0.01, 1],\n", " 'silent': [0.01, 1],\n", " 'lt_dependency': [0.01, 1],\n", " 'num_traces': [10, 10001],\n", " 'duplicate': [0],\n", " 'or': [0]\n", " },\n", " 'n_trials': 200\n", " }\n", " },\n", " {\n", " 'pipeline_step': 'feature_extraction',\n", " 'input_path': os.path.join('output','features', 'generated', 'grid_1obj', first_dir, second_dir),\n", " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n", " 'feature_params': {\"feature_set\":[\"ratio_variants_per_number_of_traces\",\"ratio_most_common_variant\",\"ratio_top_10_variants\",\"epa_normalized_variant_entropy\",\"epa_normalized_sequence_entropy\",\"epa_normalized_sequence_entropy_linear_forgetting\",\"epa_normalized_sequence_entropy_exponential_forgetting\"]},\n", " 'output_path': 'output/plots',\n", " 'real_eventlog_path': 'data/BaselineED_feat.csv',\n", " 'plot_type': 'boxplot'\n", " },\n", " {\n", " \"pipeline_step\": \"benchmark_test\",\n", " \"benchmark_test\": \"discovery\",\n", " \"input_path\": os.path.join('output', 'generated', 'grid_1obj', first_dir, second_dir),\n", " \"output_path\":\"output\",\n", " \"miners\" : [\"heu\", \"imf\", \"ilp\"]\n", " }\n", " ]\n", "\n", " #print(\"EXPERIMENT:\", experiment)\n", " output_path = os.path.join('..', 'config_files','algorithm','grid_experiments')\n", " os.makedirs(output_path, exist_ok=True)\n", " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n", " 
with open(output_path, 'w') as f:\n", " json.dump(experiment, f, ensure_ascii=False)\n", " print(f\"Saved experiment config in {output_path}\")\n", " \n", " return experiment\n", "\n", "def create_objectives_grid(objectives, n_para_obj=2):\n", " parameters_o = \"objectives, \"\n", " if n_para_obj==1:\n", " experiments = [[exp] for exp in objectives]\n", " else:\n", " experiments = eval(f\"[exp for exp in list(itertools.product({(parameters_o*n_para_obj)[:-2]})) if exp[0]!=exp[1]]\")\n", " experiments = list(set([tuple(sorted(exp)) for exp in experiments]))\n", " print(len(experiments), \"experiments: \", experiments)\n", " \n", " parameters = \"np.around(np.arange(0, 1.1,0.1),2), \"\n", " tasks = eval(f\"list(itertools.product({(parameters*n_para_obj)[:-2]}))\")\n", " tasks = [(f'task_{i+1}',)+task for i, task in enumerate(tasks)]\n", " print(len(tasks))\n", " for exp in experiments:\n", " df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n", " experiment_path = os.path.join('..','data', 'grid_experiments')\n", " os.makedirs(experiment_path, exist_ok=True)\n", " experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n", " df.to_csv(experiment_path, index=False)\n", " print(f\"Saved experiment in {experiment_path}\")\n", " write_single_objective_experiment(experiment_path, objectives=exp)\n", " #df.to_csv(f\"../data/grid_{}objectives_{abbrev_obj_keys(objectives.tolist())}.csv\" ,index=False)\n", " \n", "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1) \n", "print(exp_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "f9886f44", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "shampu", "language": "python", "name": "shampu" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.19" } }, "nbformat": 4, "nbformat_minor": 5 }
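
The grid-construction cells above assemble their `itertools.product` calls as strings and run them through `eval`. The sketch below illustrates the same idea without `eval`; it is a rough illustration only: `build_task_grid`, its value levels, and the output directory are assumptions of this sketch, not the notebook's actual pipeline (which additionally writes a generator config JSON per experiment).

```python
# Hedged, eval-free sketch of the task-grid construction used above.
# Function name, value levels, and output directory are illustrative assumptions.
import itertools
import os

import numpy as np
import pandas as pd


def build_task_grid(objectives, values=None, n_para_obj=2, out_dir="../data/grid_sketch"):
    """For each combination of `n_para_obj` objectives, write a CSV of target tasks."""
    if values is None:
        # same spacing as np.arange(0.0, 1.5, 0.5) in the cells above
        values = np.around(np.arange(0.0, 1.5, 0.5), 2)

    os.makedirs(out_dir, exist_ok=True)
    for exp in itertools.combinations(sorted(objectives), n_para_obj):
        # Cartesian product of the value levels, once per objective in the combination
        tasks = itertools.product(values, repeat=n_para_obj)
        rows = [(f"task_{i+1}", *task) for i, task in enumerate(tasks)]
        df = pd.DataFrame(rows, columns=["task", *exp])
        path = os.path.join(out_dir, f"grid_{n_para_obj}objectives_{'_'.join(exp)}.csv")
        df.to_csv(path, index=False)
        print(f"Saved {len(df)} tasks for {exp} in {path}")


build_task_grid(["ratio_top_10_variants", "epa_normalized_variant_entropy",
                 "ratio_most_common_variant"])
```

Using `itertools.combinations` yields each unordered objective pair exactly once, which is the same result the cells above obtain by filtering `itertools.product` and deduplicating sorted tuples.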
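
`get_ranges_from_data` above likewise returns a string of `np.around(...)` snippets that is later `eval`-ed into `itertools.product`. A minimal eval-free sketch of that range extraction and expansion follows; the helper names (`objective_ranges`, `ranges_to_tasks`) are hypothetical, and the commented usage assumes the `BaselineED_feat.csv` feature table referenced earlier.

```python
# Hedged sketch: derive (min, mean, max) target levels per objective from a
# real-log feature table and expand them into tasks without eval().
# Helper names and the CSV path are assumptions for illustration only.
import itertools

import numpy as np
import pandas as pd


def objective_ranges(df_path, objectives):
    """Return one (min, mean, max) tuple per objective, rounded to 2 decimals."""
    stats = pd.read_csv(df_path, index_col=None)[objectives].describe().transpose()
    return [tuple(np.around((row["min"], row["mean"], row["max"]), 2))
            for _, row in stats.iterrows()]


def ranges_to_tasks(ranges):
    """Cartesian product of the per-objective levels, labelled task_1, task_2, ..."""
    return [(f"task_{i+1}", *combo)
            for i, combo in enumerate(itertools.product(*ranges))]


# Example usage (paths and columns as used earlier in this notebook):
# ranges = objective_ranges("../data/BaselineED_feat.csv",
#                           ["ratio_top_10_variants", "epa_normalized_variant_entropy"])
# tasks = ranges_to_tasks(ranges)   # 3 levels per objective -> 9 tasks
```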