# Grid Objectives
# Iterating between min and max for each column.
#
# Glossary
# - task: one row of the grid, i.e. the set of values and corresponding keys
#   to be aimed at, one task after another.
# - objective: one key (column) and its respective value; all objectives of a
#   task are aimed at simultaneously.
# - experiment: one file containing multiple tasks and objectives, with a
#   fixed number of each.
import itertools
import json
import os

import numpy as np
import pandas as pd

# Features between 0 and 1 (full candidate list, kept for reference).
all_normalized_feature_names = [
    'ratio_unique_traces_per_trace', 'trace_len_hist1', 'trace_len_hist2',
    'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',
    'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant',
    'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants',
    'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants',
    'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy',
    'epa_normalized_sequence_entropy_linear_forgetting',
    'epa_normalized_sequence_entropy_exponential_forgetting',
]

# Subset that is actually used to build the objective grid below.
normalized_feature_names = [
    'ratio_unique_traces_per_trace', 'ratio_most_common_variant',
    'ratio_top_10_variants', 'epa_normalized_variant_entropy',
    'epa_normalized_sequence_entropy',
    'epa_normalized_sequence_entropy_linear_forgetting',
    'epa_normalized_sequence_entropy_exponential_forgetting',
]


def abbrev_obj_keys(obj_keys):
    """Abbreviate objective keys into one short tag.

    Every key is split on ``"_"``; from each piece the first character and
    every digit are kept. The abbreviated keys are joined with ``"_"``.

    Args:
        obj_keys: Iterable of key strings. A single string is treated as one
            key instead of being iterated character by character.

    Returns:
        The abbreviated keys joined by underscores; ``""`` for no keys.

    >>> abbrev_obj_keys(["ratio_top_10_variants", "epa_normalized_variant_entropy"])
    'rt10v_enve'
    """
    if isinstance(obj_keys, str):
        # A bare string would otherwise be abbreviated one character at a time.
        obj_keys = [obj_keys]
    return '_'.join(
        ''.join(
            char
            for piece in key.split("_")
            for idx, char in enumerate(piece)
            if idx == 0 or char.isdigit()
        )
        for key in obj_keys
    )
'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n", + "121\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rutpt.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rmcv.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enve.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rmcv.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json\n", + "Saved experiment in 
../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enve.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rutpt.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rutpt.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rt10v.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rutpt.csv\n", + "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rutpt.json\n", + "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rmcv.csv\n", + "Saved experiment config 
def write_generator_experiment(experiment_path, objectives=None):
    """Write the generator pipeline config (JSON) for one experiment CSV.

    The config has two steps, ``event_logs_generation`` and
    ``feature_extraction``. It is saved under
    ``../config_files/algorithm/grid_<n>obj/generator_<csv name>.json`` where
    ``<n>`` is the number of objectives.

    Args:
        experiment_path: Path of the experiment CSV. A leading ``../`` is
            removed before the path is stored in the config.
        objectives: Objective keys (CSV columns) of the experiment. Defaults
            to ``["ratio_top_20_variants",
            "epa_normalized_sequence_entropy_linear_forgetting"]``.

    Returns:
        The config as a list of two dicts, exactly as written to disk.
    """
    if objectives is None:
        # Built per call so no list object is shared between calls.
        objectives = ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
    grid_dir = f'grid_{len(objectives)}obj'

    # Drop only a real leading parent-directory component instead of blindly
    # chopping the first three characters.
    if experiment_path.startswith(('../', '..\\')):
        input_path = experiment_path[3:]
    else:
        input_path = experiment_path

    first_dir = os.path.split(input_path)[-1].replace(".csv", "")
    second_dir = first_dir.replace("grid_", "").replace("objectives", "")

    experiment = [
        {
            'pipeline_step': 'event_logs_generation',
            'output_path': f'output/generated/{grid_dir}',
            'generator_params': {
                "experiment": {"input_path": input_path,
                               "objectives": objectives},
                'config_space': {
                    'mode': [5, 20],
                    'sequence': [0.01, 1],
                    'choice': [0.01, 1],
                    'parallel': [0.01, 1],
                    'loop': [0.01, 1],
                    'silent': [0.01, 1],
                    'lt_dependency': [0.01, 1],
                    'num_traces': [10, 10001],
                    'duplicate': [0],
                    'or': [0]
                },
                'n_trials': 200
            }
        },
        {
            'pipeline_step': 'feature_extraction',
            'input_path': os.path.join('output', 'features', 'generated', grid_dir, first_dir, second_dir),
            'feature_params': {'feature_set': ['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},
            'output_path': 'output/plots',
            'real_eventlog_path': 'data/34_bpic_features.csv',
            'plot_type': 'boxplot'
        }
    ]

    output_path = os.path.join('..', 'config_files', 'algorithm', grid_dir)
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(".")[0]}.json')
    # ensure_ascii=False may emit non-ASCII text, so the encoding is explicit.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")

    return experiment


def create_objectives_grid(objectives, n_para_obj=2):
    """Create grid experiments for every combination of objectives.

    For each combination of ``n_para_obj`` distinct objectives, one CSV is
    written to ``../data/grid_<n>obj/``. Its rows ("tasks") are the cartesian
    product of the values 0.0, 0.1, ..., 1.0 over the chosen objectives. A
    matching generator config is written with ``write_generator_experiment``.

    Args:
        objectives: Iterable of objective keys; duplicates are ignored.
        n_para_obj: Number of objectives aimed at simultaneously per task.

    Returns:
        The list of objective tuples, one per experiment, in sorted order.

    Raises:
        ValueError: If ``n_para_obj`` is smaller than 1.
    """
    if n_para_obj < 1:
        raise ValueError(f"n_para_obj must be >= 1, got {n_para_obj}")

    # combinations() over the sorted unique keys yields each unordered group of
    # distinct objectives exactly once, with no eval() and no hash-dependent order.
    experiments = list(itertools.combinations(sorted(set(objectives)), n_para_obj))
    print(len(experiments), experiments)

    # linspace has an exact endpoint, unlike arange with a float step.
    grid_values = np.round(np.linspace(0.0, 1.0, 11), 2)
    tasks = [
        (f'task_{i+1}',) + task
        for i, task in enumerate(itertools.product(grid_values, repeat=n_para_obj))
    ]
    print(len(tasks))

    experiment_dir = os.path.join('..', 'data', f'grid_{n_para_obj}obj')
    os.makedirs(experiment_dir, exist_ok=True)
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        experiment_path = os.path.join(experiment_dir, f"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv")
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_generator_experiment(experiment_path, objectives=exp)

    return experiments


exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)
print(exp_test)

# ### Helper prototypes
# Small hand-rolled grid: ratio_top_20_variants in steps of 0.2 combined with
# epa_normalized_sequence_entropy_linear_forgetting in steps of 0.1 up to 0.5.
prototype_columns = ["log", "ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
prototype_grid = itertools.product(np.arange(0, 1.1, 0.2), np.arange(0, 0.55, 0.1))
# Collect all rows first and build the frame once; concatenating inside the
# loop copies the whole frame on every iteration.
prototype_rows = [
    {
        'log': f"objective_{k}",
        "ratio_top_20_variants": round(i, 1),
        "epa_normalized_sequence_entropy_linear_forgetting": round(j, 1),
    }
    for k, (i, j) in enumerate(prototype_grid, start=1)
]
df = pd.DataFrame(prototype_rows, columns=prototype_columns)

df.to_csv("../data/grid_objectives.csv", index=False)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
logratio_top_20_variantsnormalized_sequence_entropy_linear_forgetting
0objective_10.00.0
1objective_20.00.1
2objective_30.00.2
3objective_40.00.3
4objective_50.00.4
5objective_60.00.5
6objective_70.20.0
7objective_80.20.1
8objective_90.20.2
9objective_100.20.3
10objective_110.20.4
11objective_120.20.5
12objective_130.40.0
13objective_140.40.1
14objective_150.40.2
15objective_160.40.3
16objective_170.40.4
17objective_180.40.5
18objective_190.60.0
19objective_200.60.1
20objective_210.60.2
21objective_220.60.3
22objective_230.60.4
23objective_240.60.5
24objective_250.80.0
25objective_260.80.1
26objective_270.80.2
27objective_280.80.3
28objective_290.80.4
29objective_300.80.5
30objective_311.00.0
31objective_321.00.1
32objective_331.00.2
33objective_341.00.3
34objective_351.00.4
35objective_361.00.5
\n", + "
" + ], + "text/plain": [ + " log ratio_top_20_variants \n", + "0 objective_1 0.0 \\\n", + "1 objective_2 0.0 \n", + "2 objective_3 0.0 \n", + "3 objective_4 0.0 \n", + "4 objective_5 0.0 \n", + "5 objective_6 0.0 \n", + "6 objective_7 0.2 \n", + "7 objective_8 0.2 \n", + "8 objective_9 0.2 \n", + "9 objective_10 0.2 \n", + "10 objective_11 0.2 \n", + "11 objective_12 0.2 \n", + "12 objective_13 0.4 \n", + "13 objective_14 0.4 \n", + "14 objective_15 0.4 \n", + "15 objective_16 0.4 \n", + "16 objective_17 0.4 \n", + "17 objective_18 0.4 \n", + "18 objective_19 0.6 \n", + "19 objective_20 0.6 \n", + "20 objective_21 0.6 \n", + "21 objective_22 0.6 \n", + "22 objective_23 0.6 \n", + "23 objective_24 0.6 \n", + "24 objective_25 0.8 \n", + "25 objective_26 0.8 \n", + "26 objective_27 0.8 \n", + "27 objective_28 0.8 \n", + "28 objective_29 0.8 \n", + "29 objective_30 0.8 \n", + "30 objective_31 1.0 \n", + "31 objective_32 1.0 \n", + "32 objective_33 1.0 \n", + "33 objective_34 1.0 \n", + "34 objective_35 1.0 \n", + "35 objective_36 1.0 \n", + "\n", + " normalized_sequence_entropy_linear_forgetting \n", + "0 0.0 \n", + "1 0.1 \n", + "2 0.2 \n", + "3 0.3 \n", + "4 0.4 \n", + "5 0.5 \n", + "6 0.0 \n", + "7 0.1 \n", + "8 0.2 \n", + "9 0.3 \n", + "10 0.4 \n", + "11 0.5 \n", + "12 0.0 \n", + "13 0.1 \n", + "14 0.2 \n", + "15 0.3 \n", + "16 0.4 \n", + "17 0.5 \n", + "18 0.0 \n", + "19 0.1 \n", + "20 0.2 \n", + "21 0.3 \n", + "22 0.4 \n", + "23 0.5 \n", + "24 0.0 \n", + "25 0.1 \n", + "26 0.2 \n", + "27 0.3 \n", + "28 0.4 \n", + "29 0.5 \n", + "30 0.0 \n", + "31 0.1 \n", + "32 0.2 \n", + "33 0.3 \n", + "34 0.4 \n", + "35 0.5 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "id": "c12bc19d", + "metadata": {}, + "source": [ + "## Objectives from real logs\n", + "(Feature selection)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "39ac74bb", + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(34, 178)\n", + "34 Event-Logs: ['BPI2016_Complaints' 'BPI2016_Questions' 'BPI2016_Werkmap_Messages'\n", + " 'BPIC15_1' 'BPIC15_2' 'BPIC15_3' 'BPIC15_4' 'BPIC15_5'\n", + " 'BPI_Challenge_2012' 'BPI_Challenge_2013_closed_problems'\n", + " 'BPI_Challenge_2013_incidents' 'BPI_Challenge_2013_open_problems'\n", + " 'BPI_Challenge_2017' 'BPI_Challenge_2017_Offer_log' 'BPI_Challenge_2018'\n", + " 'BPI_Challenge_2019' 'CoSeLoG_WABO_1' 'CoSeLoG_WABO_2' 'CoSeLoG_WABO_3'\n", + " 'CoSeLoG_WABO_4' 'CoSeLoG_WABO_5' 'Detail_Change'\n", + " 'Detail_Incident_Activity' 'Detail_Interaction' 'DomesticDeclarations'\n", + " 'Hospital_log' 'InternationalDeclarations' 'PermitLog'\n", + " 'PrepaidTravelCost' 'Receipt_WABO_CoSeLoG' 'RequestForPayment'\n", + " 'Road_Traffic_Fine_Management_Process' 'Sepsis_Cases_Event_Log' 'finale']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
logn_tracesn_unique_tracesratio_unique_traces_per_tracetrace_len_mintrace_len_maxtrace_len_meantrace_len_mediantrace_len_modetrace_len_stdtrace_len_variancetrace_len_q1trace_len_q3trace_len_iqrtrace_len_geometric_meantrace_len_geometric_stdtrace_len_harmonic_meantrace_len_skewnesstrace_len_kurtosistrace_len_coefficient_variationtrace_len_entropytrace_len_hist1trace_len_hist2trace_len_hist3trace_len_hist4trace_len_hist5trace_len_hist6trace_len_hist7trace_len_hist8trace_len_hist9trace_len_hist10trace_len_skewness_histtrace_len_kurtosis_histratio_most_common_variantratio_top_1_variantsratio_top_5_variantsratio_top_10_variantsratio_top_20_variantsratio_top_50_variantsratio_top_75_variantsmean_variant_occurrencestd_variant_occurrenceskewness_variant_occurrencekurtosis_variant_occurrencen_unique_activitiesactivities_minactivities_maxactivities_meanactivities_medianactivities_stdactivities_varianceactivities_q1activities_q3activities_iqractivities_skewnessactivities_kurtosisn_unique_start_activitiesstart_activities_minstart_activities_maxstart_activities_meanstart_activities_medianstart_activities_stdstart_activities_variancestart_activities_q1start_activities_q3start_activities_iqrstart_activities_skewnessstart_activities_kurtosisn_unique_end_activitiesend_activities_minend_activities_maxend_activities_meanend_activities_medianend_activities_stdend_activities_varianceend_activities_q1end_activities_q3end_activities_iqrend_activities_skewnessend_activities_kurtosisentropy_traceentropy_prefixentropy_global_blockentropy_lempel_ziventropy_k_block_diff_1entropy_k_block_diff_3entropy_k_block_diff_5entropy_k_block_ratio_1entropy_k_block_ratio_3entropy_k_block_ratio_5entropy_knn_3entropy_knn_5entropy_knn_7Log 
Natureepa_variant_entropyepa_normalized_variant_entropyepa_sequence_entropyepa_normalized_sequence_entropyepa_sequence_entropy_linear_forgettingepa_normalized_sequence_entropy_linear_forgettingepa_sequence_entropy_exponential_forgettingepa_normalized_sequence_entropy_exponential_forgettingaccumulated_time_time_minaccumulated_time_time_maxaccumulated_time_time_meanaccumulated_time_time_medianaccumulated_time_time_modeaccumulated_time_time_stdaccumulated_time_time_varianceaccumulated_time_time_q1accumulated_time_time_q3accumulated_time_time_iqraccumulated_time_time_geometric_meanaccumulated_time_time_geometric_stdaccumulated_time_time_harmonic_meanaccumulated_time_time_skewnessaccumulated_time_time_kurtosisaccumulated_time_time_coefficient_variationaccumulated_time_time_entropyaccumulated_time_time_skewness_histaccumulated_time_time_kurtosis_histexecution_time_time_minexecution_time_time_maxexecution_time_time_meanexecution_time_time_medianexecution_time_time_modeexecution_time_time_stdexecution_time_time_varianceexecution_time_time_q1execution_time_time_q3execution_time_time_iqrexecution_time_time_geometric_meanexecution_time_time_geometric_stdexecution_time_time_harmonic_meanexecution_time_time_skewnessexecution_time_time_kurtosisexecution_time_time_coefficient_variationexecution_time_time_entropyexecution_time_time_skewness_histexecution_time_time_kurtosis_histremaining_time_time_minremaining_time_time_maxremaining_time_time_meanremaining_time_time_medianremaining_time_time_moderemaining_time_time_stdremaining_time_time_varianceremaining_time_time_q1remaining_time_time_q3remaining_time_time_iqrremaining_time_time_geometric_meanremaining_time_time_geometric_stdremaining_time_time_harmonic_meanremaining_time_time_skewnessremaining_time_time_kurtosisremaining_time_time_coefficient_variationremaining_time_time_entropyremaining_time_time_skewness_histremaining_time_time_kurtosis_histwithin_day_time_minwithin_day_time_maxwithin_day_time_meanwithin_day_time_medianwithin_d
ay_time_modewithin_day_time_stdwithin_day_time_variancewithin_day_time_q1within_day_time_q3within_day_time_iqrwithin_day_time_geometric_meanwithin_day_time_geometric_stdwithin_day_time_harmonic_meanwithin_day_time_skewnesswithin_day_time_kurtosiswithin_day_time_coefficient_variationwithin_day_time_entropywithin_day_time_skewness_histwithin_day_time_kurtosis_hist
0BPIC15_28328280.995192113253.31009654.06119.894977395.81009044.062.018.048.1501111.69531137.5837410.0541380.8049920.3731936.6467150.0038530.0048634.679243e-030.0239472.376321e-028.257487e-030.0047711.376248e-036.422490e-041.834997e-040.0541380.8049920.0024040.0144230.0540870.1033650.2031250.5024040.7512021.0048310.06933714.283027202.0048544101830108.18048812.0187.5881623.518932e+043.0125.5122.52.1294123.80827814173159.4285711.0186.7174013.486339e+041.08.257.253.3004118.96076782121610.1463411.035.3188001.247418e+031.003.002.005.09879125.8619919.69114.52419.4483.8597.1057.1057.1057.1057.1057.1055.5455.0394.721Real2.405122e+050.6279732.858769e+050.6023711.505466e+050.3172171.853129e+050.390473NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1BPI_Challenge_201843809284570.64957024297357.39154149.04934.8721311216.06548744.059.015.053.7750081.36739751.65150226.1264591720.3996650.60761810.5987580.0033850.0000059.288448e-070.0000000.000000e+000.000000e+000.0000000.000000e+007.740373e-087.740373e-0826.1264591720.3996650.0269810.2903740.3730060.4153710.4803350.6752040.8375901.53948112.48743864.6256805083.455806411746614161323.5609767530.0120522.2474171.452561e+10902.045907.045005.02.4440074.773254423862310952.2500002592.016111.4075482.595775e+0836.513507.7513471.251.098736-0.714800211348302086.14285713.07431.7449815.523083e+072.00193.00191.004.06238714.95282413.19116.27220.9721.023-0.0101.8550.5111.4033.5722.0017.8497.3717.067Real1.156384e+070.7120792.114626e+070.5706881.414023e+070.3816121.557608e+070.420362NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2Receipt_WABO_CoSeLoG14341160.0808931255.9811726.062.1661294.6921146.06.00.05.4147081.7049654.3564451.27652512.2960060.3621587.1971930.0360300.0081363.411204e-010.0235363.777313e-031.743375e-030.0002911.452813e-030.000000e+005.811251e-041.27652512.2960060.4972110.4972110.7963740.8870290.9302650.9595540.97977712.36206968.3602779.38068792.2819192711434317.66666727.0553.3898233.062403e+058.050.042.01.342951-0.1780941143414341434.0000001434.00.0000000.000000e+001434.01434.000.00NaNNaN141828102.4285716.0225.8715555.101796e+041.2533.2532.002.4717654.8465413.2094.7467.0190.3852.6722.9660.8041.4842.9662.9663.2602.8452.584Real2.382326e+030.6893631.829627e+040.2355327.814868e+030.1006031.072870e+040.138113NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
3BPIC15_3140913490.957417312442.35699142.04416.138406260.44814338.047.09.037.6377311.78672629.092933-0.0095411.5433690.3810097.1671530.0069210.0043401.630604e-020.0369531.173096e-024.105837e-030.0015845.278933e-041.173096e-045.865481e-05-0.0095411.5433690.0106460.0496810.0901350.1376860.2334990.5209370.7601141.0444770.59234817.964130358.01951138311409155.82506516.0306.3105449.382615e+045.0108.5103.52.4463495.280931911348156.5555568.0421.2708581.774691e+053.014.0011.002.4741584.122971119134211.8403362.039.5572101.564773e+031.007.006.006.21721743.33552510.31714.22618.7433.182-0.0076.7806.7806.7806.7806.7805.7015.2124.900Real2.981464e+050.6617813.975043e+050.6056762.241393e+050.3415212.657571e+050.404934NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
4BPI_Challenge_2019251734119730.04756219906.3397205.0513.057417170.4961375.06.01.05.1735691.6358224.59284422.132989753.7722022.05962112.0440570.0100780.0000209.559579e-060.0000033.614967e-071.606652e-070.0000004.016630e-088.033260e-088.033260e-0822.132989753.7722020.1997580.8714240.9299900.9463680.9597670.9762170.98810621.025140594.25561964.7727024917.31975142231409737998.1666671628.080833.6692066.534082e+09202.011536.011334.02.1696483.2635948219986731466.750000869.065387.4932864.275524e+0997.014224.2514127.252.0597422.5357893211813287866.68750064.531658.4289961.002256e+099.001027.251018.255.13560725.1705436.2438.81119.4470.346-0.0411.5300.8400.6203.2441.9137.3336.8826.601Real1.690369e+060.6455307.477256e+060.3280297.298458e+060.3201857.300663e+060.320282NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", + "
" + ], + "text/plain": [ + " log n_traces n_unique_traces \n", + "0 BPIC15_2 832 828 \\\n", + "1 BPI_Challenge_2018 43809 28457 \n", + "2 Receipt_WABO_CoSeLoG 1434 116 \n", + "3 BPIC15_3 1409 1349 \n", + "4 BPI_Challenge_2019 251734 11973 \n", + "\n", + " ratio_unique_traces_per_trace trace_len_min trace_len_max \n", + "0 0.995192 1 132 \\\n", + "1 0.649570 24 2973 \n", + "2 0.080893 1 25 \n", + "3 0.957417 3 124 \n", + "4 0.047562 1 990 \n", + "\n", + " trace_len_mean trace_len_median trace_len_mode trace_len_std \n", + "0 53.310096 54.0 61 19.894977 \\\n", + "1 57.391541 49.0 49 34.872131 \n", + "2 5.981172 6.0 6 2.166129 \n", + "3 42.356991 42.0 44 16.138406 \n", + "4 6.339720 5.0 5 13.057417 \n", + "\n", + " trace_len_variance trace_len_q1 trace_len_q3 trace_len_iqr \n", + "0 395.810090 44.0 62.0 18.0 \\\n", + "1 1216.065487 44.0 59.0 15.0 \n", + "2 4.692114 6.0 6.0 0.0 \n", + "3 260.448143 38.0 47.0 9.0 \n", + "4 170.496137 5.0 6.0 1.0 \n", + "\n", + " trace_len_geometric_mean trace_len_geometric_std trace_len_harmonic_mean \n", + "0 48.150111 1.695311 37.583741 \\\n", + "1 53.775008 1.367397 51.651502 \n", + "2 5.414708 1.704965 4.356445 \n", + "3 37.637731 1.786726 29.092933 \n", + "4 5.173569 1.635822 4.592844 \n", + "\n", + " trace_len_skewness trace_len_kurtosis trace_len_coefficient_variation \n", + "0 0.054138 0.804992 0.373193 \\\n", + "1 26.126459 1720.399665 0.607618 \n", + "2 1.276525 12.296006 0.362158 \n", + "3 -0.009541 1.543369 0.381009 \n", + "4 22.132989 753.772202 2.059621 \n", + "\n", + " trace_len_entropy trace_len_hist1 trace_len_hist2 trace_len_hist3 \n", + "0 6.646715 0.003853 0.004863 4.679243e-03 \\\n", + "1 10.598758 0.003385 0.000005 9.288448e-07 \n", + "2 7.197193 0.036030 0.008136 3.411204e-01 \n", + "3 7.167153 0.006921 0.004340 1.630604e-02 \n", + "4 12.044057 0.010078 0.000020 9.559579e-06 \n", + "\n", + " trace_len_hist4 trace_len_hist5 trace_len_hist6 trace_len_hist7 \n", + "0 0.023947 2.376321e-02 8.257487e-03 0.004771 \\\n", 
+ "1 0.000000 0.000000e+00 0.000000e+00 0.000000 \n", + "2 0.023536 3.777313e-03 1.743375e-03 0.000291 \n", + "3 0.036953 1.173096e-02 4.105837e-03 0.001584 \n", + "4 0.000003 3.614967e-07 1.606652e-07 0.000000 \n", + "\n", + " trace_len_hist8 trace_len_hist9 trace_len_hist10 \n", + "0 1.376248e-03 6.422490e-04 1.834997e-04 \\\n", + "1 0.000000e+00 7.740373e-08 7.740373e-08 \n", + "2 1.452813e-03 0.000000e+00 5.811251e-04 \n", + "3 5.278933e-04 1.173096e-04 5.865481e-05 \n", + "4 4.016630e-08 8.033260e-08 8.033260e-08 \n", + "\n", + " trace_len_skewness_hist trace_len_kurtosis_hist \n", + "0 0.054138 0.804992 \\\n", + "1 26.126459 1720.399665 \n", + "2 1.276525 12.296006 \n", + "3 -0.009541 1.543369 \n", + "4 22.132989 753.772202 \n", + "\n", + " ratio_most_common_variant ratio_top_1_variants ratio_top_5_variants \n", + "0 0.002404 0.014423 0.054087 \\\n", + "1 0.026981 0.290374 0.373006 \n", + "2 0.497211 0.497211 0.796374 \n", + "3 0.010646 0.049681 0.090135 \n", + "4 0.199758 0.871424 0.929990 \n", + "\n", + " ratio_top_10_variants ratio_top_20_variants ratio_top_50_variants \n", + "0 0.103365 0.203125 0.502404 \\\n", + "1 0.415371 0.480335 0.675204 \n", + "2 0.887029 0.930265 0.959554 \n", + "3 0.137686 0.233499 0.520937 \n", + "4 0.946368 0.959767 0.976217 \n", + "\n", + " ratio_top_75_variants mean_variant_occurrence std_variant_occurrence \n", + "0 0.751202 1.004831 0.069337 \\\n", + "1 0.837590 1.539481 12.487438 \n", + "2 0.979777 12.362069 68.360277 \n", + "3 0.760114 1.044477 0.592348 \n", + "4 0.988106 21.025140 594.255619 \n", + "\n", + " skewness_variant_occurrence kurtosis_variant_occurrence \n", + "0 14.283027 202.004854 \\\n", + "1 64.625680 5083.455806 \n", + "2 9.380687 92.281919 \n", + "3 17.964130 358.019511 \n", + "4 64.772702 4917.319751 \n", + "\n", + " n_unique_activities activities_min activities_max activities_mean \n", + "0 410 1 830 108.180488 \\\n", + "1 41 17 466141 61323.560976 \n", + "2 27 1 1434 317.666667 \n", + "3 383 1 1409 
155.825065 \n", + "4 42 2 314097 37998.166667 \n", + "\n", + " activities_median activities_std activities_variance activities_q1 \n", + "0 12.0 187.588162 3.518932e+04 3.0 \\\n", + "1 7530.0 120522.247417 1.452561e+10 902.0 \n", + "2 27.0 553.389823 3.062403e+05 8.0 \n", + "3 16.0 306.310544 9.382615e+04 5.0 \n", + "4 1628.0 80833.669206 6.534082e+09 202.0 \n", + "\n", + " activities_q3 activities_iqr activities_skewness activities_kurtosis \n", + "0 125.5 122.5 2.129412 3.808278 \\\n", + "1 45907.0 45005.0 2.444007 4.773254 \n", + "2 50.0 42.0 1.342951 -0.178094 \n", + "3 108.5 103.5 2.446349 5.280931 \n", + "4 11536.0 11334.0 2.169648 3.263594 \n", + "\n", + " n_unique_start_activities start_activities_min start_activities_max \n", + "0 14 1 731 \\\n", + "1 4 2 38623 \n", + "2 1 1434 1434 \n", + "3 9 1 1348 \n", + "4 8 2 199867 \n", + "\n", + " start_activities_mean start_activities_median start_activities_std \n", + "0 59.428571 1.0 186.717401 \\\n", + "1 10952.250000 2592.0 16111.407548 \n", + "2 1434.000000 1434.0 0.000000 \n", + "3 156.555556 8.0 421.270858 \n", + "4 31466.750000 869.0 65387.493286 \n", + "\n", + " start_activities_variance start_activities_q1 start_activities_q3 \n", + "0 3.486339e+04 1.0 8.25 \\\n", + "1 2.595775e+08 36.5 13507.75 \n", + "2 0.000000e+00 1434.0 1434.00 \n", + "3 1.774691e+05 3.0 14.00 \n", + "4 4.275524e+09 97.0 14224.25 \n", + "\n", + " start_activities_iqr start_activities_skewness start_activities_kurtosis \n", + "0 7.25 3.300411 8.960767 \\\n", + "1 13471.25 1.098736 -0.714800 \n", + "2 0.00 NaN NaN \n", + "3 11.00 2.474158 4.122971 \n", + "4 14127.25 2.059742 2.535789 \n", + "\n", + " n_unique_end_activities end_activities_min end_activities_max \n", + "0 82 1 216 \\\n", + "1 21 1 34830 \n", + "2 14 1 828 \n", + "3 119 1 342 \n", + "4 32 1 181328 \n", + "\n", + " end_activities_mean end_activities_median end_activities_std \n", + "0 10.146341 1.0 35.318800 \\\n", + "1 2086.142857 13.0 7431.744981 \n", + "2 102.428571 
6.0 225.871555 \n", + "3 11.840336 2.0 39.557210 \n", + "4 7866.687500 64.5 31658.428996 \n", + "\n", + " end_activities_variance end_activities_q1 end_activities_q3 \n", + "0 1.247418e+03 1.00 3.00 \\\n", + "1 5.523083e+07 2.00 193.00 \n", + "2 5.101796e+04 1.25 33.25 \n", + "3 1.564773e+03 1.00 7.00 \n", + "4 1.002256e+09 9.00 1027.25 \n", + "\n", + " end_activities_iqr end_activities_skewness end_activities_kurtosis \n", + "0 2.00 5.098791 25.861991 \\\n", + "1 191.00 4.062387 14.952824 \n", + "2 32.00 2.471765 4.846541 \n", + "3 6.00 6.217217 43.335525 \n", + "4 1018.25 5.135607 25.170543 \n", + "\n", + " entropy_trace entropy_prefix entropy_global_block entropy_lempel_ziv \n", + "0 9.691 14.524 19.448 3.859 \\\n", + "1 13.191 16.272 20.972 1.023 \n", + "2 3.209 4.746 7.019 0.385 \n", + "3 10.317 14.226 18.743 3.182 \n", + "4 6.243 8.811 19.447 0.346 \n", + "\n", + " entropy_k_block_diff_1 entropy_k_block_diff_3 entropy_k_block_diff_5 \n", + "0 7.105 7.105 7.105 \\\n", + "1 -0.010 1.855 0.511 \n", + "2 2.672 2.966 0.804 \n", + "3 -0.007 6.780 6.780 \n", + "4 -0.041 1.530 0.840 \n", + "\n", + " entropy_k_block_ratio_1 entropy_k_block_ratio_3 entropy_k_block_ratio_5 \n", + "0 7.105 7.105 7.105 \\\n", + "1 1.403 3.572 2.001 \n", + "2 1.484 2.966 2.966 \n", + "3 6.780 6.780 6.780 \n", + "4 0.620 3.244 1.913 \n", + "\n", + " entropy_knn_3 entropy_knn_5 entropy_knn_7 Log Nature \n", + "0 5.545 5.039 4.721 Real \\\n", + "1 7.849 7.371 7.067 Real \n", + "2 3.260 2.845 2.584 Real \n", + "3 5.701 5.212 4.900 Real \n", + "4 7.333 6.882 6.601 Real \n", + "\n", + " epa_variant_entropy epa_normalized_variant_entropy epa_sequence_entropy \n", + "0 2.405122e+05 0.627973 2.858769e+05 \\\n", + "1 1.156384e+07 0.712079 2.114626e+07 \n", + "2 2.382326e+03 0.689363 1.829627e+04 \n", + "3 2.981464e+05 0.661781 3.975043e+05 \n", + "4 1.690369e+06 0.645530 7.477256e+06 \n", + "\n", + " epa_normalized_sequence_entropy epa_sequence_entropy_linear_forgetting \n", + "0 0.602371 
1.505466e+05 \\\n", + "1 0.570688 1.414023e+07 \n", + "2 0.235532 7.814868e+03 \n", + "3 0.605676 2.241393e+05 \n", + "4 0.328029 7.298458e+06 \n", + "\n", + " epa_normalized_sequence_entropy_linear_forgetting \n", + "0 0.317217 \\\n", + "1 0.381612 \n", + "2 0.100603 \n", + "3 0.341521 \n", + "4 0.320185 \n", + "\n", + " epa_sequence_entropy_exponential_forgetting \n", + "0 1.853129e+05 \\\n", + "1 1.557608e+07 \n", + "2 1.072870e+04 \n", + "3 2.657571e+05 \n", + "4 7.300663e+06 \n", + "\n", + " epa_normalized_sequence_entropy_exponential_forgetting \n", + "0 0.390473 \\\n", + "1 0.420362 \n", + "2 0.138113 \n", + "3 0.404934 \n", + "4 0.320282 \n", + "\n", + " accumulated_time_time_min accumulated_time_time_max \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_mean accumulated_time_time_median \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_mode accumulated_time_time_std \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_variance accumulated_time_time_q1 \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_q3 accumulated_time_time_iqr \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_geometric_mean accumulated_time_time_geometric_std \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_harmonic_mean accumulated_time_time_skewness \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_kurtosis \n", + "0 NaN \\\n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN \n", + "\n", + " 
accumulated_time_time_coefficient_variation accumulated_time_time_entropy \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " accumulated_time_time_skewness_hist accumulated_time_time_kurtosis_hist \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_min execution_time_time_max execution_time_time_mean \n", + "0 NaN NaN NaN \\\n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " execution_time_time_median execution_time_time_mode \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_std execution_time_time_variance \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_q1 execution_time_time_q3 execution_time_time_iqr \n", + "0 NaN NaN NaN \\\n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " execution_time_time_geometric_mean execution_time_time_geometric_std \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_harmonic_mean execution_time_time_skewness \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_kurtosis execution_time_time_coefficient_variation \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_entropy execution_time_time_skewness_hist \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " execution_time_time_kurtosis_hist remaining_time_time_min \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_max 
remaining_time_time_mean \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_median remaining_time_time_mode \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_std remaining_time_time_variance \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_q1 remaining_time_time_q3 remaining_time_time_iqr \n", + "0 NaN NaN NaN \\\n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " remaining_time_time_geometric_mean remaining_time_time_geometric_std \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_harmonic_mean remaining_time_time_skewness \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_kurtosis remaining_time_time_coefficient_variation \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_entropy remaining_time_time_skewness_hist \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " remaining_time_time_kurtosis_hist within_day_time_min \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_max within_day_time_mean within_day_time_median \n", + "0 NaN NaN NaN \\\n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " within_day_time_mode within_day_time_std within_day_time_variance \n", + "0 NaN NaN NaN \\\n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " within_day_time_q1 within_day_time_q3 within_day_time_iqr \n", + "0 NaN NaN NaN \\\n", + "1 
NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " within_day_time_geometric_mean within_day_time_geometric_std \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_harmonic_mean within_day_time_skewness \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_kurtosis within_day_time_coefficient_variation \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_entropy within_day_time_skewness_hist \n", + "0 NaN NaN \\\n", + "1 NaN NaN \n", + "2 NaN NaN \n", + "3 NaN NaN \n", + "4 NaN NaN \n", + "\n", + " within_day_time_kurtosis_hist \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n", + "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n", + "\n", + "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n", + "print(bpic_features.shape)\n", + "print(len(bpic_features), \" Event-Logs: \", bpic_features.sort_values('log')['log'].unique())\n", + "\n", + "#bpic_features.rename(columns={\"variant_entropy\":\"epa_variant_entropy\", \"normalized_variant_entropy\":\"epa_normalized_variant_entropy\", \"sequence_entropy\":\"epa_sequence_entropy\", \"normalized_sequence_entropy\":\"epa_normalized_sequence_entropy\", \"sequence_entropy_linear_forgetting\":\"epa_sequence_entropy_linear_forgetting\", \"normalized_sequence_entropy_linear_forgetting\":\"epa_normalized_sequence_entropy_linear_forgetting\", \"sequence_entropy_exponential_forgetting\":\"epa_sequence_entropy_exponential_forgetting\", 
\"normalized_sequence_entropy_exponential_forgetting\":\"epa_normalized_sequence_entropy_exponential_forgetting\"},\n", + "# errors=\"raise\", inplace=True)\n", + "\n", + "bpic_features.head()\n", + "#bpic_features.to_csv(\"../data/34_bpic_features.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "ef0df0b9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
logratio_unique_traces_per_traceratio_most_common_variantratio_top_10_variantsepa_normalized_variant_entropyepa_normalized_sequence_entropyepa_normalized_sequence_entropy_linear_forgettingepa_normalized_sequence_entropy_exponential_forgetting
0BPIC15_20.9951920.0024040.1033650.6279730.6023710.3172170.390473
1BPI_Challenge_20180.6495700.0269810.4153710.7120790.5706880.3816120.420362
2Receipt_WABO_CoSeLoG0.0808930.4972110.8870290.6893630.2355320.1006030.138113
3BPIC15_30.9574170.0106460.1376860.6617810.6056760.3415210.404934
4BPI_Challenge_20190.0475620.1997580.9463680.6455300.3280290.3201850.320282
5RequestForPayment0.0129250.4372640.9334880.7037350.1890480.0975720.118744
6PrepaidTravelCost0.0962360.2710810.8227730.7237850.3170440.1848790.214387
7DomesticDeclarations0.0094290.4398100.9500950.6964740.1647580.0854390.104389
8BPIC15_40.9962010.0028490.1025640.6529850.6038660.3559270.412835
9BPI_Challenge_20120.3336140.2620160.6862540.7082800.4230740.2261330.275551
10Hospital_log0.8582680.0358710.2274720.5174430.5130320.2678250.331672
11BPIC15_50.9974050.0017300.1020760.6487020.6032600.3424100.404580
12CoSeLoG_WABO_20.9984500.0031010.1007750.6184550.5940350.3232330.389858
13Road_Traffic_Fine_Management_Process0.0015360.3756200.9931040.7693530.1119320.0525860.068442
14BPI_Challenge_2017_Offer_log0.0003720.3806260.3806260.8134790.1051300.0526720.066000
15Sepsis_Cases_Event_Log0.8057140.0333330.2742860.6957590.5223430.2193650.299505
16CoSeLoG_WABO_30.9494020.0119600.1453540.6542960.5963670.2781210.356439
17BPI_Challenge_2013_closed_problems0.1230670.3315400.8406190.7053830.3109400.2865150.288383
18BPI_Challenge_2013_incidents0.2000260.2321950.7944140.7178460.4046510.3910970.391625
19PermitLog0.2092000.1353150.7575370.7336530.4201500.1372870.215490
20BPIC15_10.9758130.0066720.1217680.6528550.6102940.2702410.363928
21InternationalDeclarations0.1167620.2122810.8112890.7582680.3393800.1456110.193753
22BPI_Challenge_20170.5055700.0335140.5313400.7417060.4615650.2319220.290464
23BPI2016_Complaints0.4380530.1017700.4247790.8994970.6837960.4046850.470116
24BPI2016_Questions0.7974270.0156500.2823110.8134680.7561320.4249100.506118
25BPI2016_Werkmap_Messages0.0028820.2958030.7141060.0000000.0000000.0000000.000000
26BPI_Challenge_2013_open_problems0.1318680.2173380.7692310.7029600.2767710.2620940.263029
27CoSeLoG_WABO_10.9775880.0096050.1195300.6466970.6015660.2928240.376276
28CoSeLoG_WABO_40.9923760.0025410.1067340.6443990.5971090.3739200.422526
29CoSeLoG_WABO_50.9854260.0044840.1121080.6426680.5924540.3468320.401731
30Detail_Change0.0484440.0749440.765056NaNNaNNaNNaN
31Detail_Incident_Activity0.4968470.0374550.552836NaNNaNNaNNaN
32Detail_Interaction0.0000410.7870810.000000NaNNaNNaNNaN
33finale0.0493450.5165940.9063320.7991200.2540660.1184780.154576
\n", + "
" + ], + "text/plain": [ + " log ratio_unique_traces_per_trace \n", + "0 BPIC15_2 0.995192 \\\n", + "1 BPI_Challenge_2018 0.649570 \n", + "2 Receipt_WABO_CoSeLoG 0.080893 \n", + "3 BPIC15_3 0.957417 \n", + "4 BPI_Challenge_2019 0.047562 \n", + "5 RequestForPayment 0.012925 \n", + "6 PrepaidTravelCost 0.096236 \n", + "7 DomesticDeclarations 0.009429 \n", + "8 BPIC15_4 0.996201 \n", + "9 BPI_Challenge_2012 0.333614 \n", + "10 Hospital_log 0.858268 \n", + "11 BPIC15_5 0.997405 \n", + "12 CoSeLoG_WABO_2 0.998450 \n", + "13 Road_Traffic_Fine_Management_Process 0.001536 \n", + "14 BPI_Challenge_2017_Offer_log 0.000372 \n", + "15 Sepsis_Cases_Event_Log 0.805714 \n", + "16 CoSeLoG_WABO_3 0.949402 \n", + "17 BPI_Challenge_2013_closed_problems 0.123067 \n", + "18 BPI_Challenge_2013_incidents 0.200026 \n", + "19 PermitLog 0.209200 \n", + "20 BPIC15_1 0.975813 \n", + "21 InternationalDeclarations 0.116762 \n", + "22 BPI_Challenge_2017 0.505570 \n", + "23 BPI2016_Complaints 0.438053 \n", + "24 BPI2016_Questions 0.797427 \n", + "25 BPI2016_Werkmap_Messages 0.002882 \n", + "26 BPI_Challenge_2013_open_problems 0.131868 \n", + "27 CoSeLoG_WABO_1 0.977588 \n", + "28 CoSeLoG_WABO_4 0.992376 \n", + "29 CoSeLoG_WABO_5 0.985426 \n", + "30 Detail_Change 0.048444 \n", + "31 Detail_Incident_Activity 0.496847 \n", + "32 Detail_Interaction 0.000041 \n", + "33 finale 0.049345 \n", + "\n", + " ratio_most_common_variant ratio_top_10_variants \n", + "0 0.002404 0.103365 \\\n", + "1 0.026981 0.415371 \n", + "2 0.497211 0.887029 \n", + "3 0.010646 0.137686 \n", + "4 0.199758 0.946368 \n", + "5 0.437264 0.933488 \n", + "6 0.271081 0.822773 \n", + "7 0.439810 0.950095 \n", + "8 0.002849 0.102564 \n", + "9 0.262016 0.686254 \n", + "10 0.035871 0.227472 \n", + "11 0.001730 0.102076 \n", + "12 0.003101 0.100775 \n", + "13 0.375620 0.993104 \n", + "14 0.380626 0.380626 \n", + "15 0.033333 0.274286 \n", + "16 0.011960 0.145354 \n", + "17 0.331540 0.840619 \n", + "18 0.232195 0.794414 \n", + "19 0.135315 
0.757537 \n", + "20 0.006672 0.121768 \n", + "21 0.212281 0.811289 \n", + "22 0.033514 0.531340 \n", + "23 0.101770 0.424779 \n", + "24 0.015650 0.282311 \n", + "25 0.295803 0.714106 \n", + "26 0.217338 0.769231 \n", + "27 0.009605 0.119530 \n", + "28 0.002541 0.106734 \n", + "29 0.004484 0.112108 \n", + "30 0.074944 0.765056 \n", + "31 0.037455 0.552836 \n", + "32 0.787081 0.000000 \n", + "33 0.516594 0.906332 \n", + "\n", + " epa_normalized_variant_entropy epa_normalized_sequence_entropy \n", + "0 0.627973 0.602371 \\\n", + "1 0.712079 0.570688 \n", + "2 0.689363 0.235532 \n", + "3 0.661781 0.605676 \n", + "4 0.645530 0.328029 \n", + "5 0.703735 0.189048 \n", + "6 0.723785 0.317044 \n", + "7 0.696474 0.164758 \n", + "8 0.652985 0.603866 \n", + "9 0.708280 0.423074 \n", + "10 0.517443 0.513032 \n", + "11 0.648702 0.603260 \n", + "12 0.618455 0.594035 \n", + "13 0.769353 0.111932 \n", + "14 0.813479 0.105130 \n", + "15 0.695759 0.522343 \n", + "16 0.654296 0.596367 \n", + "17 0.705383 0.310940 \n", + "18 0.717846 0.404651 \n", + "19 0.733653 0.420150 \n", + "20 0.652855 0.610294 \n", + "21 0.758268 0.339380 \n", + "22 0.741706 0.461565 \n", + "23 0.899497 0.683796 \n", + "24 0.813468 0.756132 \n", + "25 0.000000 0.000000 \n", + "26 0.702960 0.276771 \n", + "27 0.646697 0.601566 \n", + "28 0.644399 0.597109 \n", + "29 0.642668 0.592454 \n", + "30 NaN NaN \n", + "31 NaN NaN \n", + "32 NaN NaN \n", + "33 0.799120 0.254066 \n", + "\n", + " epa_normalized_sequence_entropy_linear_forgetting \n", + "0 0.317217 \\\n", + "1 0.381612 \n", + "2 0.100603 \n", + "3 0.341521 \n", + "4 0.320185 \n", + "5 0.097572 \n", + "6 0.184879 \n", + "7 0.085439 \n", + "8 0.355927 \n", + "9 0.226133 \n", + "10 0.267825 \n", + "11 0.342410 \n", + "12 0.323233 \n", + "13 0.052586 \n", + "14 0.052672 \n", + "15 0.219365 \n", + "16 0.278121 \n", + "17 0.286515 \n", + "18 0.391097 \n", + "19 0.137287 \n", + "20 0.270241 \n", + "21 0.145611 \n", + "22 0.231922 \n", + "23 0.404685 \n", + "24 
# Per-feature summary statistics: after the transpose each row describes one feature column.
bpic_stats = bpic_features.describe().T

# Candidate features whose observed minimum and maximum both lie inside [0, 1].
in_unit_interval = (bpic_stats['min'] >= 0) & (bpic_stats['max'] <= 1)
normalized_feature_names = bpic_stats.loc[in_unit_interval].index.to_list()

# NOTE(review): the detected candidates are overwritten right away by this hand-picked
# list -- presumably an intentional manual selection; confirm before removing either step.
normalized_feature_names = [
    'ratio_unique_traces_per_trace',
    'ratio_most_common_variant',
    'ratio_top_10_variants',
    'epa_normalized_variant_entropy',
    'epa_normalized_sequence_entropy',
    'epa_normalized_sequence_entropy_linear_forgetting',
    'epa_normalized_sequence_entropy_exponential_forgetting',
]
print(normalized_feature_names)

# Last expression of the cell: the notebook renders the selected columns next to the log name.
bpic_features[['log'] + normalized_feature_names]
../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rmcv.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enself.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rt10v.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rt10v.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enseef.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rmcv.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rutpt.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enve.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rmcv.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rmcv.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rutpt.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_enve.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rt10v.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rutpt.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enself.json\n", + "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rt10v.json\n", + "Saved experiment config in 
#Features between 0 and 1:
def _objective_combinations(objectives, n_para_obj):
    """Return every unordered selection of ``n_para_obj`` distinct objectives.

    Each selection is an alphabetically sorted tuple and the selections come back in sorted
    order, so repeated runs process (and print) the experiments in the same sequence.
    An empty list is returned when fewer than ``n_para_obj`` distinct objectives exist.
    """
    return list(itertools.combinations(sorted(set(objectives)), n_para_obj))


def write_generator_bpic_experiment(objectives, n_para_obj=2):
    """Write one generator config JSON per combination of ``objectives``.

    For every selection of ``n_para_obj`` distinct objectives a two-step pipeline config
    (event log generation followed by feature extraction) is dumped to
    ``../config_files/algorithm/34_bpic_features/generator_34bpic_<n>objectives_<abbrev>.json``.

    Args:
        objectives: Iterable of feature names (strings) to combine.
        n_para_obj: Number of objectives targeted simultaneously in one experiment.

    Returns:
        The config (a list of two dicts) that was written last, or ``None`` when
        ``objectives`` holds fewer than ``n_para_obj`` distinct names.
    """
    experiments = _objective_combinations(objectives, n_para_obj)
    if not experiments:
        # Nothing to write; the loop below would otherwise leave `experiment` unbound.
        return None

    data_dir = os.path.join('..', 'data', '34_bpic_features')
    config_dir = os.path.join('..', 'config_files', 'algorithm', '34_bpic_features')
    os.makedirs(data_dir, exist_ok=True)
    os.makedirs(config_dir, exist_ok=True)

    experiment = None
    for exp in experiments:
        experiment_name = f"34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}"
        # Sub-directory of the generated features: the experiment name without the
        # "grid_" / "objectives" markers.
        second_dir = experiment_name.replace("grid_", "").replace("objectives", "")

        experiment = [
            {
                'pipeline_step': 'event_logs_generation',
                'output_path': 'output/generated',
                'generator_params': {
                    "experiment": {
                        "input_path": "data/34_bpic_features.csv",
                        "objectives": exp,
                    },
                    'config_space': {
                        'mode': [5, 20],
                        'sequence': [0.01, 1],
                        'choice': [0.01, 1],
                        'parallel': [0.01, 1],
                        'loop': [0.01, 1],
                        'silent': [0.01, 1],
                        'lt_dependency': [0.01, 1],
                        'num_traces': [10, 10001],
                        'duplicate': [0],
                        'or': [0],
                    },
                    'n_trials': 200,
                },
            },
            {
                'pipeline_step': 'feature_extraction',
                'input_path': os.path.join(
                    'output', 'features', 'generated', '34_bpic_features', second_dir
                ),
                'feature_params': {
                    'feature_set': [
                        'simple_stats', 'trace_length', 'trace_variant', 'activities',
                        'start_activities', 'end_activities', 'eventropies', 'epa_based',
                    ],
                },
                'output_path': 'output/plots',
                'real_eventlog_path': 'data/34_bpic_features.csv',
                'plot_type': 'boxplot',
            },
        ]

        output_path = os.path.join(
            config_dir, f'generator_{experiment_name.split(".")[0]}.json'
        )
        # ensure_ascii=False may emit non-ASCII characters, so pin the file encoding.
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(experiment, f, ensure_ascii=False)
        print(f"Saved experiment config in {output_path}")
    return experiment


def create_objectives_grid(objectives, n_para_obj=2):
    """Write a generator config for every combination of ``n_para_obj`` objectives.

    Prints the number of combinations, then delegates each combination to
    ``write_generator_bpic_experiment``.

    Args:
        objectives: Iterable of feature names (strings) to combine.
        n_para_obj: Number of objectives targeted simultaneously in one experiment.

    Returns:
        The list of objective tuples for which a config was written.
    """
    experiments = _objective_combinations(objectives, n_para_obj)
    print(len(experiments))

    for exp in experiments:
        # Forward the combination size so that each tuple yields exactly one config.
        write_generator_bpic_experiment(objectives=exp, n_para_obj=n_para_obj)
    return experiments


exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2)
print(exp_test)

# --- Single objective from real logs (feature selection) ---
def _objective_combinations(objectives, n_para_obj):
    """Return every unordered selection of ``n_para_obj`` distinct objectives.

    Each selection is an alphabetically sorted tuple and the selections come back in sorted
    order, so repeated runs process (and print) the experiments in the same sequence.
    Works for ``n_para_obj == 1`` as well, yielding one 1-tuple per distinct objective.
    """
    return list(itertools.combinations(sorted(set(objectives)), n_para_obj))


def _strip_parent_prefix(path):
    """Drop one leading ``../`` from ``path``; any other path is returned unchanged."""
    for prefix in ('..' + os.sep, '../'):
        if path.startswith(prefix):
            return path[len(prefix):]
    return path


def write_single_objective_experiment(experiment_path, objectives=None):
    """Write the generator config JSON that belongs to one grid experiment CSV.

    The config (event log generation followed by feature extraction) is dumped to
    ``../config_files/algorithm/grid_experiments/generator_<csv name>.json``.

    Args:
        experiment_path: Path of the experiment CSV; a leading ``../`` is removed
            before the path is stored in the config.
        objectives: Feature names targeted by the experiment. Defaults to
            ``["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]``.

    Returns:
        The config that was written, as a list of two dicts.
    """
    if objectives is None:
        # Built per call so the default list is never shared between invocations.
        objectives = ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]

    input_path = _strip_parent_prefix(experiment_path)
    first_dir = os.path.split(input_path)[-1].replace(".csv", "")
    second_dir = first_dir.replace("grid_", "").replace("objectives", "")

    experiment = [
        {
            'pipeline_step': 'event_logs_generation',
            'output_path': os.path.join('output', 'generated', 'grid_1obj'),
            'generator_params': {
                "experiment": {
                    "input_path": input_path,
                    "objectives": objectives,
                },
                'config_space': {
                    'mode': [5, 20],
                    'sequence': [0.01, 1],
                    'choice': [0.01, 1],
                    'parallel': [0.01, 1],
                    'loop': [0.01, 1],
                    'silent': [0.01, 1],
                    'lt_dependency': [0.01, 1],
                    'num_traces': [10, 10001],
                    'duplicate': [0],
                    'or': [0],
                },
                'n_trials': 200,
            },
        },
        {
            'pipeline_step': 'feature_extraction',
            'input_path': os.path.join(
                'output', 'features', 'generated', 'grid_1obj', first_dir, second_dir
            ),
            'feature_params': {
                'feature_set': [
                    'simple_stats', 'trace_length', 'trace_variant', 'activities',
                    'start_activities', 'end_activities', 'eventropies', 'epa_based',
                ],
            },
            'output_path': 'output/plots',
            'real_eventlog_path': 'data/34_bpic_features.csv',
            'plot_type': 'boxplot',
        },
    ]

    config_dir = os.path.join('..', 'config_files', 'algorithm', 'grid_experiments')
    os.makedirs(config_dir, exist_ok=True)
    config_name = os.path.split(experiment_path)[-1].split(".")[0]
    output_path = os.path.join(config_dir, f'generator_{config_name}.json')
    # ensure_ascii=False may emit non-ASCII characters, so pin the file encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(experiment, f, ensure_ascii=False)
    print(f"Saved experiment config in {output_path}")

    return experiment


def create_objectives_grid(objectives, n_para_obj=2):
    """Create the grid task CSV and generator config for every objective combination.

    Every task assigns one value from 0.0, 0.1, ..., 1.0 to each objective of an
    experiment; the full cartesian product of those values is written to
    ``../data/grid_experiments/grid_<n>objectives_<abbrev>.csv`` and the matching
    config is produced by ``write_single_objective_experiment``.

    Args:
        objectives: Iterable of feature names (strings) to combine.
        n_para_obj: Number of objectives targeted simultaneously in one experiment.

    Returns:
        The list of objective tuples for which files were written.
    """
    experiments = _objective_combinations(objectives, n_para_obj)
    print(len(experiments), "experiments: ", experiments)

    # linspace fixes the number of grid points (11); a float-step arange derives its
    # length from a rounded division and can gain or lose the end point.
    grid_values = np.around(np.linspace(0, 1, 11), 2)
    tasks = [
        (f'task_{i+1}',) + task
        for i, task in enumerate(itertools.product(grid_values, repeat=n_para_obj))
    ]
    print(len(tasks))

    data_dir = os.path.join('..', 'data', 'grid_experiments')
    for exp in experiments:
        df = pd.DataFrame(data=tasks, columns=["task", *exp])
        os.makedirs(data_dir, exist_ok=True)
        experiment_path = os.path.join(
            data_dir, f"grid_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv"
        )
        df.to_csv(experiment_path, index=False)
        print(f"Saved experiment in {experiment_path}")
        write_single_objective_experiment(experiment_path, objectives=exp)
    return experiments


exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1)
print(exp_test)