diff --git "a/notebooks/experiment_generator.ipynb" "b/notebooks/experiment_generator.ipynb"
new file mode 100644--- /dev/null
+++ "b/notebooks/experiment_generator.ipynb"
@@ -0,0 +1,3110 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "08ee6ee0",
+ "metadata": {},
+ "source": [
+ "## Grid Objectives\n",
+    "Iterates each objective (column) over an evenly spaced grid of target values between its minimum and maximum.\n",
+ "\n",
+ "### Glossary\n",
+    "- **task**: One row of an experiment file: a set of target values, one per objective key. Tasks are aimed at sequentially, one after another.\n",
+    "- **objective**: One key (column) together with its target value. All objectives of a task are aimed at simultaneously.\n",
+    "- **experiment**: One file containing a fixed number of objectives (columns) and a fixed number of tasks (rows)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "e5aa7223",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import itertools\n",
+ "import json\n",
+ "import numpy as np\n",
+ "import os\n",
+ "import pandas as pd"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "472fd031",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# All features with values between 0 and 1. Kept under its own name for reference:\n",
+    "# it used to be assigned to `normalized_feature_names` and was overwritten right away.\n",
+    "all_normalized_feature_names = ['ratio_unique_traces_per_trace', 'trace_len_hist1', 'trace_len_hist2',\n",
+    "                                'trace_len_hist3', 'trace_len_hist4', 'trace_len_hist5', 'trace_len_hist7',\n",
+    "                                'trace_len_hist8', 'trace_len_hist9', 'ratio_most_common_variant',\n",
+    "                                'ratio_top_1_variants', 'ratio_top_5_variants', 'ratio_top_10_variants',\n",
+    "                                'ratio_top_20_variants', 'ratio_top_50_variants', 'ratio_top_75_variants',\n",
+    "                                'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy',\n",
+    "                                'epa_normalized_sequence_entropy_linear_forgetting',\n",
+    "                                'epa_normalized_sequence_entropy_exponential_forgetting']\n",
+    "\n",
+    "# Subset of the features above that is actually used as grid objectives below.\n",
+    "normalized_feature_names = ['ratio_unique_traces_per_trace', 'ratio_most_common_variant',\n",
+    "                            'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy',\n",
+    "                            'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n",
+ "\n",
+    "def abbrev_obj_keys(obj_keys):\n",
+    "    \"\"\"Build a short identifier from a collection of objective keys.\n",
+    "\n",
+    "    Every key is split on underscores; from each piece the first character\n",
+    "    and all digits are kept (e.g. 'ratio_top_10_variants' -> 'rt10v'). The\n",
+    "    shortened keys are then joined with underscores.\n",
+    "    \"\"\"\n",
+    "    def _shorten(key):\n",
+    "        return ''.join(\n",
+    "            symbol\n",
+    "            for piece in key.split(\"_\")\n",
+    "            for position, symbol in enumerate(piece)\n",
+    "            if position == 0 or symbol.isdigit()\n",
+    "        )\n",
+    "\n",
+    "    return '_'.join(_shorten(key) for key in obj_keys)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "2be119c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('ratio_most_common_variant', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n",
+ "121\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enve.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enve.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rutpt.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n",
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rutpt.json\n",
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+    "def write_generator_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n",
+    "    \"\"\"Write the pipeline configuration (JSON) for one grid experiment and return it.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    experiment_path : str\n",
+    "        Path of the experiment CSV as seen from the notebook, e.g.\n",
+    "        '../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv'. A leading '../'\n",
+    "        is dropped for the 'input_path' stored in the config.\n",
+    "    objectives : sequence of str\n",
+    "        Feature names the generator should aim at.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    list of dict\n",
+    "        The two pipeline steps ('event_logs_generation', 'feature_extraction')\n",
+    "        that were written to\n",
+    "        '../config_files/algorithm/grid_2obj/generator_<experiment name>.json'.\n",
+    "    \"\"\"\n",
+    "    # Drop only a real leading parent-directory component. The former blind\n",
+    "    # [3:] slice silently corrupted any path that did not start with '../'.\n",
+    "    config_input_path = experiment_path\n",
+    "    for parent_prefix in ('../', os.pardir + os.sep):\n",
+    "        if experiment_path.startswith(parent_prefix):\n",
+    "            config_input_path = experiment_path[len(parent_prefix):]\n",
+    "            break\n",
+    "\n",
+    "    # e.g. 'grid_2objectives_rt10v_rutpt' and '2_rt10v_rutpt'\n",
+    "    first_dir = os.path.split(config_input_path)[-1].replace(\".csv\", \"\")\n",
+    "    second_dir = first_dir.replace(\"grid_\", \"\").replace(\"objectives\", \"\")\n",
+    "\n",
+    "    experiment = [\n",
+    "        {\n",
+    "            'pipeline_step': 'event_logs_generation',\n",
+    "            'output_path': 'output/generated/grid_2obj',\n",
+    "            'generator_params': {\n",
+    "                \"experiment\": {\"input_path\": config_input_path,\n",
+    "                               \"objectives\": objectives},\n",
+    "                'config_space': {\n",
+    "                    'mode': [5, 20],\n",
+    "                    'sequence': [0.01, 1],\n",
+    "                    'choice': [0.01, 1],\n",
+    "                    'parallel': [0.01, 1],\n",
+    "                    'loop': [0.01, 1],\n",
+    "                    'silent': [0.01, 1],\n",
+    "                    'lt_dependency': [0.01, 1],\n",
+    "                    'num_traces': [10, 10001],\n",
+    "                    'duplicate': [0],\n",
+    "                    'or': [0]\n",
+    "                },\n",
+    "                'n_trials': 200\n",
+    "            }\n",
+    "        },\n",
+    "        {\n",
+    "            'pipeline_step': 'feature_extraction',\n",
+    "            'input_path': os.path.join('output', 'features', 'generated', 'grid_2obj', first_dir, second_dir),\n",
+    "            'feature_params': {'feature_set': ['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
+    "            'output_path': 'output/plots',\n",
+    "            'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+    "            'plot_type': 'boxplot'\n",
+    "        }\n",
+    "    ]\n",
+    "\n",
+    "    output_dir = os.path.join('..', 'config_files', 'algorithm', 'grid_2obj')\n",
+    "    os.makedirs(output_dir, exist_ok=True)\n",
+    "    config_name = os.path.split(experiment_path)[-1].split(\".\")[0]\n",
+    "    output_path = os.path.join(output_dir, f'generator_{config_name}.json')\n",
+    "    # ensure_ascii=False may emit non-ASCII characters, so fix the file encoding\n",
+    "    # instead of relying on the platform default.\n",
+    "    with open(output_path, 'w', encoding='utf-8') as f:\n",
+    "        json.dump(experiment, f, ensure_ascii=False)\n",
+    "    print(f\"Saved experiment config in {output_path}\")\n",
+    "\n",
+    "    return experiment\n",
+ "\n",
+    "def create_objectives_grid(objectives, n_para_obj=2):\n",
+    "    \"\"\"Write one grid experiment (CSV plus generator config) per objective combination.\n",
+    "\n",
+    "    Every experiment shares the same task grid: each objective takes the values\n",
+    "    0.0, 0.1, ..., 1.0 and all value combinations become one task (row).\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    objectives : iterable of str\n",
+    "        Candidate feature names.\n",
+    "    n_para_obj : int, default 2\n",
+    "        Number of objectives aimed at simultaneously in one experiment.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    list\n",
+    "        The objective combinations for which an experiment was written.\n",
+    "    \"\"\"\n",
+    "    if n_para_obj == 1:\n",
+    "        experiments = [[exp] for exp in objectives]\n",
+    "    else:\n",
+    "        # combinations() over the de-duplicated, sorted names yields every set of\n",
+    "        # distinct objectives exactly once, in a deterministic order. The former\n",
+    "        # eval/product filter only compared the first two entries, so for\n",
+    "        # n_para_obj >= 3 it let repeated objectives (duplicate columns) through.\n",
+    "        experiments = list(itertools.combinations(sorted(set(objectives)), n_para_obj))\n",
+    "        print(len(experiments), experiments)\n",
+    "\n",
+    "    # linspace avoids the floating-point end-point ambiguity of arange with a\n",
+    "    # non-integer step; the 11 rounded values are unchanged.\n",
+    "    grid_values = np.around(np.linspace(0, 1, 11), 2)\n",
+    "    tasks = [(f'task_{i+1}',) + task\n",
+    "             for i, task in enumerate(itertools.product(grid_values, repeat=n_para_obj))]\n",
+    "    print(len(tasks))\n",
+    "\n",
+    "    # The directory name is fixed to 'grid_2obj' regardless of n_para_obj, like\n",
+    "    # the config location used by write_generator_experiment.\n",
+    "    experiment_dir = os.path.join('..', 'data', 'grid_2obj')\n",
+    "    os.makedirs(experiment_dir, exist_ok=True)\n",
+    "    for exp in experiments:\n",
+    "        grid_df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n",
+    "        experiment_path = os.path.join(experiment_dir, f\"grid_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv\")\n",
+    "        grid_df.to_csv(experiment_path, index=False)\n",
+    "        print(f\"Saved experiment in {experiment_path}\")\n",
+    "        write_generator_experiment(experiment_path, objectives=exp)\n",
+    "\n",
+    "    # Previously nothing was returned, so callers that stored the result got None.\n",
+    "    return experiments\n",
+ " \n",
+    "# Generate every 2-objective grid experiment for the selected features and\n",
+    "# print whatever the call returns.\n",
+    "exp_test = create_objectives_grid(objectives=normalized_feature_names, n_para_obj=2)\n",
+    "print(exp_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56ab613b",
+ "metadata": {},
+ "source": [
+ "### Helper prototypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "dfd1a302",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Empty frame that fixes the column layout of the prototype objective grid.\n",
+    "grid_columns = [\"log\", \"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]\n",
+    "df = pd.DataFrame(columns=grid_columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "218946b7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Collect all grid rows first and build the frame once. Concatenating inside\n",
+    "# the loop is quadratic and, because it appended to the existing `df`, added the\n",
+    "# 36 rows again on every re-run of this cell.\n",
+    "grid_records = [\n",
+    "    {'log': f\"objective_{k}\",\n",
+    "     \"ratio_top_20_variants\": round(i, 1),\n",
+    "     \"epa_normalized_sequence_entropy_linear_forgetting\": round(j, 1)}\n",
+    "    for k, (i, j) in enumerate(\n",
+    "        itertools.product(np.arange(0, 1.1, 0.2), np.arange(0, 0.55, 0.1)), start=1)\n",
+    "]\n",
+    "df = pd.DataFrame(grid_records, columns=[\"log\", \"ratio_top_20_variants\",\n",
+    "                                         \"epa_normalized_sequence_entropy_linear_forgetting\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "b1e3bb5a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+    "# Write the grid frame to ../data/grid_objectives.csv without the index column.\n",
+    "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "5de45389",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " log | \n",
+ " ratio_top_20_variants | \n",
+ " normalized_sequence_entropy_linear_forgetting | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " objective_1 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " objective_2 | \n",
+ " 0.0 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " objective_3 | \n",
+ " 0.0 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " objective_4 | \n",
+ " 0.0 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " objective_5 | \n",
+ " 0.0 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " objective_6 | \n",
+ " 0.0 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " objective_7 | \n",
+ " 0.2 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " objective_8 | \n",
+ " 0.2 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " objective_9 | \n",
+ " 0.2 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " objective_10 | \n",
+ " 0.2 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " objective_11 | \n",
+ " 0.2 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " objective_12 | \n",
+ " 0.2 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " objective_13 | \n",
+ " 0.4 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " objective_14 | \n",
+ " 0.4 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " objective_15 | \n",
+ " 0.4 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " objective_16 | \n",
+ " 0.4 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " objective_17 | \n",
+ " 0.4 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " objective_18 | \n",
+ " 0.4 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " objective_19 | \n",
+ " 0.6 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " objective_20 | \n",
+ " 0.6 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " objective_21 | \n",
+ " 0.6 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " objective_22 | \n",
+ " 0.6 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " objective_23 | \n",
+ " 0.6 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " objective_24 | \n",
+ " 0.6 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " objective_25 | \n",
+ " 0.8 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " objective_26 | \n",
+ " 0.8 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " objective_27 | \n",
+ " 0.8 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " objective_28 | \n",
+ " 0.8 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " objective_29 | \n",
+ " 0.8 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " objective_30 | \n",
+ " 0.8 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " objective_31 | \n",
+ " 1.0 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " objective_32 | \n",
+ " 1.0 | \n",
+ " 0.1 | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " objective_33 | \n",
+ " 1.0 | \n",
+ " 0.2 | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " objective_34 | \n",
+ " 1.0 | \n",
+ " 0.3 | \n",
+ "
\n",
+ " \n",
+ " 34 | \n",
+ " objective_35 | \n",
+ " 1.0 | \n",
+ " 0.4 | \n",
+ "
\n",
+ " \n",
+ " 35 | \n",
+ " objective_36 | \n",
+ " 1.0 | \n",
+ " 0.5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " log ratio_top_20_variants \n",
+ "0 objective_1 0.0 \\\n",
+ "1 objective_2 0.0 \n",
+ "2 objective_3 0.0 \n",
+ "3 objective_4 0.0 \n",
+ "4 objective_5 0.0 \n",
+ "5 objective_6 0.0 \n",
+ "6 objective_7 0.2 \n",
+ "7 objective_8 0.2 \n",
+ "8 objective_9 0.2 \n",
+ "9 objective_10 0.2 \n",
+ "10 objective_11 0.2 \n",
+ "11 objective_12 0.2 \n",
+ "12 objective_13 0.4 \n",
+ "13 objective_14 0.4 \n",
+ "14 objective_15 0.4 \n",
+ "15 objective_16 0.4 \n",
+ "16 objective_17 0.4 \n",
+ "17 objective_18 0.4 \n",
+ "18 objective_19 0.6 \n",
+ "19 objective_20 0.6 \n",
+ "20 objective_21 0.6 \n",
+ "21 objective_22 0.6 \n",
+ "22 objective_23 0.6 \n",
+ "23 objective_24 0.6 \n",
+ "24 objective_25 0.8 \n",
+ "25 objective_26 0.8 \n",
+ "26 objective_27 0.8 \n",
+ "27 objective_28 0.8 \n",
+ "28 objective_29 0.8 \n",
+ "29 objective_30 0.8 \n",
+ "30 objective_31 1.0 \n",
+ "31 objective_32 1.0 \n",
+ "32 objective_33 1.0 \n",
+ "33 objective_34 1.0 \n",
+ "34 objective_35 1.0 \n",
+ "35 objective_36 1.0 \n",
+ "\n",
+ " normalized_sequence_entropy_linear_forgetting \n",
+ "0 0.0 \n",
+ "1 0.1 \n",
+ "2 0.2 \n",
+ "3 0.3 \n",
+ "4 0.4 \n",
+ "5 0.5 \n",
+ "6 0.0 \n",
+ "7 0.1 \n",
+ "8 0.2 \n",
+ "9 0.3 \n",
+ "10 0.4 \n",
+ "11 0.5 \n",
+ "12 0.0 \n",
+ "13 0.1 \n",
+ "14 0.2 \n",
+ "15 0.3 \n",
+ "16 0.4 \n",
+ "17 0.5 \n",
+ "18 0.0 \n",
+ "19 0.1 \n",
+ "20 0.2 \n",
+ "21 0.3 \n",
+ "22 0.4 \n",
+ "23 0.5 \n",
+ "24 0.0 \n",
+ "25 0.1 \n",
+ "26 0.2 \n",
+ "27 0.3 \n",
+ "28 0.4 \n",
+ "29 0.5 \n",
+ "30 0.0 \n",
+ "31 0.1 \n",
+ "32 0.2 \n",
+ "33 0.3 \n",
+ "34 0.4 \n",
+ "35 0.5 "
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Display the objective grid currently held in `df`.\n",
+    "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c12bc19d",
+ "metadata": {},
+ "source": [
+ "## Objectives from real logs\n",
+ "(Feature selection)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "39ac74bb",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(34, 178)\n",
+ "34 Event-Logs: ['BPI2016_Complaints' 'BPI2016_Questions' 'BPI2016_Werkmap_Messages'\n",
+ " 'BPIC15_1' 'BPIC15_2' 'BPIC15_3' 'BPIC15_4' 'BPIC15_5'\n",
+ " 'BPI_Challenge_2012' 'BPI_Challenge_2013_closed_problems'\n",
+ " 'BPI_Challenge_2013_incidents' 'BPI_Challenge_2013_open_problems'\n",
+ " 'BPI_Challenge_2017' 'BPI_Challenge_2017_Offer_log' 'BPI_Challenge_2018'\n",
+ " 'BPI_Challenge_2019' 'CoSeLoG_WABO_1' 'CoSeLoG_WABO_2' 'CoSeLoG_WABO_3'\n",
+ " 'CoSeLoG_WABO_4' 'CoSeLoG_WABO_5' 'Detail_Change'\n",
+ " 'Detail_Incident_Activity' 'Detail_Interaction' 'DomesticDeclarations'\n",
+ " 'Hospital_log' 'InternationalDeclarations' 'PermitLog'\n",
+ " 'PrepaidTravelCost' 'Receipt_WABO_CoSeLoG' 'RequestForPayment'\n",
+ " 'Road_Traffic_Fine_Management_Process' 'Sepsis_Cases_Event_Log' 'finale']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " log | \n",
+ " n_traces | \n",
+ " n_unique_traces | \n",
+ " ratio_unique_traces_per_trace | \n",
+ " trace_len_min | \n",
+ " trace_len_max | \n",
+ " trace_len_mean | \n",
+ " trace_len_median | \n",
+ " trace_len_mode | \n",
+ " trace_len_std | \n",
+ " trace_len_variance | \n",
+ " trace_len_q1 | \n",
+ " trace_len_q3 | \n",
+ " trace_len_iqr | \n",
+ " trace_len_geometric_mean | \n",
+ " trace_len_geometric_std | \n",
+ " trace_len_harmonic_mean | \n",
+ " trace_len_skewness | \n",
+ " trace_len_kurtosis | \n",
+ " trace_len_coefficient_variation | \n",
+ " trace_len_entropy | \n",
+ " trace_len_hist1 | \n",
+ " trace_len_hist2 | \n",
+ " trace_len_hist3 | \n",
+ " trace_len_hist4 | \n",
+ " trace_len_hist5 | \n",
+ " trace_len_hist6 | \n",
+ " trace_len_hist7 | \n",
+ " trace_len_hist8 | \n",
+ " trace_len_hist9 | \n",
+ " trace_len_hist10 | \n",
+ " trace_len_skewness_hist | \n",
+ " trace_len_kurtosis_hist | \n",
+ " ratio_most_common_variant | \n",
+ " ratio_top_1_variants | \n",
+ " ratio_top_5_variants | \n",
+ " ratio_top_10_variants | \n",
+ " ratio_top_20_variants | \n",
+ " ratio_top_50_variants | \n",
+ " ratio_top_75_variants | \n",
+ " mean_variant_occurrence | \n",
+ " std_variant_occurrence | \n",
+ " skewness_variant_occurrence | \n",
+ " kurtosis_variant_occurrence | \n",
+ " n_unique_activities | \n",
+ " activities_min | \n",
+ " activities_max | \n",
+ " activities_mean | \n",
+ " activities_median | \n",
+ " activities_std | \n",
+ " activities_variance | \n",
+ " activities_q1 | \n",
+ " activities_q3 | \n",
+ " activities_iqr | \n",
+ " activities_skewness | \n",
+ " activities_kurtosis | \n",
+ " n_unique_start_activities | \n",
+ " start_activities_min | \n",
+ " start_activities_max | \n",
+ " start_activities_mean | \n",
+ " start_activities_median | \n",
+ " start_activities_std | \n",
+ " start_activities_variance | \n",
+ " start_activities_q1 | \n",
+ " start_activities_q3 | \n",
+ " start_activities_iqr | \n",
+ " start_activities_skewness | \n",
+ " start_activities_kurtosis | \n",
+ " n_unique_end_activities | \n",
+ " end_activities_min | \n",
+ " end_activities_max | \n",
+ " end_activities_mean | \n",
+ " end_activities_median | \n",
+ " end_activities_std | \n",
+ " end_activities_variance | \n",
+ " end_activities_q1 | \n",
+ " end_activities_q3 | \n",
+ " end_activities_iqr | \n",
+ " end_activities_skewness | \n",
+ " end_activities_kurtosis | \n",
+ " entropy_trace | \n",
+ " entropy_prefix | \n",
+ " entropy_global_block | \n",
+ " entropy_lempel_ziv | \n",
+ " entropy_k_block_diff_1 | \n",
+ " entropy_k_block_diff_3 | \n",
+ " entropy_k_block_diff_5 | \n",
+ " entropy_k_block_ratio_1 | \n",
+ " entropy_k_block_ratio_3 | \n",
+ " entropy_k_block_ratio_5 | \n",
+ " entropy_knn_3 | \n",
+ " entropy_knn_5 | \n",
+ " entropy_knn_7 | \n",
+ " Log Nature | \n",
+ " epa_variant_entropy | \n",
+ " epa_normalized_variant_entropy | \n",
+ " epa_sequence_entropy | \n",
+ " epa_normalized_sequence_entropy | \n",
+ " epa_sequence_entropy_linear_forgetting | \n",
+ " epa_normalized_sequence_entropy_linear_forgetting | \n",
+ " epa_sequence_entropy_exponential_forgetting | \n",
+ " epa_normalized_sequence_entropy_exponential_forgetting | \n",
+ " accumulated_time_time_min | \n",
+ " accumulated_time_time_max | \n",
+ " accumulated_time_time_mean | \n",
+ " accumulated_time_time_median | \n",
+ " accumulated_time_time_mode | \n",
+ " accumulated_time_time_std | \n",
+ " accumulated_time_time_variance | \n",
+ " accumulated_time_time_q1 | \n",
+ " accumulated_time_time_q3 | \n",
+ " accumulated_time_time_iqr | \n",
+ " accumulated_time_time_geometric_mean | \n",
+ " accumulated_time_time_geometric_std | \n",
+ " accumulated_time_time_harmonic_mean | \n",
+ " accumulated_time_time_skewness | \n",
+ " accumulated_time_time_kurtosis | \n",
+ " accumulated_time_time_coefficient_variation | \n",
+ " accumulated_time_time_entropy | \n",
+ " accumulated_time_time_skewness_hist | \n",
+ " accumulated_time_time_kurtosis_hist | \n",
+ " execution_time_time_min | \n",
+ " execution_time_time_max | \n",
+ " execution_time_time_mean | \n",
+ " execution_time_time_median | \n",
+ " execution_time_time_mode | \n",
+ " execution_time_time_std | \n",
+ " execution_time_time_variance | \n",
+ " execution_time_time_q1 | \n",
+ " execution_time_time_q3 | \n",
+ " execution_time_time_iqr | \n",
+ " execution_time_time_geometric_mean | \n",
+ " execution_time_time_geometric_std | \n",
+ " execution_time_time_harmonic_mean | \n",
+ " execution_time_time_skewness | \n",
+ " execution_time_time_kurtosis | \n",
+ " execution_time_time_coefficient_variation | \n",
+ " execution_time_time_entropy | \n",
+ " execution_time_time_skewness_hist | \n",
+ " execution_time_time_kurtosis_hist | \n",
+ " remaining_time_time_min | \n",
+ " remaining_time_time_max | \n",
+ " remaining_time_time_mean | \n",
+ " remaining_time_time_median | \n",
+ " remaining_time_time_mode | \n",
+ " remaining_time_time_std | \n",
+ " remaining_time_time_variance | \n",
+ " remaining_time_time_q1 | \n",
+ " remaining_time_time_q3 | \n",
+ " remaining_time_time_iqr | \n",
+ " remaining_time_time_geometric_mean | \n",
+ " remaining_time_time_geometric_std | \n",
+ " remaining_time_time_harmonic_mean | \n",
+ " remaining_time_time_skewness | \n",
+ " remaining_time_time_kurtosis | \n",
+ " remaining_time_time_coefficient_variation | \n",
+ " remaining_time_time_entropy | \n",
+ " remaining_time_time_skewness_hist | \n",
+ " remaining_time_time_kurtosis_hist | \n",
+ " within_day_time_min | \n",
+ " within_day_time_max | \n",
+ " within_day_time_mean | \n",
+ " within_day_time_median | \n",
+ " within_day_time_mode | \n",
+ " within_day_time_std | \n",
+ " within_day_time_variance | \n",
+ " within_day_time_q1 | \n",
+ " within_day_time_q3 | \n",
+ " within_day_time_iqr | \n",
+ " within_day_time_geometric_mean | \n",
+ " within_day_time_geometric_std | \n",
+ " within_day_time_harmonic_mean | \n",
+ " within_day_time_skewness | \n",
+ " within_day_time_kurtosis | \n",
+ " within_day_time_coefficient_variation | \n",
+ " within_day_time_entropy | \n",
+ " within_day_time_skewness_hist | \n",
+ " within_day_time_kurtosis_hist | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " BPIC15_2 | \n",
+ " 832 | \n",
+ " 828 | \n",
+ " 0.995192 | \n",
+ " 1 | \n",
+ " 132 | \n",
+ " 53.310096 | \n",
+ " 54.0 | \n",
+ " 61 | \n",
+ " 19.894977 | \n",
+ " 395.810090 | \n",
+ " 44.0 | \n",
+ " 62.0 | \n",
+ " 18.0 | \n",
+ " 48.150111 | \n",
+ " 1.695311 | \n",
+ " 37.583741 | \n",
+ " 0.054138 | \n",
+ " 0.804992 | \n",
+ " 0.373193 | \n",
+ " 6.646715 | \n",
+ " 0.003853 | \n",
+ " 0.004863 | \n",
+ " 4.679243e-03 | \n",
+ " 0.023947 | \n",
+ " 2.376321e-02 | \n",
+ " 8.257487e-03 | \n",
+ " 0.004771 | \n",
+ " 1.376248e-03 | \n",
+ " 6.422490e-04 | \n",
+ " 1.834997e-04 | \n",
+ " 0.054138 | \n",
+ " 0.804992 | \n",
+ " 0.002404 | \n",
+ " 0.014423 | \n",
+ " 0.054087 | \n",
+ " 0.103365 | \n",
+ " 0.203125 | \n",
+ " 0.502404 | \n",
+ " 0.751202 | \n",
+ " 1.004831 | \n",
+ " 0.069337 | \n",
+ " 14.283027 | \n",
+ " 202.004854 | \n",
+ " 410 | \n",
+ " 1 | \n",
+ " 830 | \n",
+ " 108.180488 | \n",
+ " 12.0 | \n",
+ " 187.588162 | \n",
+ " 3.518932e+04 | \n",
+ " 3.0 | \n",
+ " 125.5 | \n",
+ " 122.5 | \n",
+ " 2.129412 | \n",
+ " 3.808278 | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 731 | \n",
+ " 59.428571 | \n",
+ " 1.0 | \n",
+ " 186.717401 | \n",
+ " 3.486339e+04 | \n",
+ " 1.0 | \n",
+ " 8.25 | \n",
+ " 7.25 | \n",
+ " 3.300411 | \n",
+ " 8.960767 | \n",
+ " 82 | \n",
+ " 1 | \n",
+ " 216 | \n",
+ " 10.146341 | \n",
+ " 1.0 | \n",
+ " 35.318800 | \n",
+ " 1.247418e+03 | \n",
+ " 1.00 | \n",
+ " 3.00 | \n",
+ " 2.00 | \n",
+ " 5.098791 | \n",
+ " 25.861991 | \n",
+ " 9.691 | \n",
+ " 14.524 | \n",
+ " 19.448 | \n",
+ " 3.859 | \n",
+ " 7.105 | \n",
+ " 7.105 | \n",
+ " 7.105 | \n",
+ " 7.105 | \n",
+ " 7.105 | \n",
+ " 7.105 | \n",
+ " 5.545 | \n",
+ " 5.039 | \n",
+ " 4.721 | \n",
+ " Real | \n",
+ " 2.405122e+05 | \n",
+ " 0.627973 | \n",
+ " 2.858769e+05 | \n",
+ " 0.602371 | \n",
+ " 1.505466e+05 | \n",
+ " 0.317217 | \n",
+ " 1.853129e+05 | \n",
+ " 0.390473 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " BPI_Challenge_2018 | \n",
+ " 43809 | \n",
+ " 28457 | \n",
+ " 0.649570 | \n",
+ " 24 | \n",
+ " 2973 | \n",
+ " 57.391541 | \n",
+ " 49.0 | \n",
+ " 49 | \n",
+ " 34.872131 | \n",
+ " 1216.065487 | \n",
+ " 44.0 | \n",
+ " 59.0 | \n",
+ " 15.0 | \n",
+ " 53.775008 | \n",
+ " 1.367397 | \n",
+ " 51.651502 | \n",
+ " 26.126459 | \n",
+ " 1720.399665 | \n",
+ " 0.607618 | \n",
+ " 10.598758 | \n",
+ " 0.003385 | \n",
+ " 0.000005 | \n",
+ " 9.288448e-07 | \n",
+ " 0.000000 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000e+00 | \n",
+ " 0.000000 | \n",
+ " 0.000000e+00 | \n",
+ " 7.740373e-08 | \n",
+ " 7.740373e-08 | \n",
+ " 26.126459 | \n",
+ " 1720.399665 | \n",
+ " 0.026981 | \n",
+ " 0.290374 | \n",
+ " 0.373006 | \n",
+ " 0.415371 | \n",
+ " 0.480335 | \n",
+ " 0.675204 | \n",
+ " 0.837590 | \n",
+ " 1.539481 | \n",
+ " 12.487438 | \n",
+ " 64.625680 | \n",
+ " 5083.455806 | \n",
+ " 41 | \n",
+ " 17 | \n",
+ " 466141 | \n",
+ " 61323.560976 | \n",
+ " 7530.0 | \n",
+ " 120522.247417 | \n",
+ " 1.452561e+10 | \n",
+ " 902.0 | \n",
+ " 45907.0 | \n",
+ " 45005.0 | \n",
+ " 2.444007 | \n",
+ " 4.773254 | \n",
+ " 4 | \n",
+ " 2 | \n",
+ " 38623 | \n",
+ " 10952.250000 | \n",
+ " 2592.0 | \n",
+ " 16111.407548 | \n",
+ " 2.595775e+08 | \n",
+ " 36.5 | \n",
+ " 13507.75 | \n",
+ " 13471.25 | \n",
+ " 1.098736 | \n",
+ " -0.714800 | \n",
+ " 21 | \n",
+ " 1 | \n",
+ " 34830 | \n",
+ " 2086.142857 | \n",
+ " 13.0 | \n",
+ " 7431.744981 | \n",
+ " 5.523083e+07 | \n",
+ " 2.00 | \n",
+ " 193.00 | \n",
+ " 191.00 | \n",
+ " 4.062387 | \n",
+ " 14.952824 | \n",
+ " 13.191 | \n",
+ " 16.272 | \n",
+ " 20.972 | \n",
+ " 1.023 | \n",
+ " -0.010 | \n",
+ " 1.855 | \n",
+ " 0.511 | \n",
+ " 1.403 | \n",
+ " 3.572 | \n",
+ " 2.001 | \n",
+ " 7.849 | \n",
+ " 7.371 | \n",
+ " 7.067 | \n",
+ " Real | \n",
+ " 1.156384e+07 | \n",
+ " 0.712079 | \n",
+ " 2.114626e+07 | \n",
+ " 0.570688 | \n",
+ " 1.414023e+07 | \n",
+ " 0.381612 | \n",
+ " 1.557608e+07 | \n",
+ " 0.420362 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Receipt_WABO_CoSeLoG | \n",
+ " 1434 | \n",
+ " 116 | \n",
+ " 0.080893 | \n",
+ " 1 | \n",
+ " 25 | \n",
+ " 5.981172 | \n",
+ " 6.0 | \n",
+ " 6 | \n",
+ " 2.166129 | \n",
+ " 4.692114 | \n",
+ " 6.0 | \n",
+ " 6.0 | \n",
+ " 0.0 | \n",
+ " 5.414708 | \n",
+ " 1.704965 | \n",
+ " 4.356445 | \n",
+ " 1.276525 | \n",
+ " 12.296006 | \n",
+ " 0.362158 | \n",
+ " 7.197193 | \n",
+ " 0.036030 | \n",
+ " 0.008136 | \n",
+ " 3.411204e-01 | \n",
+ " 0.023536 | \n",
+ " 3.777313e-03 | \n",
+ " 1.743375e-03 | \n",
+ " 0.000291 | \n",
+ " 1.452813e-03 | \n",
+ " 0.000000e+00 | \n",
+ " 5.811251e-04 | \n",
+ " 1.276525 | \n",
+ " 12.296006 | \n",
+ " 0.497211 | \n",
+ " 0.497211 | \n",
+ " 0.796374 | \n",
+ " 0.887029 | \n",
+ " 0.930265 | \n",
+ " 0.959554 | \n",
+ " 0.979777 | \n",
+ " 12.362069 | \n",
+ " 68.360277 | \n",
+ " 9.380687 | \n",
+ " 92.281919 | \n",
+ " 27 | \n",
+ " 1 | \n",
+ " 1434 | \n",
+ " 317.666667 | \n",
+ " 27.0 | \n",
+ " 553.389823 | \n",
+ " 3.062403e+05 | \n",
+ " 8.0 | \n",
+ " 50.0 | \n",
+ " 42.0 | \n",
+ " 1.342951 | \n",
+ " -0.178094 | \n",
+ " 1 | \n",
+ " 1434 | \n",
+ " 1434 | \n",
+ " 1434.000000 | \n",
+ " 1434.0 | \n",
+ " 0.000000 | \n",
+ " 0.000000e+00 | \n",
+ " 1434.0 | \n",
+ " 1434.00 | \n",
+ " 0.00 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 14 | \n",
+ " 1 | \n",
+ " 828 | \n",
+ " 102.428571 | \n",
+ " 6.0 | \n",
+ " 225.871555 | \n",
+ " 5.101796e+04 | \n",
+ " 1.25 | \n",
+ " 33.25 | \n",
+ " 32.00 | \n",
+ " 2.471765 | \n",
+ " 4.846541 | \n",
+ " 3.209 | \n",
+ " 4.746 | \n",
+ " 7.019 | \n",
+ " 0.385 | \n",
+ " 2.672 | \n",
+ " 2.966 | \n",
+ " 0.804 | \n",
+ " 1.484 | \n",
+ " 2.966 | \n",
+ " 2.966 | \n",
+ " 3.260 | \n",
+ " 2.845 | \n",
+ " 2.584 | \n",
+ " Real | \n",
+ " 2.382326e+03 | \n",
+ " 0.689363 | \n",
+ " 1.829627e+04 | \n",
+ " 0.235532 | \n",
+ " 7.814868e+03 | \n",
+ " 0.100603 | \n",
+ " 1.072870e+04 | \n",
+ " 0.138113 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BPIC15_3 | \n",
+ " 1409 | \n",
+ " 1349 | \n",
+ " 0.957417 | \n",
+ " 3 | \n",
+ " 124 | \n",
+ " 42.356991 | \n",
+ " 42.0 | \n",
+ " 44 | \n",
+ " 16.138406 | \n",
+ " 260.448143 | \n",
+ " 38.0 | \n",
+ " 47.0 | \n",
+ " 9.0 | \n",
+ " 37.637731 | \n",
+ " 1.786726 | \n",
+ " 29.092933 | \n",
+ " -0.009541 | \n",
+ " 1.543369 | \n",
+ " 0.381009 | \n",
+ " 7.167153 | \n",
+ " 0.006921 | \n",
+ " 0.004340 | \n",
+ " 1.630604e-02 | \n",
+ " 0.036953 | \n",
+ " 1.173096e-02 | \n",
+ " 4.105837e-03 | \n",
+ " 0.001584 | \n",
+ " 5.278933e-04 | \n",
+ " 1.173096e-04 | \n",
+ " 5.865481e-05 | \n",
+ " -0.009541 | \n",
+ " 1.543369 | \n",
+ " 0.010646 | \n",
+ " 0.049681 | \n",
+ " 0.090135 | \n",
+ " 0.137686 | \n",
+ " 0.233499 | \n",
+ " 0.520937 | \n",
+ " 0.760114 | \n",
+ " 1.044477 | \n",
+ " 0.592348 | \n",
+ " 17.964130 | \n",
+ " 358.019511 | \n",
+ " 383 | \n",
+ " 1 | \n",
+ " 1409 | \n",
+ " 155.825065 | \n",
+ " 16.0 | \n",
+ " 306.310544 | \n",
+ " 9.382615e+04 | \n",
+ " 5.0 | \n",
+ " 108.5 | \n",
+ " 103.5 | \n",
+ " 2.446349 | \n",
+ " 5.280931 | \n",
+ " 9 | \n",
+ " 1 | \n",
+ " 1348 | \n",
+ " 156.555556 | \n",
+ " 8.0 | \n",
+ " 421.270858 | \n",
+ " 1.774691e+05 | \n",
+ " 3.0 | \n",
+ " 14.00 | \n",
+ " 11.00 | \n",
+ " 2.474158 | \n",
+ " 4.122971 | \n",
+ " 119 | \n",
+ " 1 | \n",
+ " 342 | \n",
+ " 11.840336 | \n",
+ " 2.0 | \n",
+ " 39.557210 | \n",
+ " 1.564773e+03 | \n",
+ " 1.00 | \n",
+ " 7.00 | \n",
+ " 6.00 | \n",
+ " 6.217217 | \n",
+ " 43.335525 | \n",
+ " 10.317 | \n",
+ " 14.226 | \n",
+ " 18.743 | \n",
+ " 3.182 | \n",
+ " -0.007 | \n",
+ " 6.780 | \n",
+ " 6.780 | \n",
+ " 6.780 | \n",
+ " 6.780 | \n",
+ " 6.780 | \n",
+ " 5.701 | \n",
+ " 5.212 | \n",
+ " 4.900 | \n",
+ " Real | \n",
+ " 2.981464e+05 | \n",
+ " 0.661781 | \n",
+ " 3.975043e+05 | \n",
+ " 0.605676 | \n",
+ " 2.241393e+05 | \n",
+ " 0.341521 | \n",
+ " 2.657571e+05 | \n",
+ " 0.404934 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BPI_Challenge_2019 | \n",
+ " 251734 | \n",
+ " 11973 | \n",
+ " 0.047562 | \n",
+ " 1 | \n",
+ " 990 | \n",
+ " 6.339720 | \n",
+ " 5.0 | \n",
+ " 5 | \n",
+ " 13.057417 | \n",
+ " 170.496137 | \n",
+ " 5.0 | \n",
+ " 6.0 | \n",
+ " 1.0 | \n",
+ " 5.173569 | \n",
+ " 1.635822 | \n",
+ " 4.592844 | \n",
+ " 22.132989 | \n",
+ " 753.772202 | \n",
+ " 2.059621 | \n",
+ " 12.044057 | \n",
+ " 0.010078 | \n",
+ " 0.000020 | \n",
+ " 9.559579e-06 | \n",
+ " 0.000003 | \n",
+ " 3.614967e-07 | \n",
+ " 1.606652e-07 | \n",
+ " 0.000000 | \n",
+ " 4.016630e-08 | \n",
+ " 8.033260e-08 | \n",
+ " 8.033260e-08 | \n",
+ " 22.132989 | \n",
+ " 753.772202 | \n",
+ " 0.199758 | \n",
+ " 0.871424 | \n",
+ " 0.929990 | \n",
+ " 0.946368 | \n",
+ " 0.959767 | \n",
+ " 0.976217 | \n",
+ " 0.988106 | \n",
+ " 21.025140 | \n",
+ " 594.255619 | \n",
+ " 64.772702 | \n",
+ " 4917.319751 | \n",
+ " 42 | \n",
+ " 2 | \n",
+ " 314097 | \n",
+ " 37998.166667 | \n",
+ " 1628.0 | \n",
+ " 80833.669206 | \n",
+ " 6.534082e+09 | \n",
+ " 202.0 | \n",
+ " 11536.0 | \n",
+ " 11334.0 | \n",
+ " 2.169648 | \n",
+ " 3.263594 | \n",
+ " 8 | \n",
+ " 2 | \n",
+ " 199867 | \n",
+ " 31466.750000 | \n",
+ " 869.0 | \n",
+ " 65387.493286 | \n",
+ " 4.275524e+09 | \n",
+ " 97.0 | \n",
+ " 14224.25 | \n",
+ " 14127.25 | \n",
+ " 2.059742 | \n",
+ " 2.535789 | \n",
+ " 32 | \n",
+ " 1 | \n",
+ " 181328 | \n",
+ " 7866.687500 | \n",
+ " 64.5 | \n",
+ " 31658.428996 | \n",
+ " 1.002256e+09 | \n",
+ " 9.00 | \n",
+ " 1027.25 | \n",
+ " 1018.25 | \n",
+ " 5.135607 | \n",
+ " 25.170543 | \n",
+ " 6.243 | \n",
+ " 8.811 | \n",
+ " 19.447 | \n",
+ " 0.346 | \n",
+ " -0.041 | \n",
+ " 1.530 | \n",
+ " 0.840 | \n",
+ " 0.620 | \n",
+ " 3.244 | \n",
+ " 1.913 | \n",
+ " 7.333 | \n",
+ " 6.882 | \n",
+ " 6.601 | \n",
+ " Real | \n",
+ " 1.690369e+06 | \n",
+ " 0.645530 | \n",
+ " 7.477256e+06 | \n",
+ " 0.328029 | \n",
+ " 7.298458e+06 | \n",
+ " 0.320185 | \n",
+ " 7.300663e+06 | \n",
+ " 0.320282 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " log n_traces n_unique_traces \n",
+ "0 BPIC15_2 832 828 \\\n",
+ "1 BPI_Challenge_2018 43809 28457 \n",
+ "2 Receipt_WABO_CoSeLoG 1434 116 \n",
+ "3 BPIC15_3 1409 1349 \n",
+ "4 BPI_Challenge_2019 251734 11973 \n",
+ "\n",
+ " ratio_unique_traces_per_trace trace_len_min trace_len_max \n",
+ "0 0.995192 1 132 \\\n",
+ "1 0.649570 24 2973 \n",
+ "2 0.080893 1 25 \n",
+ "3 0.957417 3 124 \n",
+ "4 0.047562 1 990 \n",
+ "\n",
+ " trace_len_mean trace_len_median trace_len_mode trace_len_std \n",
+ "0 53.310096 54.0 61 19.894977 \\\n",
+ "1 57.391541 49.0 49 34.872131 \n",
+ "2 5.981172 6.0 6 2.166129 \n",
+ "3 42.356991 42.0 44 16.138406 \n",
+ "4 6.339720 5.0 5 13.057417 \n",
+ "\n",
+ " trace_len_variance trace_len_q1 trace_len_q3 trace_len_iqr \n",
+ "0 395.810090 44.0 62.0 18.0 \\\n",
+ "1 1216.065487 44.0 59.0 15.0 \n",
+ "2 4.692114 6.0 6.0 0.0 \n",
+ "3 260.448143 38.0 47.0 9.0 \n",
+ "4 170.496137 5.0 6.0 1.0 \n",
+ "\n",
+ " trace_len_geometric_mean trace_len_geometric_std trace_len_harmonic_mean \n",
+ "0 48.150111 1.695311 37.583741 \\\n",
+ "1 53.775008 1.367397 51.651502 \n",
+ "2 5.414708 1.704965 4.356445 \n",
+ "3 37.637731 1.786726 29.092933 \n",
+ "4 5.173569 1.635822 4.592844 \n",
+ "\n",
+ " trace_len_skewness trace_len_kurtosis trace_len_coefficient_variation \n",
+ "0 0.054138 0.804992 0.373193 \\\n",
+ "1 26.126459 1720.399665 0.607618 \n",
+ "2 1.276525 12.296006 0.362158 \n",
+ "3 -0.009541 1.543369 0.381009 \n",
+ "4 22.132989 753.772202 2.059621 \n",
+ "\n",
+ " trace_len_entropy trace_len_hist1 trace_len_hist2 trace_len_hist3 \n",
+ "0 6.646715 0.003853 0.004863 4.679243e-03 \\\n",
+ "1 10.598758 0.003385 0.000005 9.288448e-07 \n",
+ "2 7.197193 0.036030 0.008136 3.411204e-01 \n",
+ "3 7.167153 0.006921 0.004340 1.630604e-02 \n",
+ "4 12.044057 0.010078 0.000020 9.559579e-06 \n",
+ "\n",
+ " trace_len_hist4 trace_len_hist5 trace_len_hist6 trace_len_hist7 \n",
+ "0 0.023947 2.376321e-02 8.257487e-03 0.004771 \\\n",
+ "1 0.000000 0.000000e+00 0.000000e+00 0.000000 \n",
+ "2 0.023536 3.777313e-03 1.743375e-03 0.000291 \n",
+ "3 0.036953 1.173096e-02 4.105837e-03 0.001584 \n",
+ "4 0.000003 3.614967e-07 1.606652e-07 0.000000 \n",
+ "\n",
+ " trace_len_hist8 trace_len_hist9 trace_len_hist10 \n",
+ "0 1.376248e-03 6.422490e-04 1.834997e-04 \\\n",
+ "1 0.000000e+00 7.740373e-08 7.740373e-08 \n",
+ "2 1.452813e-03 0.000000e+00 5.811251e-04 \n",
+ "3 5.278933e-04 1.173096e-04 5.865481e-05 \n",
+ "4 4.016630e-08 8.033260e-08 8.033260e-08 \n",
+ "\n",
+ " trace_len_skewness_hist trace_len_kurtosis_hist \n",
+ "0 0.054138 0.804992 \\\n",
+ "1 26.126459 1720.399665 \n",
+ "2 1.276525 12.296006 \n",
+ "3 -0.009541 1.543369 \n",
+ "4 22.132989 753.772202 \n",
+ "\n",
+ " ratio_most_common_variant ratio_top_1_variants ratio_top_5_variants \n",
+ "0 0.002404 0.014423 0.054087 \\\n",
+ "1 0.026981 0.290374 0.373006 \n",
+ "2 0.497211 0.497211 0.796374 \n",
+ "3 0.010646 0.049681 0.090135 \n",
+ "4 0.199758 0.871424 0.929990 \n",
+ "\n",
+ " ratio_top_10_variants ratio_top_20_variants ratio_top_50_variants \n",
+ "0 0.103365 0.203125 0.502404 \\\n",
+ "1 0.415371 0.480335 0.675204 \n",
+ "2 0.887029 0.930265 0.959554 \n",
+ "3 0.137686 0.233499 0.520937 \n",
+ "4 0.946368 0.959767 0.976217 \n",
+ "\n",
+ " ratio_top_75_variants mean_variant_occurrence std_variant_occurrence \n",
+ "0 0.751202 1.004831 0.069337 \\\n",
+ "1 0.837590 1.539481 12.487438 \n",
+ "2 0.979777 12.362069 68.360277 \n",
+ "3 0.760114 1.044477 0.592348 \n",
+ "4 0.988106 21.025140 594.255619 \n",
+ "\n",
+ " skewness_variant_occurrence kurtosis_variant_occurrence \n",
+ "0 14.283027 202.004854 \\\n",
+ "1 64.625680 5083.455806 \n",
+ "2 9.380687 92.281919 \n",
+ "3 17.964130 358.019511 \n",
+ "4 64.772702 4917.319751 \n",
+ "\n",
+ " n_unique_activities activities_min activities_max activities_mean \n",
+ "0 410 1 830 108.180488 \\\n",
+ "1 41 17 466141 61323.560976 \n",
+ "2 27 1 1434 317.666667 \n",
+ "3 383 1 1409 155.825065 \n",
+ "4 42 2 314097 37998.166667 \n",
+ "\n",
+ " activities_median activities_std activities_variance activities_q1 \n",
+ "0 12.0 187.588162 3.518932e+04 3.0 \\\n",
+ "1 7530.0 120522.247417 1.452561e+10 902.0 \n",
+ "2 27.0 553.389823 3.062403e+05 8.0 \n",
+ "3 16.0 306.310544 9.382615e+04 5.0 \n",
+ "4 1628.0 80833.669206 6.534082e+09 202.0 \n",
+ "\n",
+ " activities_q3 activities_iqr activities_skewness activities_kurtosis \n",
+ "0 125.5 122.5 2.129412 3.808278 \\\n",
+ "1 45907.0 45005.0 2.444007 4.773254 \n",
+ "2 50.0 42.0 1.342951 -0.178094 \n",
+ "3 108.5 103.5 2.446349 5.280931 \n",
+ "4 11536.0 11334.0 2.169648 3.263594 \n",
+ "\n",
+ " n_unique_start_activities start_activities_min start_activities_max \n",
+ "0 14 1 731 \\\n",
+ "1 4 2 38623 \n",
+ "2 1 1434 1434 \n",
+ "3 9 1 1348 \n",
+ "4 8 2 199867 \n",
+ "\n",
+ " start_activities_mean start_activities_median start_activities_std \n",
+ "0 59.428571 1.0 186.717401 \\\n",
+ "1 10952.250000 2592.0 16111.407548 \n",
+ "2 1434.000000 1434.0 0.000000 \n",
+ "3 156.555556 8.0 421.270858 \n",
+ "4 31466.750000 869.0 65387.493286 \n",
+ "\n",
+ " start_activities_variance start_activities_q1 start_activities_q3 \n",
+ "0 3.486339e+04 1.0 8.25 \\\n",
+ "1 2.595775e+08 36.5 13507.75 \n",
+ "2 0.000000e+00 1434.0 1434.00 \n",
+ "3 1.774691e+05 3.0 14.00 \n",
+ "4 4.275524e+09 97.0 14224.25 \n",
+ "\n",
+ " start_activities_iqr start_activities_skewness start_activities_kurtosis \n",
+ "0 7.25 3.300411 8.960767 \\\n",
+ "1 13471.25 1.098736 -0.714800 \n",
+ "2 0.00 NaN NaN \n",
+ "3 11.00 2.474158 4.122971 \n",
+ "4 14127.25 2.059742 2.535789 \n",
+ "\n",
+ " n_unique_end_activities end_activities_min end_activities_max \n",
+ "0 82 1 216 \\\n",
+ "1 21 1 34830 \n",
+ "2 14 1 828 \n",
+ "3 119 1 342 \n",
+ "4 32 1 181328 \n",
+ "\n",
+ " end_activities_mean end_activities_median end_activities_std \n",
+ "0 10.146341 1.0 35.318800 \\\n",
+ "1 2086.142857 13.0 7431.744981 \n",
+ "2 102.428571 6.0 225.871555 \n",
+ "3 11.840336 2.0 39.557210 \n",
+ "4 7866.687500 64.5 31658.428996 \n",
+ "\n",
+ " end_activities_variance end_activities_q1 end_activities_q3 \n",
+ "0 1.247418e+03 1.00 3.00 \\\n",
+ "1 5.523083e+07 2.00 193.00 \n",
+ "2 5.101796e+04 1.25 33.25 \n",
+ "3 1.564773e+03 1.00 7.00 \n",
+ "4 1.002256e+09 9.00 1027.25 \n",
+ "\n",
+ " end_activities_iqr end_activities_skewness end_activities_kurtosis \n",
+ "0 2.00 5.098791 25.861991 \\\n",
+ "1 191.00 4.062387 14.952824 \n",
+ "2 32.00 2.471765 4.846541 \n",
+ "3 6.00 6.217217 43.335525 \n",
+ "4 1018.25 5.135607 25.170543 \n",
+ "\n",
+ " entropy_trace entropy_prefix entropy_global_block entropy_lempel_ziv \n",
+ "0 9.691 14.524 19.448 3.859 \\\n",
+ "1 13.191 16.272 20.972 1.023 \n",
+ "2 3.209 4.746 7.019 0.385 \n",
+ "3 10.317 14.226 18.743 3.182 \n",
+ "4 6.243 8.811 19.447 0.346 \n",
+ "\n",
+ " entropy_k_block_diff_1 entropy_k_block_diff_3 entropy_k_block_diff_5 \n",
+ "0 7.105 7.105 7.105 \\\n",
+ "1 -0.010 1.855 0.511 \n",
+ "2 2.672 2.966 0.804 \n",
+ "3 -0.007 6.780 6.780 \n",
+ "4 -0.041 1.530 0.840 \n",
+ "\n",
+ " entropy_k_block_ratio_1 entropy_k_block_ratio_3 entropy_k_block_ratio_5 \n",
+ "0 7.105 7.105 7.105 \\\n",
+ "1 1.403 3.572 2.001 \n",
+ "2 1.484 2.966 2.966 \n",
+ "3 6.780 6.780 6.780 \n",
+ "4 0.620 3.244 1.913 \n",
+ "\n",
+ " entropy_knn_3 entropy_knn_5 entropy_knn_7 Log Nature \n",
+ "0 5.545 5.039 4.721 Real \\\n",
+ "1 7.849 7.371 7.067 Real \n",
+ "2 3.260 2.845 2.584 Real \n",
+ "3 5.701 5.212 4.900 Real \n",
+ "4 7.333 6.882 6.601 Real \n",
+ "\n",
+ " epa_variant_entropy epa_normalized_variant_entropy epa_sequence_entropy \n",
+ "0 2.405122e+05 0.627973 2.858769e+05 \\\n",
+ "1 1.156384e+07 0.712079 2.114626e+07 \n",
+ "2 2.382326e+03 0.689363 1.829627e+04 \n",
+ "3 2.981464e+05 0.661781 3.975043e+05 \n",
+ "4 1.690369e+06 0.645530 7.477256e+06 \n",
+ "\n",
+ " epa_normalized_sequence_entropy epa_sequence_entropy_linear_forgetting \n",
+ "0 0.602371 1.505466e+05 \\\n",
+ "1 0.570688 1.414023e+07 \n",
+ "2 0.235532 7.814868e+03 \n",
+ "3 0.605676 2.241393e+05 \n",
+ "4 0.328029 7.298458e+06 \n",
+ "\n",
+ " epa_normalized_sequence_entropy_linear_forgetting \n",
+ "0 0.317217 \\\n",
+ "1 0.381612 \n",
+ "2 0.100603 \n",
+ "3 0.341521 \n",
+ "4 0.320185 \n",
+ "\n",
+ " epa_sequence_entropy_exponential_forgetting \n",
+ "0 1.853129e+05 \\\n",
+ "1 1.557608e+07 \n",
+ "2 1.072870e+04 \n",
+ "3 2.657571e+05 \n",
+ "4 7.300663e+06 \n",
+ "\n",
+ " epa_normalized_sequence_entropy_exponential_forgetting \n",
+ "0 0.390473 \\\n",
+ "1 0.420362 \n",
+ "2 0.138113 \n",
+ "3 0.404934 \n",
+ "4 0.320282 \n",
+ "\n",
+ " accumulated_time_time_min accumulated_time_time_max \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_mean accumulated_time_time_median \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_mode accumulated_time_time_std \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_variance accumulated_time_time_q1 \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_q3 accumulated_time_time_iqr \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_geometric_mean accumulated_time_time_geometric_std \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_harmonic_mean accumulated_time_time_skewness \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_kurtosis \n",
+ "0 NaN \\\n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN \n",
+ "\n",
+ " accumulated_time_time_coefficient_variation accumulated_time_time_entropy \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " accumulated_time_time_skewness_hist accumulated_time_time_kurtosis_hist \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_min execution_time_time_max execution_time_time_mean \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " execution_time_time_median execution_time_time_mode \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_std execution_time_time_variance \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_q1 execution_time_time_q3 execution_time_time_iqr \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " execution_time_time_geometric_mean execution_time_time_geometric_std \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_harmonic_mean execution_time_time_skewness \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_kurtosis execution_time_time_coefficient_variation \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_entropy execution_time_time_skewness_hist \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " execution_time_time_kurtosis_hist remaining_time_time_min \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_max remaining_time_time_mean \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_median remaining_time_time_mode \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_std remaining_time_time_variance \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_q1 remaining_time_time_q3 remaining_time_time_iqr \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " remaining_time_time_geometric_mean remaining_time_time_geometric_std \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_harmonic_mean remaining_time_time_skewness \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_kurtosis remaining_time_time_coefficient_variation \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_entropy remaining_time_time_skewness_hist \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " remaining_time_time_kurtosis_hist within_day_time_min \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_max within_day_time_mean within_day_time_median \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " within_day_time_mode within_day_time_std within_day_time_variance \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " within_day_time_q1 within_day_time_q3 within_day_time_iqr \n",
+ "0 NaN NaN NaN \\\n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "\n",
+ " within_day_time_geometric_mean within_day_time_geometric_std \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_harmonic_mean within_day_time_skewness \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_kurtosis within_day_time_coefficient_variation \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_entropy within_day_time_skewness_hist \n",
+ "0 NaN NaN \\\n",
+ "1 NaN NaN \n",
+ "2 NaN NaN \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ "\n",
+ " within_day_time_kurtosis_hist \n",
+ "0 NaN \n",
+ "1 NaN \n",
+ "2 NaN \n",
+ "3 NaN \n",
+ "4 NaN "
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Load the cached feature table of the real-world event logs.\n",
+ "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
+ "\n",
+ "# Quick sanity check: table dimensions, row count and the sorted distinct log names.\n",
+ "print(bpic_features.shape)\n",
+ "print(bpic_features.shape[0], \" Event-Logs: \", bpic_features['log'].sort_values().unique())\n",
+ "\n",
+ "# NOTE(review): the disabled steps below look like the one-off recipe that produced the\n",
+ "# cached CSV from the raw feature export; confirm before re-enabling any of them.\n",
+ "#   1. bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
+ "#   2. bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
+ "#   3. bpic_features.rename(columns={\n",
+ "#          \"variant_entropy\": \"epa_variant_entropy\",\n",
+ "#          \"normalized_variant_entropy\": \"epa_normalized_variant_entropy\",\n",
+ "#          \"sequence_entropy\": \"epa_sequence_entropy\",\n",
+ "#          \"normalized_sequence_entropy\": \"epa_normalized_sequence_entropy\",\n",
+ "#          \"sequence_entropy_linear_forgetting\": \"epa_sequence_entropy_linear_forgetting\",\n",
+ "#          \"normalized_sequence_entropy_linear_forgetting\": \"epa_normalized_sequence_entropy_linear_forgetting\",\n",
+ "#          \"sequence_entropy_exponential_forgetting\": \"epa_sequence_entropy_exponential_forgetting\",\n",
+ "#          \"normalized_sequence_entropy_exponential_forgetting\": \"epa_normalized_sequence_entropy_exponential_forgetting\",\n",
+ "#      }, errors=\"raise\", inplace=True)\n",
+ "#   4. bpic_features.to_csv(\"../data/34_bpic_features.csv\", index=False)\n",
+ "\n",
+ "# Keep this as the last expression so the notebook renders the preview table.\n",
+ "bpic_features.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "ef0df0b9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "['ratio_unique_traces_per_trace', 'ratio_most_common_variant', 'ratio_top_10_variants', 'epa_normalized_variant_entropy', 'epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_sequence_entropy_exponential_forgetting']\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " log | \n",
+ " ratio_unique_traces_per_trace | \n",
+ " ratio_most_common_variant | \n",
+ " ratio_top_10_variants | \n",
+ " epa_normalized_variant_entropy | \n",
+ " epa_normalized_sequence_entropy | \n",
+ " epa_normalized_sequence_entropy_linear_forgetting | \n",
+ " epa_normalized_sequence_entropy_exponential_forgetting | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " BPIC15_2 | \n",
+ " 0.995192 | \n",
+ " 0.002404 | \n",
+ " 0.103365 | \n",
+ " 0.627973 | \n",
+ " 0.602371 | \n",
+ " 0.317217 | \n",
+ " 0.390473 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " BPI_Challenge_2018 | \n",
+ " 0.649570 | \n",
+ " 0.026981 | \n",
+ " 0.415371 | \n",
+ " 0.712079 | \n",
+ " 0.570688 | \n",
+ " 0.381612 | \n",
+ " 0.420362 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Receipt_WABO_CoSeLoG | \n",
+ " 0.080893 | \n",
+ " 0.497211 | \n",
+ " 0.887029 | \n",
+ " 0.689363 | \n",
+ " 0.235532 | \n",
+ " 0.100603 | \n",
+ " 0.138113 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " BPIC15_3 | \n",
+ " 0.957417 | \n",
+ " 0.010646 | \n",
+ " 0.137686 | \n",
+ " 0.661781 | \n",
+ " 0.605676 | \n",
+ " 0.341521 | \n",
+ " 0.404934 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " BPI_Challenge_2019 | \n",
+ " 0.047562 | \n",
+ " 0.199758 | \n",
+ " 0.946368 | \n",
+ " 0.645530 | \n",
+ " 0.328029 | \n",
+ " 0.320185 | \n",
+ " 0.320282 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " RequestForPayment | \n",
+ " 0.012925 | \n",
+ " 0.437264 | \n",
+ " 0.933488 | \n",
+ " 0.703735 | \n",
+ " 0.189048 | \n",
+ " 0.097572 | \n",
+ " 0.118744 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " PrepaidTravelCost | \n",
+ " 0.096236 | \n",
+ " 0.271081 | \n",
+ " 0.822773 | \n",
+ " 0.723785 | \n",
+ " 0.317044 | \n",
+ " 0.184879 | \n",
+ " 0.214387 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " DomesticDeclarations | \n",
+ " 0.009429 | \n",
+ " 0.439810 | \n",
+ " 0.950095 | \n",
+ " 0.696474 | \n",
+ " 0.164758 | \n",
+ " 0.085439 | \n",
+ " 0.104389 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " BPIC15_4 | \n",
+ " 0.996201 | \n",
+ " 0.002849 | \n",
+ " 0.102564 | \n",
+ " 0.652985 | \n",
+ " 0.603866 | \n",
+ " 0.355927 | \n",
+ " 0.412835 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " BPI_Challenge_2012 | \n",
+ " 0.333614 | \n",
+ " 0.262016 | \n",
+ " 0.686254 | \n",
+ " 0.708280 | \n",
+ " 0.423074 | \n",
+ " 0.226133 | \n",
+ " 0.275551 | \n",
+ "
\n",
+ " \n",
+ " 10 | \n",
+ " Hospital_log | \n",
+ " 0.858268 | \n",
+ " 0.035871 | \n",
+ " 0.227472 | \n",
+ " 0.517443 | \n",
+ " 0.513032 | \n",
+ " 0.267825 | \n",
+ " 0.331672 | \n",
+ "
\n",
+ " \n",
+ " 11 | \n",
+ " BPIC15_5 | \n",
+ " 0.997405 | \n",
+ " 0.001730 | \n",
+ " 0.102076 | \n",
+ " 0.648702 | \n",
+ " 0.603260 | \n",
+ " 0.342410 | \n",
+ " 0.404580 | \n",
+ "
\n",
+ " \n",
+ " 12 | \n",
+ " CoSeLoG_WABO_2 | \n",
+ " 0.998450 | \n",
+ " 0.003101 | \n",
+ " 0.100775 | \n",
+ " 0.618455 | \n",
+ " 0.594035 | \n",
+ " 0.323233 | \n",
+ " 0.389858 | \n",
+ "
\n",
+ " \n",
+ " 13 | \n",
+ " Road_Traffic_Fine_Management_Process | \n",
+ " 0.001536 | \n",
+ " 0.375620 | \n",
+ " 0.993104 | \n",
+ " 0.769353 | \n",
+ " 0.111932 | \n",
+ " 0.052586 | \n",
+ " 0.068442 | \n",
+ "
\n",
+ " \n",
+ " 14 | \n",
+ " BPI_Challenge_2017_Offer_log | \n",
+ " 0.000372 | \n",
+ " 0.380626 | \n",
+ " 0.380626 | \n",
+ " 0.813479 | \n",
+ " 0.105130 | \n",
+ " 0.052672 | \n",
+ " 0.066000 | \n",
+ "
\n",
+ " \n",
+ " 15 | \n",
+ " Sepsis_Cases_Event_Log | \n",
+ " 0.805714 | \n",
+ " 0.033333 | \n",
+ " 0.274286 | \n",
+ " 0.695759 | \n",
+ " 0.522343 | \n",
+ " 0.219365 | \n",
+ " 0.299505 | \n",
+ "
\n",
+ " \n",
+ " 16 | \n",
+ " CoSeLoG_WABO_3 | \n",
+ " 0.949402 | \n",
+ " 0.011960 | \n",
+ " 0.145354 | \n",
+ " 0.654296 | \n",
+ " 0.596367 | \n",
+ " 0.278121 | \n",
+ " 0.356439 | \n",
+ "
\n",
+ " \n",
+ " 17 | \n",
+ " BPI_Challenge_2013_closed_problems | \n",
+ " 0.123067 | \n",
+ " 0.331540 | \n",
+ " 0.840619 | \n",
+ " 0.705383 | \n",
+ " 0.310940 | \n",
+ " 0.286515 | \n",
+ " 0.288383 | \n",
+ "
\n",
+ " \n",
+ " 18 | \n",
+ " BPI_Challenge_2013_incidents | \n",
+ " 0.200026 | \n",
+ " 0.232195 | \n",
+ " 0.794414 | \n",
+ " 0.717846 | \n",
+ " 0.404651 | \n",
+ " 0.391097 | \n",
+ " 0.391625 | \n",
+ "
\n",
+ " \n",
+ " 19 | \n",
+ " PermitLog | \n",
+ " 0.209200 | \n",
+ " 0.135315 | \n",
+ " 0.757537 | \n",
+ " 0.733653 | \n",
+ " 0.420150 | \n",
+ " 0.137287 | \n",
+ " 0.215490 | \n",
+ "
\n",
+ " \n",
+ " 20 | \n",
+ " BPIC15_1 | \n",
+ " 0.975813 | \n",
+ " 0.006672 | \n",
+ " 0.121768 | \n",
+ " 0.652855 | \n",
+ " 0.610294 | \n",
+ " 0.270241 | \n",
+ " 0.363928 | \n",
+ "
\n",
+ " \n",
+ " 21 | \n",
+ " InternationalDeclarations | \n",
+ " 0.116762 | \n",
+ " 0.212281 | \n",
+ " 0.811289 | \n",
+ " 0.758268 | \n",
+ " 0.339380 | \n",
+ " 0.145611 | \n",
+ " 0.193753 | \n",
+ "
\n",
+ " \n",
+ " 22 | \n",
+ " BPI_Challenge_2017 | \n",
+ " 0.505570 | \n",
+ " 0.033514 | \n",
+ " 0.531340 | \n",
+ " 0.741706 | \n",
+ " 0.461565 | \n",
+ " 0.231922 | \n",
+ " 0.290464 | \n",
+ "
\n",
+ " \n",
+ " 23 | \n",
+ " BPI2016_Complaints | \n",
+ " 0.438053 | \n",
+ " 0.101770 | \n",
+ " 0.424779 | \n",
+ " 0.899497 | \n",
+ " 0.683796 | \n",
+ " 0.404685 | \n",
+ " 0.470116 | \n",
+ "
\n",
+ " \n",
+ " 24 | \n",
+ " BPI2016_Questions | \n",
+ " 0.797427 | \n",
+ " 0.015650 | \n",
+ " 0.282311 | \n",
+ " 0.813468 | \n",
+ " 0.756132 | \n",
+ " 0.424910 | \n",
+ " 0.506118 | \n",
+ "
\n",
+ " \n",
+ " 25 | \n",
+ " BPI2016_Werkmap_Messages | \n",
+ " 0.002882 | \n",
+ " 0.295803 | \n",
+ " 0.714106 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 26 | \n",
+ " BPI_Challenge_2013_open_problems | \n",
+ " 0.131868 | \n",
+ " 0.217338 | \n",
+ " 0.769231 | \n",
+ " 0.702960 | \n",
+ " 0.276771 | \n",
+ " 0.262094 | \n",
+ " 0.263029 | \n",
+ "
\n",
+ " \n",
+ " 27 | \n",
+ " CoSeLoG_WABO_1 | \n",
+ " 0.977588 | \n",
+ " 0.009605 | \n",
+ " 0.119530 | \n",
+ " 0.646697 | \n",
+ " 0.601566 | \n",
+ " 0.292824 | \n",
+ " 0.376276 | \n",
+ "
\n",
+ " \n",
+ " 28 | \n",
+ " CoSeLoG_WABO_4 | \n",
+ " 0.992376 | \n",
+ " 0.002541 | \n",
+ " 0.106734 | \n",
+ " 0.644399 | \n",
+ " 0.597109 | \n",
+ " 0.373920 | \n",
+ " 0.422526 | \n",
+ "
\n",
+ " \n",
+ " 29 | \n",
+ " CoSeLoG_WABO_5 | \n",
+ " 0.985426 | \n",
+ " 0.004484 | \n",
+ " 0.112108 | \n",
+ " 0.642668 | \n",
+ " 0.592454 | \n",
+ " 0.346832 | \n",
+ " 0.401731 | \n",
+ "
\n",
+ " \n",
+ " 30 | \n",
+ " Detail_Change | \n",
+ " 0.048444 | \n",
+ " 0.074944 | \n",
+ " 0.765056 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 31 | \n",
+ " Detail_Incident_Activity | \n",
+ " 0.496847 | \n",
+ " 0.037455 | \n",
+ " 0.552836 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 32 | \n",
+ " Detail_Interaction | \n",
+ " 0.000041 | \n",
+ " 0.787081 | \n",
+ " 0.000000 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 33 | \n",
+ " finale | \n",
+ " 0.049345 | \n",
+ " 0.516594 | \n",
+ " 0.906332 | \n",
+ " 0.799120 | \n",
+ " 0.254066 | \n",
+ " 0.118478 | \n",
+ " 0.154576 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " log ratio_unique_traces_per_trace \n",
+ "0 BPIC15_2 0.995192 \\\n",
+ "1 BPI_Challenge_2018 0.649570 \n",
+ "2 Receipt_WABO_CoSeLoG 0.080893 \n",
+ "3 BPIC15_3 0.957417 \n",
+ "4 BPI_Challenge_2019 0.047562 \n",
+ "5 RequestForPayment 0.012925 \n",
+ "6 PrepaidTravelCost 0.096236 \n",
+ "7 DomesticDeclarations 0.009429 \n",
+ "8 BPIC15_4 0.996201 \n",
+ "9 BPI_Challenge_2012 0.333614 \n",
+ "10 Hospital_log 0.858268 \n",
+ "11 BPIC15_5 0.997405 \n",
+ "12 CoSeLoG_WABO_2 0.998450 \n",
+ "13 Road_Traffic_Fine_Management_Process 0.001536 \n",
+ "14 BPI_Challenge_2017_Offer_log 0.000372 \n",
+ "15 Sepsis_Cases_Event_Log 0.805714 \n",
+ "16 CoSeLoG_WABO_3 0.949402 \n",
+ "17 BPI_Challenge_2013_closed_problems 0.123067 \n",
+ "18 BPI_Challenge_2013_incidents 0.200026 \n",
+ "19 PermitLog 0.209200 \n",
+ "20 BPIC15_1 0.975813 \n",
+ "21 InternationalDeclarations 0.116762 \n",
+ "22 BPI_Challenge_2017 0.505570 \n",
+ "23 BPI2016_Complaints 0.438053 \n",
+ "24 BPI2016_Questions 0.797427 \n",
+ "25 BPI2016_Werkmap_Messages 0.002882 \n",
+ "26 BPI_Challenge_2013_open_problems 0.131868 \n",
+ "27 CoSeLoG_WABO_1 0.977588 \n",
+ "28 CoSeLoG_WABO_4 0.992376 \n",
+ "29 CoSeLoG_WABO_5 0.985426 \n",
+ "30 Detail_Change 0.048444 \n",
+ "31 Detail_Incident_Activity 0.496847 \n",
+ "32 Detail_Interaction 0.000041 \n",
+ "33 finale 0.049345 \n",
+ "\n",
+ " ratio_most_common_variant ratio_top_10_variants \n",
+ "0 0.002404 0.103365 \\\n",
+ "1 0.026981 0.415371 \n",
+ "2 0.497211 0.887029 \n",
+ "3 0.010646 0.137686 \n",
+ "4 0.199758 0.946368 \n",
+ "5 0.437264 0.933488 \n",
+ "6 0.271081 0.822773 \n",
+ "7 0.439810 0.950095 \n",
+ "8 0.002849 0.102564 \n",
+ "9 0.262016 0.686254 \n",
+ "10 0.035871 0.227472 \n",
+ "11 0.001730 0.102076 \n",
+ "12 0.003101 0.100775 \n",
+ "13 0.375620 0.993104 \n",
+ "14 0.380626 0.380626 \n",
+ "15 0.033333 0.274286 \n",
+ "16 0.011960 0.145354 \n",
+ "17 0.331540 0.840619 \n",
+ "18 0.232195 0.794414 \n",
+ "19 0.135315 0.757537 \n",
+ "20 0.006672 0.121768 \n",
+ "21 0.212281 0.811289 \n",
+ "22 0.033514 0.531340 \n",
+ "23 0.101770 0.424779 \n",
+ "24 0.015650 0.282311 \n",
+ "25 0.295803 0.714106 \n",
+ "26 0.217338 0.769231 \n",
+ "27 0.009605 0.119530 \n",
+ "28 0.002541 0.106734 \n",
+ "29 0.004484 0.112108 \n",
+ "30 0.074944 0.765056 \n",
+ "31 0.037455 0.552836 \n",
+ "32 0.787081 0.000000 \n",
+ "33 0.516594 0.906332 \n",
+ "\n",
+ " epa_normalized_variant_entropy epa_normalized_sequence_entropy \n",
+ "0 0.627973 0.602371 \\\n",
+ "1 0.712079 0.570688 \n",
+ "2 0.689363 0.235532 \n",
+ "3 0.661781 0.605676 \n",
+ "4 0.645530 0.328029 \n",
+ "5 0.703735 0.189048 \n",
+ "6 0.723785 0.317044 \n",
+ "7 0.696474 0.164758 \n",
+ "8 0.652985 0.603866 \n",
+ "9 0.708280 0.423074 \n",
+ "10 0.517443 0.513032 \n",
+ "11 0.648702 0.603260 \n",
+ "12 0.618455 0.594035 \n",
+ "13 0.769353 0.111932 \n",
+ "14 0.813479 0.105130 \n",
+ "15 0.695759 0.522343 \n",
+ "16 0.654296 0.596367 \n",
+ "17 0.705383 0.310940 \n",
+ "18 0.717846 0.404651 \n",
+ "19 0.733653 0.420150 \n",
+ "20 0.652855 0.610294 \n",
+ "21 0.758268 0.339380 \n",
+ "22 0.741706 0.461565 \n",
+ "23 0.899497 0.683796 \n",
+ "24 0.813468 0.756132 \n",
+ "25 0.000000 0.000000 \n",
+ "26 0.702960 0.276771 \n",
+ "27 0.646697 0.601566 \n",
+ "28 0.644399 0.597109 \n",
+ "29 0.642668 0.592454 \n",
+ "30 NaN NaN \n",
+ "31 NaN NaN \n",
+ "32 NaN NaN \n",
+ "33 0.799120 0.254066 \n",
+ "\n",
+ " epa_normalized_sequence_entropy_linear_forgetting \n",
+ "0 0.317217 \\\n",
+ "1 0.381612 \n",
+ "2 0.100603 \n",
+ "3 0.341521 \n",
+ "4 0.320185 \n",
+ "5 0.097572 \n",
+ "6 0.184879 \n",
+ "7 0.085439 \n",
+ "8 0.355927 \n",
+ "9 0.226133 \n",
+ "10 0.267825 \n",
+ "11 0.342410 \n",
+ "12 0.323233 \n",
+ "13 0.052586 \n",
+ "14 0.052672 \n",
+ "15 0.219365 \n",
+ "16 0.278121 \n",
+ "17 0.286515 \n",
+ "18 0.391097 \n",
+ "19 0.137287 \n",
+ "20 0.270241 \n",
+ "21 0.145611 \n",
+ "22 0.231922 \n",
+ "23 0.404685 \n",
+ "24 0.424910 \n",
+ "25 0.000000 \n",
+ "26 0.262094 \n",
+ "27 0.292824 \n",
+ "28 0.373920 \n",
+ "29 0.346832 \n",
+ "30 NaN \n",
+ "31 NaN \n",
+ "32 NaN \n",
+ "33 0.118478 \n",
+ "\n",
+ " epa_normalized_sequence_entropy_exponential_forgetting \n",
+ "0 0.390473 \n",
+ "1 0.420362 \n",
+ "2 0.138113 \n",
+ "3 0.404934 \n",
+ "4 0.320282 \n",
+ "5 0.118744 \n",
+ "6 0.214387 \n",
+ "7 0.104389 \n",
+ "8 0.412835 \n",
+ "9 0.275551 \n",
+ "10 0.331672 \n",
+ "11 0.404580 \n",
+ "12 0.389858 \n",
+ "13 0.068442 \n",
+ "14 0.066000 \n",
+ "15 0.299505 \n",
+ "16 0.356439 \n",
+ "17 0.288383 \n",
+ "18 0.391625 \n",
+ "19 0.215490 \n",
+ "20 0.363928 \n",
+ "21 0.193753 \n",
+ "22 0.290464 \n",
+ "23 0.470116 \n",
+ "24 0.506118 \n",
+ "25 0.000000 \n",
+ "26 0.263029 \n",
+ "27 0.376276 \n",
+ "28 0.422526 \n",
+ "29 0.401731 \n",
+ "30 NaN \n",
+ "31 NaN \n",
+ "32 NaN \n",
+ "33 0.154576 "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+    "# Summary statistics from describe(); after the transpose each feature is one row.\n",
+    "bpic_stats = bpic_features.describe().transpose()\n",
+    "\n",
+    "# Data-driven candidates: features whose observed min and max both lie in [0, 1].\n",
+    "# Stored under its own name; it used to be assigned to normalized_feature_names and\n",
+    "# was overwritten by the very next statement, so the result was never usable.\n",
+    "bounded_feature_names = bpic_stats[(bpic_stats['min'] >= 0) & (bpic_stats['max'] <= 1)].index.to_list()\n",
+    "\n",
+    "# Hand-picked features that the following cells pass as objectives to the experiment generators.\n",
+    "normalized_feature_names = [\n",
+    "    'ratio_unique_traces_per_trace',\n",
+    "    'ratio_most_common_variant',\n",
+    "    'ratio_top_10_variants',\n",
+    "    'epa_normalized_variant_entropy',\n",
+    "    'epa_normalized_sequence_entropy',\n",
+    "    'epa_normalized_sequence_entropy_linear_forgetting',\n",
+    "    'epa_normalized_sequence_entropy_exponential_forgetting',\n",
+    "]\n",
+    "print(normalized_feature_names)\n",
+    "bpic_features[['log'] + normalized_feature_names]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "44909860",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "21\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rt10v.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rmcv.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enself.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rt10v.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rt10v.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enseef.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rmcv.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rutpt.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enve.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rmcv.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rmcv.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rutpt.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_enve.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rmcv_rt10v.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enself_rutpt.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_enself.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enseef_rt10v.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_enve.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_enve_rutpt.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_rt10v_rutpt.json\n",
+ "Saved experiment config in ../config_files/algorithm/34_bpic_features/generator_34bpic_2objectives_ense_rutpt.json\n",
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+ "#Features between 0 and 1: \n",
+    "def write_generator_bpic_experiment(objectives, n_para_obj=2):\n",
+    "    \"\"\"Write one generator config (JSON) per combination of `n_para_obj` objectives.\n",
+    "\n",
+    "    Each config lists two pipeline steps: 'event_logs_generation', pointed at\n",
+    "    data/34_bpic_features.csv with the chosen objectives, and\n",
+    "    'feature_extraction'. Files are saved as\n",
+    "    ../config_files/algorithm/34_bpic_features/generator_<name>.json.\n",
+    "\n",
+    "    Args:\n",
+    "        objectives: Iterable of feature names to combine; duplicates are ignored.\n",
+    "        n_para_obj: Number of objectives targeted simultaneously per config.\n",
+    "\n",
+    "    Returns:\n",
+    "        The config (list of two step dicts) written for the first combination\n",
+    "        in sorted order, or None when no combination exists.\n",
+    "    \"\"\"\n",
+    "    # combinations() over the sorted distinct names yields every unordered group\n",
+    "    # exactly once and in a reproducible order. The earlier eval()/product()\n",
+    "    # construction only compared the first two tuple entries, and the function\n",
+    "    # returned from inside the loop, so at most one config was written per call.\n",
+    "    experiments = list(itertools.combinations(sorted(set(objectives)), n_para_obj))\n",
+    "\n",
+    "    first_experiment = None\n",
+    "    for exp in experiments:\n",
+    "        experiment_path = os.path.join('..', 'data', '34_bpic_features')\n",
+    "        # NOTE(review): this directory is created, but nothing is written into it here.\n",
+    "        os.makedirs(experiment_path, exist_ok=True)\n",
+    "        experiment_path = os.path.join(experiment_path, f'34bpic_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv')\n",
+    "\n",
+    "        # Directory names are derived from the CSV file name of the experiment.\n",
+    "        first_dir = os.path.basename(experiment_path).replace('.csv', '')\n",
+    "        second_dir = first_dir.replace('grid_', '').replace('objectives', '')\n",
+    "\n",
+    "        experiment = [\n",
+    "            {\n",
+    "                'pipeline_step': 'event_logs_generation',\n",
+    "                'output_path': 'output/generated',\n",
+    "                'generator_params': {\n",
+    "                    'experiment': {'input_path': 'data/34_bpic_features.csv',\n",
+    "                                   'objectives': exp},\n",
+    "                    'config_space': {\n",
+    "                        'mode': [5, 20],\n",
+    "                        'sequence': [0.01, 1],\n",
+    "                        'choice': [0.01, 1],\n",
+    "                        'parallel': [0.01, 1],\n",
+    "                        'loop': [0.01, 1],\n",
+    "                        'silent': [0.01, 1],\n",
+    "                        'lt_dependency': [0.01, 1],\n",
+    "                        'num_traces': [10, 10001],\n",
+    "                        'duplicate': [0],\n",
+    "                        'or': [0]\n",
+    "                    },\n",
+    "                    'n_trials': 200\n",
+    "                }\n",
+    "            },\n",
+    "            {\n",
+    "                'pipeline_step': 'feature_extraction',\n",
+    "                'input_path': os.path.join('output', 'features', 'generated', '34_bpic_features', second_dir),\n",
+    "                'feature_params': {'feature_set': ['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
+    "                'output_path': 'output/plots',\n",
+    "                'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+    "                'plot_type': 'boxplot'\n",
+    "            }\n",
+    "        ]\n",
+    "\n",
+    "        output_path = os.path.join('..', 'config_files', 'algorithm', '34_bpic_features')\n",
+    "        os.makedirs(output_path, exist_ok=True)\n",
+    "        # File name: CSV base name up to its first dot, prefixed with 'generator_'.\n",
+    "        config_name = os.path.basename(experiment_path).split('.')[0]\n",
+    "        output_path = os.path.join(output_path, f'generator_{config_name}.json')\n",
+    "\n",
+    "        with open(output_path, 'w') as f:\n",
+    "            json.dump(experiment, f, ensure_ascii=False)\n",
+    "        print(f'Saved experiment config in {output_path}')\n",
+    "\n",
+    "        if first_experiment is None:\n",
+    "            first_experiment = experiment\n",
+    "    return first_experiment\n",
+ "\n",
+ "\n",
+    "def create_objectives_grid(objectives, n_para_obj=2):\n",
+    "    \"\"\"Write a generator config for every combination of `n_para_obj` objectives.\n",
+    "\n",
+    "    NOTE: a later cell of this notebook defines another create_objectives_grid;\n",
+    "    whichever cell was executed last is the one in effect.\n",
+    "\n",
+    "    Args:\n",
+    "        objectives: Iterable of feature names to combine; duplicates are ignored.\n",
+    "        n_para_obj: Number of objectives per combination.\n",
+    "\n",
+    "    Returns:\n",
+    "        The list of objective tuples that were processed, in sorted order\n",
+    "        (the function used to return None).\n",
+    "    \"\"\"\n",
+    "    # combinations() over the sorted distinct names yields every unordered group\n",
+    "    # exactly once, without eval() and in a reproducible order.\n",
+    "    experiments = list(itertools.combinations(sorted(set(objectives)), n_para_obj))\n",
+    "    print(len(experiments))\n",
+    "\n",
+    "    for exp in experiments:\n",
+    "        write_generator_bpic_experiment(objectives=exp, n_para_obj=n_para_obj)\n",
+    "    return experiments\n",
+ " \n",
+    "# Run the grid builder defined above for every pair of normalized features\n",
+    "# (it writes the generator configs) and print whatever it returns.\n",
+    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=2) \n",
+    "print(exp_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b07e9753",
+ "metadata": {},
+ "source": [
+ "## Single objective from real logs\n",
+ "(Feature selection)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d759a677",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "7 experiments: [('epa_normalized_sequence_entropy_exponential_forgetting',), ('epa_normalized_variant_entropy',), ('ratio_top_10_variants',), ('epa_normalized_sequence_entropy',), ('epa_normalized_sequence_entropy_linear_forgetting',), ('ratio_most_common_variant',), ('ratio_unique_traces_per_trace',)]\n",
+ "11\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_enseef.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enseef.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_enve.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enve.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_rt10v.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rt10v.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_ense.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_ense.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_enself.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_enself.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_rmcv.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rmcv.json\n",
+ "Saved experiment in ../data/grid_experiments/grid_1objectives_rutpt.csv\n",
+ "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_1objectives_rutpt.json\n",
+ "None\n"
+ ]
+ }
+ ],
+ "source": [
+    "def write_single_objective_experiment(experiment_path, objectives=[\"ratio_top_20_variants\", \"epa_normalized_sequence_entropy_linear_forgetting\"]):\n",
+    "    \"\"\"Write the generator config (JSON) for one grid experiment and return it.\n",
+    "\n",
+    "    The config is saved as\n",
+    "    ../config_files/algorithm/grid_experiments/generator_<csv base name>.json.\n",
+    "\n",
+    "    Args:\n",
+    "        experiment_path: Path of the experiment CSV as seen from the notebook,\n",
+    "            e.g. '../data/grid_experiments/grid_1objectives_rmcv.csv'.\n",
+    "        objectives: Feature names stored as the objectives of the experiment.\n",
+    "\n",
+    "    Returns:\n",
+    "        The config: a list with an 'event_logs_generation' step followed by a\n",
+    "        'feature_extraction' step.\n",
+    "    \"\"\"\n",
+    "    # The config stores the CSV path without the notebook-relative '../' prefix.\n",
+    "    # Strip it only when it is really there: slicing [3:] unconditionally used to\n",
+    "    # chop three characters off any path that did not start with '../'.\n",
+    "    if experiment_path.startswith(('../', '..' + os.sep)):\n",
+    "        input_path = experiment_path[3:]\n",
+    "    else:\n",
+    "        input_path = experiment_path\n",
+    "\n",
+    "    # Directory names are derived from the CSV file name of the experiment.\n",
+    "    first_dir = os.path.basename(input_path).replace('.csv', '')\n",
+    "    second_dir = first_dir.replace('grid_', '').replace('objectives', '')\n",
+    "\n",
+    "    experiment = [\n",
+    "        {\n",
+    "            'pipeline_step': 'event_logs_generation',\n",
+    "            'output_path': os.path.join('output', 'generated', 'grid_1obj'),\n",
+    "            'generator_params': {\n",
+    "                'experiment': {'input_path': input_path,\n",
+    "                               'objectives': objectives},\n",
+    "                'config_space': {\n",
+    "                    'mode': [5, 20],\n",
+    "                    'sequence': [0.01, 1],\n",
+    "                    'choice': [0.01, 1],\n",
+    "                    'parallel': [0.01, 1],\n",
+    "                    'loop': [0.01, 1],\n",
+    "                    'silent': [0.01, 1],\n",
+    "                    'lt_dependency': [0.01, 1],\n",
+    "                    'num_traces': [10, 10001],\n",
+    "                    'duplicate': [0],\n",
+    "                    'or': [0]\n",
+    "                },\n",
+    "                'n_trials': 200\n",
+    "            }\n",
+    "        },\n",
+    "        {\n",
+    "            'pipeline_step': 'feature_extraction',\n",
+    "            'input_path': os.path.join('output', 'features', 'generated', 'grid_1obj', first_dir, second_dir),\n",
+    "            'feature_params': {'feature_set': ['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
+    "            'output_path': 'output/plots',\n",
+    "            'real_eventlog_path': 'data/34_bpic_features.csv',\n",
+    "            'plot_type': 'boxplot'\n",
+    "        }\n",
+    "    ]\n",
+    "\n",
+    "    output_path = os.path.join('..', 'config_files', 'algorithm', 'grid_experiments')\n",
+    "    os.makedirs(output_path, exist_ok=True)\n",
+    "    # File name: CSV base name up to its first dot, prefixed with 'generator_'.\n",
+    "    config_name = os.path.basename(experiment_path).split('.')[0]\n",
+    "    output_path = os.path.join(output_path, f'generator_{config_name}.json')\n",
+    "    with open(output_path, 'w') as f:\n",
+    "        json.dump(experiment, f, ensure_ascii=False)\n",
+    "    print(f'Saved experiment config in {output_path}')\n",
+    "\n",
+    "    return experiment\n",
+ "\n",
+    "def create_objectives_grid(objectives, n_para_obj=2):\n",
+    "    \"\"\"Create a CSV task grid and a generator config for every objective combination.\n",
+    "\n",
+    "    For each combination of `n_para_obj` objectives a CSV is written to\n",
+    "    ../data/grid_experiments. Its rows ('task_1', 'task_2', ...) enumerate every\n",
+    "    combination of the target values 0.0, 0.1, ..., 1.0 for the chosen\n",
+    "    objectives. The matching config is then written through\n",
+    "    write_single_objective_experiment.\n",
+    "\n",
+    "    NOTE: an earlier cell of this notebook also defines create_objectives_grid;\n",
+    "    whichever cell was executed last is the one in effect.\n",
+    "\n",
+    "    Args:\n",
+    "        objectives: Iterable of feature names to combine; duplicates are ignored.\n",
+    "        n_para_obj: Number of objectives (CSV value columns) per experiment.\n",
+    "\n",
+    "    Returns:\n",
+    "        The list of objective tuples that were processed, in sorted order\n",
+    "        (the function used to return None).\n",
+    "    \"\"\"\n",
+    "    # combinations() over the sorted distinct names yields every unordered group\n",
+    "    # exactly once, covers n_para_obj == 1 without a special case, and avoids eval().\n",
+    "    experiments = list(itertools.combinations(sorted(set(objectives)), n_para_obj))\n",
+    "    print(len(experiments), 'experiments: ', experiments)\n",
+    "\n",
+    "    # Target values 0.0 .. 1.0 in steps of 0.1; a task is one value per objective.\n",
+    "    target_values = np.around(np.arange(0, 1.1, 0.1), 2)\n",
+    "    tasks = [(f'task_{i+1}',) + task\n",
+    "             for i, task in enumerate(itertools.product(target_values, repeat=n_para_obj))]\n",
+    "    print(len(tasks))\n",
+    "\n",
+    "    experiment_dir = os.path.join('..', 'data', 'grid_experiments')\n",
+    "    for exp in experiments:\n",
+    "        df = pd.DataFrame(data=tasks, columns=['task', *exp])\n",
+    "        os.makedirs(experiment_dir, exist_ok=True)\n",
+    "        experiment_path = os.path.join(experiment_dir, f'grid_{len(exp)}objectives_{abbrev_obj_keys(exp)}.csv')\n",
+    "        df.to_csv(experiment_path, index=False)\n",
+    "        print(f'Saved experiment in {experiment_path}')\n",
+    "        write_single_objective_experiment(experiment_path, objectives=exp)\n",
+    "    return experiments\n",
+ " \n",
+    "# Run the grid builder defined above with one objective per experiment\n",
+    "# (a CSV task grid plus a generator config per feature) and print whatever it returns.\n",
+    "exp_test = create_objectives_grid(normalized_feature_names, n_para_obj=1) \n",
+    "print(exp_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f9886f44",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}