Andrea Maldonado committed on
Commit
bdf9096
·
1 Parent(s): 4a814d8

Moving private repo to public

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. LICENSE +1 -1
  2. README.md +24 -2
  3. config.py +88 -0
  4. config_files/algorithm/augmentation.json +12 -0
  5. config_files/algorithm/benchmark.json +10 -0
  6. config_files/algorithm/evaluation_plotter.json +17 -0
  7. config_files/algorithm/experiment_test.json +51 -0
  8. config_files/algorithm/feature_extraction.json +10 -0
  9. config_files/algorithm/fix_24.json +34 -0
  10. config_files/algorithm/generation.json +30 -0
  11. config_files/algorithm/test/generator_2bpic_2objectives_ense_enseef.json +15 -0
  12. config_files/algorithm/test/generator_grid_1objectives_rt10v.json +16 -0
  13. config_files/algorithm/test/generator_grid_2objectives_ense_enself.json +19 -0
  14. config_files/options/baseline.json +9 -0
  15. config_files/options/run_params.json +9 -0
  16. dashboard.py +295 -0
  17. execute_grid_experiments.py +40 -0
  18. main.py +73 -0
  19. notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
  20. notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
  21. notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
  22. notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
  23. notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
  24. notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
  25. notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
  26. notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
  27. notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
  28. notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
  29. notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
  30. notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +376 -0
  31. notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +6 -0
  32. notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +6 -0
  33. notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
  34. notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +818 -0
  35. notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
  36. notebooks/benchmarking_process_discovery.ipynb +0 -0
  37. notebooks/bpic_generability_pdm.ipynb +0 -0
  38. notebooks/experiment_generator.ipynb +0 -0
  39. notebooks/feature_distributions.ipynb +0 -0
  40. notebooks/feature_exploration.ipynb +0 -0
  41. notebooks/feature_performance_similarity.ipynb +0 -0
  42. notebooks/feature_selection.ipynb +0 -0
  43. notebooks/gedi_representativeness.ipynb +0 -0
  44. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/configspace.json +90 -0
  45. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/intensifier.json +112 -0
  46. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/optimization.json +6 -0
  47. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/runhistory.json +153 -0
  48. smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/scenario.json +180 -0
  49. smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/configspace.json +90 -0
  50. smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/intensifier.json +27 -0
LICENSE CHANGED
@@ -1,6 +1,6 @@
1
  MIT License
2
 
3
- Copyright (c) 2024 lmu-dbs
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
 
1
  MIT License
2
 
3
+ Copyright (c) 2023 lmu-dbs
4
 
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
  of this software and associated documentation files (the "Software"), to deal
README.md CHANGED
@@ -1,2 +1,24 @@
1
- # gedi
2
- Generating Event Data with Intentional Features for Benchmarking Process Mining
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GEDI
2
+
3
+ ### Requirements
4
+ - [Meta-feature Extractor](https://github.com/gbrltv/process_meta_learning/tree/main/meta_feature_extraction)
5
+ - [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
6
+ - Graphviz on your OS e.g.
7
+ For MacOS:
8
+ ```console
9
+ brew install graphviz
10
+ ```
11
+
12
+ ## Installation
13
+ - For smac:
14
+ ```console
15
+ conda install pyrfr swig
16
+ ```
17
+ - `conda env create -f .conda.yml`
18
+ - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
19
+
20
+ ### Startup
21
+ ```console
22
+ conda activate tag
23
+ python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
24
+ ```
config.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import warnings
4
+
5
+ from tag.utils.io_helpers import sort_files
6
+ from tqdm import tqdm
7
+ from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
8
+
9
+ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
10
+ """
11
+ Loads the list of model configurations given from a json file or the default list of dictionary from the code.
12
+ @param alg_json_file: str
13
+ Path to the json data with the running configuration
14
+ @return: list[dict]
15
+ list of model configurations
16
+ """
17
+ if alg_json_file is not None:
18
+ return json.load(open(alg_json_file))
19
+ else:
20
+ warnings.warn('The default model parameter list is used instead of a .json-file.\n'
21
+ ' Use a configuration from the `config_files`-folder together with the args `-a`.')
22
+ return [
23
+ {ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM},
24
+ ]
25
+ def get_run_params(alg_params_json: str) -> dict:
26
+ """
27
+ Loads the running configuration given from a json file or the default dictionary from the code.
28
+ @param alg_params_json: str
29
+ Path to the json data with the running configuration
30
+ @return: dict
31
+ Running Configuration
32
+ """
33
+ if alg_params_json is not None:
34
+ return json.load(open(alg_params_json))
35
+ else:
36
+ warnings.warn('The default run option is used instead of a .json-file.\n'
37
+ ' Use a configuration from the `config_files`-folder together with the args `-o`.')
38
+ return {
39
+ RUN_OPTION: COMPARE,
40
+ PLOT_TYPE: COLOR_MAP, # 'heat_map', 'color_map', '3d_map', 'explained_var_plot'
41
+ PLOT_TICS: True,
42
+ N_COMPONENTS: 2,
43
+ INPUT_NAME: 'runningExample',
44
+ SAVE_RESULTS: True,
45
+ LOAD_RESULTS: True
46
+ }
47
+
48
+ def get_files_and_kwargs(params: dict):
49
+ """
50
+ This method returns the filename list of the trajectory and generates the kwargs for the DataTrajectory.
51
+ The method is individually created for the available data set.
52
+ Add new trajectory options, if different data set are used.
53
+ @param params: dict
54
+ running configuration
55
+ @return: tuple
56
+ list of filenames of the trajectories AND
57
+ kwargs with the important arguments for the classes
58
+ """
59
+ try:
60
+ input_name = params[INPUT_NAME]
61
+ except KeyError as e:
62
+ raise KeyError(f'Run option parameter is missing the key: `{e}`. This parameter is mandatory.')
63
+
64
+ #TODO: generate parent directories if they don't exist
65
+ if input_name == 'test':
66
+ filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
67
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
68
+ elif input_name == 'realLogs':
69
+ filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
70
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
71
+ elif input_name == 'gen5':
72
+ filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:5]
73
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
74
+ elif input_name == 'gen20':
75
+ filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:20]
76
+ kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
77
+ elif input_name == 'runningExample':
78
+ filename_list = ['running-example.xes']
79
+ kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'data/'}
80
+ elif input_name == 'metaFeatures':
81
+ filename_list = ['log_features.csv']
82
+ kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'results/'}
83
+ else:
84
+ raise ValueError(f'No data trajectory was found with the name `{input_name}`.')
85
+
86
+ #filename_list.pop(file_element)
87
+ kwargs[PARAMS] = params
88
+ return filename_list, kwargs
config_files/algorithm/augmentation.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "instance_augmentation",
4
+ "augmentation_params":
5
+ {
6
+ "method":"SMOTE", "no_samples":20,
7
+ "feature_selection": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace", "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3", "entropy_k_block_diff_5", 
"entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3", "entropy_knn_5", "entropy_knn_7", "variant_entropy", "normalized_variant_entropy", "sequence_entropy", "normalized_sequence_entropy", "sequence_entropy_linear_forgetting", "normalized_sequence_entropy_linear_forgetting", "sequence_entropy_exponential_forgetting", "normalized_sequence_entropy_exponential_forgetting"]
8
+ },
9
+ "input_path": "data/bpic_features.csv",
10
+ "output_path": "output"
11
+ }
12
+ ]
config_files/algorithm/benchmark.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "benchmark_test",
4
+ "benchmark_test": "discovery",
5
+ "input_path":"data/test_2",
6
+ "input_path":"data/test_2/gen_el_168.xes",
7
+ "output_path":"output",
8
+ "miners" : ["inductive", "heuristics", "imf", "ilp"]
9
+ }
10
+ ]
config_files/algorithm/evaluation_plotter.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "evaluation_plotter",
4
+ "input_path": "output/features/generated/34_bpic_features/",
5
+ "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
6
+ "input_path": "output/features/generated/grid_2obj/",
7
+ "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
8
+ "output_path": "output/plots",
9
+ "reference_feature": "epa_normalized_sequence_entropy",
10
+ "reference_feature": "epa_normalized_variant_entropy",
11
+ "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
12
+ "targets": "data/34_bpic_features.csv",
13
+ "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
14
+ "targets": "data/grid_experiments/grid_2obj/",
15
+ "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
16
+ }
17
+ ]
config_files/algorithm/experiment_test.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "instance_augmentation",
4
+ "augmentation_params":{"method":"SMOTE", "no_samples":2,
5
+ "feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
6
+ "input_path": "data/bpic_features.csv",
7
+ "output_path": "output"
8
+ },
9
+ {
10
+ "pipeline_step": "event_logs_generation",
11
+ "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
12
+ "output_path": "data/test_2",
13
+ "generator_params": {
14
+ "experiment": "data/grid_objectives.csv",
15
+ "experiment": {"input_path": "data/2_bpic_features.csv",
16
+ "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
17
+ "experiment": [
18
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
19
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
20
+ ],
21
+ "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
22
+ "config_space": {
23
+ "mode": [5, 20],
24
+ "sequence": [0.01, 1],
25
+ "choice": [0.01, 1],
26
+ "parallel": [0.01, 1],
27
+ "loop": [0.01, 1],
28
+ "silent": [0.01, 1],
29
+ "lt_dependency": [0.01, 1],
30
+ "num_traces": [100, 10001],
31
+ "duplicate": [0],
32
+ "or": [0]
33
+ },
34
+ "n_trials": 2
35
+ }
36
+ },
37
+ {
38
+ "pipeline_step": "feature_extraction",
39
+ "input_path": "data/test_2",
40
+ "feature_params": {"feature_set":["trace_length"]},
41
+ "output_path": "output/plots",
42
+ "real_eventlog_path": "data/bpic_features.csv",
43
+ "plot_type": "boxplot"
44
+ },
45
+ {
46
+ "pipeline_step": "benchmark_test",
47
+ "benchmark_test": "discovery",
48
+ "input_path":"data/test_2",
49
+ "miners" : ["inductive", "heuristics", "imf", "ilp", "sm"]
50
+ }
51
+ ]
config_files/algorithm/feature_extraction.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "feature_extraction",
4
+ "input_path": "output/features/generated/34_bpic_features/2_rt10v_rutpt",
5
+ "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
6
+ "output_path": "output/plots",
7
+ "real_eventlog_path": "data/34_bpic_features.csv",
8
+ "plot_type": "boxplot"
9
+ }
10
+ ]
config_files/algorithm/fix_24.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "event_logs_generation",
4
+ "output_path":"data/generated",
5
+ "generator_params": {
6
+ "objectives": {
7
+ "normalized_sequence_entropy_linear_forgetting": 0.05,
8
+ "ratio_top_20_variants": 0.4
9
+ },
10
+ "config_space": {
11
+ "mode": [5, 40],
12
+ "sequence": [0.01, 1],
13
+ "choice": [0.01, 1],
14
+ "parallel": [0.01, 1],
15
+ "loop": [0.01, 1],
16
+ "silent": [0.01, 1],
17
+ "lt_dependency": [0.01, 1],
18
+ "num_traces": [100, 1001],
19
+ "duplicate": [0],
20
+ "or": [0]
21
+ },
22
+ "n_trials": 20
23
+ }
24
+ },
25
+ {
26
+ "pipeline_step": "feature_extraction",
27
+ "input_path": "data/generated",
28
+ "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "entropies", "complexity"]},
29
+ "feature_params": {"feature_set":["trace_length"]},
30
+ "output_path": "output/plots",
31
+ "real_eventlog_path": "data/log_meta_features.csv",
32
+ "plot_type": "boxplot"
33
+ }
34
+ ]
config_files/algorithm/generation.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "pipeline_step": "event_logs_generation",
4
+ "output_path": "output",
5
+ "generator_params": {
6
+ "experiment": {
7
+ "input_path": "data/grid_objectives_debug.csv",
8
+ "objectives": ["epa_normalized_variant_entropy"],
9
+ "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
10
+ "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
11
+ },
12
+ "config_space": {
13
+ "mode": [5, 20],
14
+ "sequence": [0.01, 1],
15
+ "choice": [0.01, 1],
16
+ "parallel": [0.01, 1],
17
+ "loop": [0.01, 1],
18
+ "silent": [0.01, 1],
19
+ "lt_dependency": [0.01, 1],
20
+ "num_traces": [10, 10001],
21
+ "duplicate": [0],
22
+ "or": [0]
23
+ },
24
+ "n_trials": 50,
25
+ "plot_reference_feature": "epa_normalized_sequence_entropy",
26
+ "plot_reference_feature": "epa_normalized_sequence_entropy_linear_forgetting",
27
+ "plot_reference_feature": ""
28
+ }
29
+ }
30
+ ]
config_files/algorithm/test/generator_2bpic_2objectives_ense_enseef.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/2_bpic_features.csv",
5
+ "objectives": ["epa_normalized_sequence_entropy",
6
+ "epa_normalized_sequence_entropy_exponential_forgetting"]},
7
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
8
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
9
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001],
10
+ "duplicate": [0], "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/2_bpic_features/2_ense_enseef",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant", "activities",
14
+ "start_activities", "end_activities", "eventropies", "epa_based"]}, "output_path": "output/plots",
15
+ "real_eventlog_path": "data/2_bpic_features.csv", "plot_type": "boxplot"}]
config_files/algorithm/test/generator_grid_1objectives_rt10v.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated/grid_1obj",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/grid_experiments/grid_1objectives_rt10v.csv",
5
+ "objectives": ["ratio_top_10_variants"]},
6
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
7
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
8
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
9
+ "num_traces": [10, 10001], "duplicate": [0],
10
+ "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/grid_1obj/grid_1objectives_rt10v/1_rt10v",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
14
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
15
+ "output_path": "output/plots", "real_eventlog_path": "data/2_bpic_features.csv",
16
+ "plot_type": "boxplot"}]
config_files/algorithm/test/generator_grid_2objectives_ense_enself.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/generated/grid_2obj",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/2_grid_test.csv",
5
+ "objectives": ["epa_normalized_sequence_entropy",
6
+ "epa_normalized_sequence_entropy_linear_forgetting"]},
7
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
8
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
9
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
10
+ "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 3}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/features/generated/2_grid_test/2_ense_enself",
13
+ "feature_params": {"feature_set": ["epa_normalized_sequence_entropy",
14
+ "epa_normalized_sequence_entropy_linear_forgetting"]},
15
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
16
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
17
+ "output_path": "output/plots",
18
+ "real_eventlog_path": "data/2_bpic_features.csv",
19
+ "plot_type": "boxplot"}]
config_files/options/baseline.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_option": "baseline",
3
+ "plot_type": "color_map",
4
+ "plot_tics": true,
5
+ "n_components": 2,
6
+ "input_name": "test",
7
+ "save_results": false,
8
+ "load_results": false
9
+ }
config_files/options/run_params.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_option": "compare",
3
+ "plot_type": "color_map",
4
+ "plot_tics": true,
5
+ "n_components": 2,
6
+ "input_name": "gen20",
7
+ "save_results": false,
8
+ "load_results": true
9
+ }
dashboard.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+ from meta_feature_extraction.simple_stats import simple_stats
3
+ from meta_feature_extraction.trace_length import trace_length
4
+ from meta_feature_extraction.trace_variant import trace_variant
5
+ from meta_feature_extraction.activities import activities
6
+ from meta_feature_extraction.start_activities import start_activities
7
+ from meta_feature_extraction.end_activities import end_activities
8
+ from meta_feature_extraction.entropies import entropies
9
+ from pm4py import discover_petri_net_inductive as inductive_miner
10
+ from pm4py import generate_process_tree
11
+ from pm4py import save_vis_petri_net, save_vis_process_tree
12
+ from pm4py.algo.filtering.log.variants import variants_filter
13
+ from pm4py.algo.simulation.tree_generator import algorithm as tree_generator
14
+ from pm4py.algo.simulation.playout.process_tree import algorithm as playout
15
+ from pm4py.objects.conversion.log import converter as log_converter
16
+ from pm4py.objects.log.exporter.xes import exporter as xes_exporter
17
+ from pm4py.objects.log.importer.xes import importer as xes_importer
18
+ from pm4py.objects.log.util import dataframe_utils
19
+ from pm4py.sim import play_out
20
+
21
+ import matplotlib.image as mpimg
22
+ import os
23
+ import pandas as pd
24
+ import streamlit as st
25
+
26
+ OUTPUT_PATH = "output"
27
+ SAMPLE_EVENTS = 500
28
+
29
+ @st.cache(allow_output_mutation=True)
30
+ def load_from_xes(uploaded_file):
31
+ bytes_data = uploaded_file.getvalue()
32
+ log1 = xes_importer.deserialize(bytes_data)
33
+ get_stats(log1)
34
+ return log1
35
+
36
+ @st.cache
37
+ def load_from_csv(uploaded_file, sep):
38
+ if uploaded_file is not None:
39
+ df = pd.read_csv(uploaded_file, sep=sep, index_col=False)
40
+ return df
41
+
42
+ def get_stats(log, save=True):
43
+ """Returns the statistics of an event log."""
44
+ num_traces = len(log)
45
+ num_events = sum([len(c) for c in log])
46
+ num_utraces = len(variants_filter.get_variants(log))
47
+ if save:
48
+ st.session_state["num_traces"] = num_traces
49
+ st.session_state["num_events"] = num_events
50
+ st.session_state["num_utraces"] = num_utraces
51
+ return num_utraces, num_traces, num_events
52
+
53
+ #@st.cache
54
+ def df_to_log(df, case_id, activity, timestamp):
55
+ df.rename(columns={case_id: 'case:concept:name',
56
+ activity: 'concept:name',
57
+ timestamp: "time:timestamp"}, inplace=True)
58
+ temp = dataframe_utils.convert_timestamp_columns_in_df(df)
59
+ #temp = temp.sort_values(timestamp)
60
+ log = log_converter.apply(temp)
61
+ return log, 'concept:name', "time:timestamp"
62
+
63
+ def read_uploaded_file(uploaded_file):
64
+ extension = uploaded_file.name.split('.')[-1]
65
+ log_name = uploaded_file.name.split('.')[-2]
66
+
67
+ st.sidebar.write("Loaded ", extension.upper(), '-File: ', uploaded_file.name)
68
+ if extension == "xes":
69
+ event_log = load_from_xes(uploaded_file)
70
+ log_columns = [*list(event_log[0][0].keys())]
71
+ convert_button = False
72
+ case_id = "case:concept:name"
73
+ activity = "concept:name"
74
+ timestamp = "time:timestamp"
75
+ default_act_id = log_columns.index("concept:name")
76
+ default_tst_id = log_columns.index("time:timestamp")
77
+
78
+ event_df = log_converter.apply(event_log, variant=log_converter.Variants.TO_DATA_FRAME)
79
+ df_path = OUTPUT_PATH+"/"+log_name+".csv"
80
+ event_df.to_csv(df_path, sep =";", index=False)
81
+ return event_log, event_df, case_id, activity
82
+
83
+ elif extension == "csv":
84
+ sep = st.sidebar.text_input("Columns separator", ";")
85
+ event_df = load_from_csv(uploaded_file, sep)
86
+ old_df = deepcopy(event_df)
87
+ log_columns = event_df.columns
88
+
89
+ case_id = st.sidebar.selectbox("Choose 'case' column:", log_columns)
90
+ activity = st.sidebar.selectbox("Choose 'activity' column:", log_columns, index=0)
91
+ timestamp = st.sidebar.selectbox("Choose 'timestamp' column:", log_columns, index=0)
92
+
93
+ convert_button = st.sidebar.button('Confirm selection')
94
+ if convert_button:
95
+ temp = deepcopy(event_df)
96
+ event_log, activity, timestamp = df_to_log(temp, case_id, activity, timestamp)
97
+ #xes_exporter.apply(event_log, INPUT_XES)
98
+ log_columns = [*list(event_log[0][0].keys())]
99
+ st.session_state['log'] = event_log
100
+ return event_log, event_df, case_id, activity
101
+
102
+ def sample_log_traces(complete_log, sample_size):
103
+ '''
104
+ Samples random traces out of logs.
105
+ So that number of events is slightly over SAMPLE_SIZE.
106
+ :param complete_log: Log extracted from xes
107
+ '''
108
+
109
+ log_traces = variants_filter.get_variants(complete_log)
110
+ keys = list(log_traces.keys())
111
+ sample_traces = {}
112
+ num_evs = 0
113
+ while num_evs < sample_size:
114
+ if len(keys) == 0:
115
+ break
116
+ random_trace = keys.pop()
117
+ sample_traces[random_trace] = log_traces[random_trace]
118
+ evs = sum([len(case_id) for case_id in sample_traces[random_trace]])
119
+ num_evs += evs
120
+ log1 = variants_filter.apply(complete_log, sample_traces)
121
+ return log1
122
+
123
+ def show_process_petrinet(event_log, filter_info, OUTPUT_PATH):
124
+ OUTPUT_PLOT = f"{OUTPUT_PATH}_{filter_info}".replace(":","").replace(".","")+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE
125
+
126
+ try:
127
+ fig_pt = mpimg.imread(OUTPUT_PLOT)
128
+ st.write("Loaded from memory")
129
+ except FileNotFoundError:
130
+ net, im, fm = inductive_miner(event_log)
131
+ # parameters={heuristics_miner.Variants.CLASSIC.value.Parameters.DEPENDENCY_THRESH: 0.99,
132
+ # pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"})
133
+ #parameters = {pn_visualizer.Variants.FREQUENCY.value.Parameters.FORMAT: "png"}
134
+ save_vis_petri_net(net, im, fm, OUTPUT_PLOT)
135
+ st.write("Saved in: ", OUTPUT_PLOT)
136
+ fig_pt = mpimg.imread(OUTPUT_PLOT)
137
+ st.image(fig_pt)
138
+
139
+ def show_loaded_event_log(event_log, event_df):
140
+ get_stats(event_log)
141
+ st.write("### Loaded event-log")
142
+ col1, col2 = st.columns(2)
143
+ with col2:
144
+ st.dataframe(event_df)
145
+ with col1:
146
+ show_process_petrinet(event_log, None, OUTPUT_PATH+"running-example")
147
+
148
+ def extract_meta_features(log, log_name):
149
+ mtf_cols = ["log", "n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "n_events", "trace_len_min", "trace_len_max",
150
+ "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1",
151
+ "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean",
152
+ "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1",
153
+ "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7",
154
+ "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist",
155
+ "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants",
156
+ "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence",
157
+ "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median",
158
+ "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness",
159
+ "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean",
160
+ "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3",
161
+ "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min",
162
+ "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance",
163
+ "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "entropy_trace",
164
+ "entropy_prefix", "entropy_global_block", "entropy_lempel_ziv", "entropy_k_block_diff_1", "entropy_k_block_diff_3",
165
+ "entropy_k_block_diff_5", "entropy_k_block_ratio_1", "entropy_k_block_ratio_3", "entropy_k_block_ratio_5", "entropy_knn_3",
166
+ "entropy_knn_5", "entropy_knn_7"]
167
+ features = [log_name]
168
+ features.extend(simple_stats(log))
169
+ features.extend(trace_length(log))
170
+ features.extend(trace_variant(log))
171
+ features.extend(activities(log))
172
+ features.extend(start_activities(log))
173
+ features.extend(end_activities(log))
174
+ features.extend(entropies(log_name, OUTPUT_PATH))
175
+
176
+ mtf = pd.DataFrame([features], columns=mtf_cols)
177
+
178
+ st.dataframe(mtf)
179
+ return mtf
180
+
181
def generate_pt(mtf):
    """Interactive process-tree generation seeded from extracted meta-features.

    Renders Streamlit inputs (pre-filled from the meta-feature frame ``mtf``),
    generates a process tree with pm4py's PTAndLogGenerator, plays out an event
    log from it, mines a Petri net with the inductive miner, saves/visualizes
    both models and finally re-extracts meta-features from the generated log.

    @param mtf: single-row pandas DataFrame of meta-features; the columns
        'activities_median', 'activities_min', 'activities_max' and 'n_traces'
        are read to seed the widget defaults.
    """
    # Strip ':' and '.' so the timestamped OUTPUT_PATH yields a valid file stem.
    OUTPUT_PLOT = f"{OUTPUT_PATH}/generated_pt".replace(":","").replace(".","")#+".png" # OUTPUT_PATH is OUTPUT_PATH+INPUT_FILE

    st.write("### PT Gen configurations")
    # Six side-by-side columns: tree-size parameters on top, operator
    # probabilities below; the four operator probabilities should sum to 1.
    col1, col2, col3, col4, col5, col6 = st.columns(6)
    with col1:
        # NOTE(review): median activity count used as the generator's MODE — confirm intent.
        param_mode = st.text_input('Mode', str(round(mtf['activities_median'].iat[0]))) #?
        st.write("Sum of probabilities must be one")
    with col2:
        param_min = st.text_input('Min', str(mtf['activities_min'].iat[0]))
        param_seq = st.text_input('Probability Sequence', 0.25)
    with col3:
        param_max = st.text_input('Max', str(mtf['activities_max'].iat[0]))
        param_cho = st.text_input('Probability Choice (XOR)', 0.25)
    with col4:
        param_nmo = st.text_input('Number of models', 1)
        param_par = st.text_input('Probability Parallel', 0.25)
    with col5:
        param_dup = st.text_input('Duplicates', 0)
        param_lop = st.text_input('Probability Loop', 0.25)
    with col6:
        param_sil = st.text_input('Silent', 0.2)
        param_or = st.text_input('Probability Or', 0.0)

    # Text inputs return strings; cast each to the type the generator expects.
    PT_PARAMS = {tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MODE: round(float(param_mode)), #most frequent number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MIN: int(param_min), #minimum number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.MAX: int(param_max), #maximum number of visible activities
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SEQUENCE: float(param_seq), #probability to add a sequence operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.CHOICE: float(param_cho), #probability to add a choice (XOR) operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.PARALLEL: float(param_par), #probability to add a parallel operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.LOOP: float(param_lop), #probability to add a loop operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.OR: float(param_or), #probability to add an or operator to tree
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.SILENT: float(param_sil), #probability to add silent activity to a choice or loop operator
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.DUPLICATE: int(param_dup), #probability to duplicate an activity label
                 tree_generator.Variants.PTANDLOGGENERATOR.value.Parameters.NO_MODELS: int(param_nmo)} #number of trees to generate from model population

    process_tree = generate_process_tree(parameters=PT_PARAMS)
    save_vis_process_tree(process_tree, OUTPUT_PLOT+"_tree.png")

    st.write("### Playout configurations")

    # Play out an event log from the generated tree (basic playout, n traces).
    param_ntraces = st.text_input('Number of traces', str(mtf['n_traces'].iat[0]))
    PO_PARAMS = {playout.Variants.BASIC_PLAYOUT.value.Parameters.NO_TRACES : int(param_ntraces)}

    ptgen_log = play_out(process_tree, parameters=PO_PARAMS)

    # Mine a Petri net from the played-out log and persist both visualizations.
    net, im, fm = inductive_miner(ptgen_log)
    save_vis_petri_net(net, im, fm, OUTPUT_PLOT+".png")
    st.write("Saved in: ", OUTPUT_PLOT)
    fig_pt_net = mpimg.imread(OUTPUT_PLOT+".png")
    fig_pt_tree = mpimg.imread(OUTPUT_PLOT+"_tree.png")

    # Show tree and Petri net side by side.
    fcol1, fcol2 = st.columns(2)
    with fcol1:
        st.image(fig_pt_tree)
    with fcol2:
        st.image(fig_pt_net)
    # Re-extract meta-features of the generated log for comparison with the input.
    extract_meta_features(ptgen_log, "gen_pt")
239
+
240
+
241
if __name__ == '__main__':
    st.set_page_config(layout='wide')
    # Bare string expression: Streamlit "magic" renders it as a markdown title.
    """
    # Event Log Generator
    """
    # Two entry modes: start from an uploaded event log, or from a CSV of
    # pre-computed meta-features.
    start_options = ['Event-Log', 'Meta-features']
    start_preference = st.sidebar.selectbox("Do you want to start with a log or with metafeatures?", start_options,0)
    #lets_start = st.sidebar.button("Let's start with "+start_preference+'!')

    if start_preference==start_options[0]:
        # --- Mode 1: upload an event log (csv/xes) ---
        st.sidebar.write("Upload a dataset in csv or xes-format:")
        uploaded_file = st.sidebar.file_uploader("Pick a logfile")

        bar = st.progress(0)

        os.makedirs(OUTPUT_PATH, exist_ok=True)
        # Reuse a previously loaded log from the session, if any.
        event_log = st.session_state['log'] if "log" in st.session_state else None
        if uploaded_file:
            event_log, event_df, case_id, activity_id = read_uploaded_file(uploaded_file)
            #event_log = deepcopy(event_log)

            # Optionally down-sample traces until roughly `sample_size` events remain.
            use_sample = st.sidebar.checkbox('Use random sample', True)
            if use_sample:
                sample_size = st.sidebar.text_input('Sample size of approx number of events', str(SAMPLE_EVENTS))
                sample_size = int(sample_size)

                event_log = sample_log_traces(event_log, sample_size)
                # Keep only the dataframe rows belonging to the sampled cases.
                sample_cases = [event_log[i].attributes['concept:name'] for i in range(0, len(event_log))]
                event_df = event_df[event_df[case_id].isin(sample_cases)]

            # Display the (possibly sampled) log, extract meta-features, then
            # drive the process-tree generator from them.
            show_loaded_event_log(event_log, event_df)
            ext_mtf = extract_meta_features(event_log, "running-example")
            generate_pt(ext_mtf)

    elif start_preference==start_options[1]:
        # --- Mode 2: upload a CSV of meta-features ---
        LOG_COL = 'log'
        st.sidebar.write("Upload a dataset in csv-format")
        uploaded_file = st.sidebar.file_uploader("Pick a file containing meta-features")

        bar = st.progress(0)

        os.makedirs(OUTPUT_PATH, exist_ok=True)
        event_log = st.session_state[LOG_COL] if "log" in st.session_state else None
        if uploaded_file:
            sep = st.sidebar.text_input("Columns separator", ";")
            mtf = load_from_csv(uploaded_file, sep)
            st.dataframe(mtf)

            # Pick one log's meta-feature row and generate a tree from it.
            log_options = mtf['log'].unique()
            log_preference = st.selectbox("What log should we use for generating a new event-log?", log_options,1)
            mtf_selection = mtf[mtf[LOG_COL]==log_preference]
            generate_pt(mtf_selection)
            st.write("##### Original")
            st.write(mtf_selection)
295
+
execute_grid_experiments.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import multiprocessing
2
+ import os
3
+
4
+ from datetime import datetime as dt
5
+ from tag.utils.io_helpers import sort_files
6
+ from tqdm import tqdm
7
+
8
#TODO: Pass i properly
def multi_experiment_wrapper(config_file, i=0):
    """Run one experiment by shelling out to main.py with the given config.

    @param config_file: path to an algorithm configuration (json) file
    @param i: experiment index used only for log banners; ``p.map`` passes a
        single argument, so this currently always stays at its default of 0
        (see the TODO above).
    """
    import shlex  # local import: only needed to quote the path safely

    print(f"=========================STARTING EXPERIMENT #{i+1}=======================")
    print(f"INFO: Executing with {config_file}")
    # Quote the directory-derived path so filenames containing spaces or shell
    # metacharacters cannot break (or inject into) the command line.
    os.system(f"python -W ignore main.py -o config_files/options/baseline.json -a {shlex.quote(config_file)}")
    print(f"=========================FINISHED EXPERIMENT #{i+1}=======================")
14
+
15
if __name__ == '__main__':
    # NOTE(review): the original reassigned EXPERIMENTS_FOLDER four times
    # ('34_bpic_features', 'grid_1obj', 'grid_experiments', 'test'); only the
    # last assignment ever took effect, so the dead ones were removed.
    EXPERIMENTS_FOLDER = os.path.join('config_files', 'algorithm', 'test')
    start = dt.now()

    # Sort the config files, join them onto the folder, and cap the run at 10
    # experiments (cap kept from the original — drop the slice for a full run).
    experiment_list = list(tqdm(sort_files(os.listdir(EXPERIMENTS_FOLDER))))
    experiment_list = [os.path.join(EXPERIMENTS_FOLDER, config_file) for config_file in experiment_list]
    experiment_list = experiment_list[:10]

    print(f"========================STARTING MULTIPLE EXPERIMENTS=========================")
    print(f"INFO: {EXPERIMENTS_FOLDER} contains config files for {len(experiment_list)} experiments.")
    try:
        # Never spawn more workers than there are experiments (or CPU cores).
        num_cores = min(multiprocessing.cpu_count(), len(experiment_list))
        with multiprocessing.Pool(num_cores) as p:
            try:
                print(f"INFO: Multi Experiments starting at {start.strftime('%H:%M:%S')} using {num_cores} cores for {len(experiment_list)} experiments...")
                result = p.map(multi_experiment_wrapper, experiment_list)
            except Exception as e:
                # Report worker failures without aborting the whole script.
                print(e)
    except Exception as e:
        print("ERROR:", e)

    #for i, config_file in enumerate(experiment_list[:2]):
main.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import config
2
+ import pandas as pd
3
+ from datetime import datetime as dt
4
+ from tag.generator import GenerateEventLogs
5
+ from tag.features import EventLogFeatures
6
+ from tag.analyser import FeatureAnalyser
7
+ from tag.augmentation import InstanceAugmentator
8
+ from tag.benchmark import BenchmarkTest
9
+ from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
+ from utils.default_argparse import ArgParser
11
+ from utils.param_keys import *
12
+ from utils.param_keys.run_options import *
13
+
14
def run(kwargs: dict, model_params_list: list, filename_list: list):
    """
    This function chooses the running option for the program.

    Bug fix: the parameter was previously declared as ``model_paramas_list``
    (typo) while the body referenced ``model_params_list``, so every call
    raised NameError. The only visible caller passes positionally, so renaming
    the parameter to the spelling the body uses is the fix.

    @param kwargs: dict
        contains the running parameters and the event-log file information
    @param model_params_list: list
        contains a list of model parameters, which are used to analyse these different models.
    @param filename_list: list
        contains the list of the filenames to load multiple event-logs
    @return:
    """
    params = kwargs[PARAMS]
    run_option = params[RUN_OPTION]
    # Defaults so later pipeline steps can run even when earlier ones are skipped.
    ft = EventLogFeatures(None)
    augmented_ft = InstanceAugmentator()
    gen = pd.DataFrame(columns=['log'])

    if run_option == BASELINE:
        # Execute each configured pipeline step in order; steps share state via
        # `gen` (generated logs) and `ft` (extracted features).
        for model_params in model_params_list:
            if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
                augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
                AugmentationPlotter(augmented_ft, model_params)
            elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
                gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
                #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
                GenerationPlotter(gen, model_params, output_path="output/plots")
            elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
                benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
                # BenchmarkPlotter(benchmark.features, output_path="output/plots")
            elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
                ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
                FeaturesPlotter(ft.feat, model_params)
            elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
                GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])

    elif run_option == COMPARE:
        # Comparison plots require exactly a 2-D projection.
        if params[N_COMPONENTS] != 2:
            raise ValueError(f'The parameter `{N_COMPONENTS}` has to be 2, but it\'s {params[N_COMPONENTS]}.')
        ft = EventLogFeatures(**kwargs)
        FeatureAnalyser(ft, params).compare(model_params_list)
    else:
        # NOTE(review): InvalidRunningOptionError is not defined in this module
        # as shown — presumably provided by the star imports; confirm.
        raise InvalidRunningOptionError(f'The run_option: `{run_option}` in the (json) configuration '
                                        f'does not exists or it is not a loading option.\n')
57
+
58
+
59
if __name__=='__main__':
    # Entry point: time the whole pipeline run end to end.
    start_tag = dt.now()
    print(f'INFO: TAG starting {start_tag}')

    # Parse CLI arguments and resolve run options / file lists from the config.
    args = ArgParser().parse('GEDI main')
    run_params = config.get_run_params(args.run_params_json)
    filename_list, kwargs = config.get_files_and_kwargs(run_params)

    if args.result_load_files is None:
        # Normal path: build the per-step model parameters and run the pipeline.
        model_params_list = config.get_model_params_list(args.alg_params_json)
        run(kwargs, model_params_list, filename_list)
    else:
        # NOTE(review): `load` is not defined or imported in this module as
        # shown — this branch would raise NameError; confirm its origin.
        load(args.result_load_files, kwargs)

    print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 9,
6
+ "id": "e5aa7223",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import pandas as pd\n",
11
+ "import numpy as np"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 10,
17
+ "id": "dfd1a302",
18
+ "metadata": {},
19
+ "outputs": [],
20
+ "source": [
21
+ "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"]) "
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 28,
27
+ "id": "218946b7",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "k=0\n",
32
+ "for i in np.arange(0.2, 1.1,0.2):\n",
33
+ " for j in np.arange(0,0.55,0.1):\n",
34
+ " k+=1\n",
35
+ " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
36
+ " \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
37
+ " df = pd.concat([\n",
38
+ " df, \n",
39
+ " pd.DataFrame([new_entry], columns=new_entry.index)]\n",
40
+ " ).reset_index(drop=True)\n",
41
+ " "
42
+ ]
43
+ },
44
+ {
45
+ "cell_type": "code",
46
+ "execution_count": 31,
47
+ "id": "b1e3bb5a",
48
+ "metadata": {},
49
+ "outputs": [],
50
+ "source": [
51
+ "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "code",
56
+ "execution_count": 32,
57
+ "id": "5de45389",
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "data": {
62
+ "text/html": [
63
+ "<div>\n",
64
+ "<style scoped>\n",
65
+ " .dataframe tbody tr th:only-of-type {\n",
66
+ " vertical-align: middle;\n",
67
+ " }\n",
68
+ "\n",
69
+ " .dataframe tbody tr th {\n",
70
+ " vertical-align: top;\n",
71
+ " }\n",
72
+ "\n",
73
+ " .dataframe thead th {\n",
74
+ " text-align: right;\n",
75
+ " }\n",
76
+ "</style>\n",
77
+ "<table border=\"1\" class=\"dataframe\">\n",
78
+ " <thead>\n",
79
+ " <tr style=\"text-align: right;\">\n",
80
+ " <th></th>\n",
81
+ " <th>log</th>\n",
82
+ " <th>ratio_top_20_variants</th>\n",
83
+ " <th>normalized_sequence_entropy_linear_forgetting</th>\n",
84
+ " </tr>\n",
85
+ " </thead>\n",
86
+ " <tbody>\n",
87
+ " <tr>\n",
88
+ " <th>0</th>\n",
89
+ " <td>objective_1</td>\n",
90
+ " <td>0.2</td>\n",
91
+ " <td>0.0</td>\n",
92
+ " </tr>\n",
93
+ " <tr>\n",
94
+ " <th>1</th>\n",
95
+ " <td>objective_2</td>\n",
96
+ " <td>0.2</td>\n",
97
+ " <td>0.1</td>\n",
98
+ " </tr>\n",
99
+ " <tr>\n",
100
+ " <th>2</th>\n",
101
+ " <td>objective_3</td>\n",
102
+ " <td>0.2</td>\n",
103
+ " <td>0.2</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>3</th>\n",
107
+ " <td>objective_4</td>\n",
108
+ " <td>0.2</td>\n",
109
+ " <td>0.3</td>\n",
110
+ " </tr>\n",
111
+ " <tr>\n",
112
+ " <th>4</th>\n",
113
+ " <td>objective_5</td>\n",
114
+ " <td>0.2</td>\n",
115
+ " <td>0.4</td>\n",
116
+ " </tr>\n",
117
+ " <tr>\n",
118
+ " <th>5</th>\n",
119
+ " <td>objective_6</td>\n",
120
+ " <td>0.2</td>\n",
121
+ " <td>0.5</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>6</th>\n",
125
+ " <td>objective_7</td>\n",
126
+ " <td>0.4</td>\n",
127
+ " <td>0.0</td>\n",
128
+ " </tr>\n",
129
+ " <tr>\n",
130
+ " <th>7</th>\n",
131
+ " <td>objective_8</td>\n",
132
+ " <td>0.4</td>\n",
133
+ " <td>0.1</td>\n",
134
+ " </tr>\n",
135
+ " <tr>\n",
136
+ " <th>8</th>\n",
137
+ " <td>objective_9</td>\n",
138
+ " <td>0.4</td>\n",
139
+ " <td>0.2</td>\n",
140
+ " </tr>\n",
141
+ " <tr>\n",
142
+ " <th>9</th>\n",
143
+ " <td>objective_10</td>\n",
144
+ " <td>0.4</td>\n",
145
+ " <td>0.3</td>\n",
146
+ " </tr>\n",
147
+ " <tr>\n",
148
+ " <th>10</th>\n",
149
+ " <td>objective_11</td>\n",
150
+ " <td>0.4</td>\n",
151
+ " <td>0.4</td>\n",
152
+ " </tr>\n",
153
+ " <tr>\n",
154
+ " <th>11</th>\n",
155
+ " <td>objective_12</td>\n",
156
+ " <td>0.4</td>\n",
157
+ " <td>0.5</td>\n",
158
+ " </tr>\n",
159
+ " <tr>\n",
160
+ " <th>12</th>\n",
161
+ " <td>objective_13</td>\n",
162
+ " <td>0.6</td>\n",
163
+ " <td>0.0</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>13</th>\n",
167
+ " <td>objective_14</td>\n",
168
+ " <td>0.6</td>\n",
169
+ " <td>0.1</td>\n",
170
+ " </tr>\n",
171
+ " <tr>\n",
172
+ " <th>14</th>\n",
173
+ " <td>objective_15</td>\n",
174
+ " <td>0.6</td>\n",
175
+ " <td>0.2</td>\n",
176
+ " </tr>\n",
177
+ " <tr>\n",
178
+ " <th>15</th>\n",
179
+ " <td>objective_16</td>\n",
180
+ " <td>0.6</td>\n",
181
+ " <td>0.3</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>16</th>\n",
185
+ " <td>objective_17</td>\n",
186
+ " <td>0.6</td>\n",
187
+ " <td>0.4</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>17</th>\n",
191
+ " <td>objective_18</td>\n",
192
+ " <td>0.6</td>\n",
193
+ " <td>0.5</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>18</th>\n",
197
+ " <td>objective_19</td>\n",
198
+ " <td>0.8</td>\n",
199
+ " <td>0.0</td>\n",
200
+ " </tr>\n",
201
+ " <tr>\n",
202
+ " <th>19</th>\n",
203
+ " <td>objective_20</td>\n",
204
+ " <td>0.8</td>\n",
205
+ " <td>0.1</td>\n",
206
+ " </tr>\n",
207
+ " <tr>\n",
208
+ " <th>20</th>\n",
209
+ " <td>objective_21</td>\n",
210
+ " <td>0.8</td>\n",
211
+ " <td>0.2</td>\n",
212
+ " </tr>\n",
213
+ " <tr>\n",
214
+ " <th>21</th>\n",
215
+ " <td>objective_22</td>\n",
216
+ " <td>0.8</td>\n",
217
+ " <td>0.3</td>\n",
218
+ " </tr>\n",
219
+ " <tr>\n",
220
+ " <th>22</th>\n",
221
+ " <td>objective_23</td>\n",
222
+ " <td>0.8</td>\n",
223
+ " <td>0.4</td>\n",
224
+ " </tr>\n",
225
+ " <tr>\n",
226
+ " <th>23</th>\n",
227
+ " <td>objective_24</td>\n",
228
+ " <td>0.8</td>\n",
229
+ " <td>0.5</td>\n",
230
+ " </tr>\n",
231
+ " <tr>\n",
232
+ " <th>24</th>\n",
233
+ " <td>objective_25</td>\n",
234
+ " <td>1.0</td>\n",
235
+ " <td>0.0</td>\n",
236
+ " </tr>\n",
237
+ " <tr>\n",
238
+ " <th>25</th>\n",
239
+ " <td>objective_26</td>\n",
240
+ " <td>1.0</td>\n",
241
+ " <td>0.1</td>\n",
242
+ " </tr>\n",
243
+ " <tr>\n",
244
+ " <th>26</th>\n",
245
+ " <td>objective_27</td>\n",
246
+ " <td>1.0</td>\n",
247
+ " <td>0.2</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>27</th>\n",
251
+ " <td>objective_28</td>\n",
252
+ " <td>1.0</td>\n",
253
+ " <td>0.3</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>28</th>\n",
257
+ " <td>objective_29</td>\n",
258
+ " <td>1.0</td>\n",
259
+ " <td>0.4</td>\n",
260
+ " </tr>\n",
261
+ " <tr>\n",
262
+ " <th>29</th>\n",
263
+ " <td>objective_30</td>\n",
264
+ " <td>1.0</td>\n",
265
+ " <td>0.5</td>\n",
266
+ " </tr>\n",
267
+ " </tbody>\n",
268
+ "</table>\n",
269
+ "</div>"
270
+ ],
271
+ "text/plain": [
272
+ " log ratio_top_20_variants \n",
273
+ "0 objective_1 0.2 \\\n",
274
+ "1 objective_2 0.2 \n",
275
+ "2 objective_3 0.2 \n",
276
+ "3 objective_4 0.2 \n",
277
+ "4 objective_5 0.2 \n",
278
+ "5 objective_6 0.2 \n",
279
+ "6 objective_7 0.4 \n",
280
+ "7 objective_8 0.4 \n",
281
+ "8 objective_9 0.4 \n",
282
+ "9 objective_10 0.4 \n",
283
+ "10 objective_11 0.4 \n",
284
+ "11 objective_12 0.4 \n",
285
+ "12 objective_13 0.6 \n",
286
+ "13 objective_14 0.6 \n",
287
+ "14 objective_15 0.6 \n",
288
+ "15 objective_16 0.6 \n",
289
+ "16 objective_17 0.6 \n",
290
+ "17 objective_18 0.6 \n",
291
+ "18 objective_19 0.8 \n",
292
+ "19 objective_20 0.8 \n",
293
+ "20 objective_21 0.8 \n",
294
+ "21 objective_22 0.8 \n",
295
+ "22 objective_23 0.8 \n",
296
+ "23 objective_24 0.8 \n",
297
+ "24 objective_25 1.0 \n",
298
+ "25 objective_26 1.0 \n",
299
+ "26 objective_27 1.0 \n",
300
+ "27 objective_28 1.0 \n",
301
+ "28 objective_29 1.0 \n",
302
+ "29 objective_30 1.0 \n",
303
+ "\n",
304
+ " normalized_sequence_entropy_linear_forgetting \n",
305
+ "0 0.0 \n",
306
+ "1 0.1 \n",
307
+ "2 0.2 \n",
308
+ "3 0.3 \n",
309
+ "4 0.4 \n",
310
+ "5 0.5 \n",
311
+ "6 0.0 \n",
312
+ "7 0.1 \n",
313
+ "8 0.2 \n",
314
+ "9 0.3 \n",
315
+ "10 0.4 \n",
316
+ "11 0.5 \n",
317
+ "12 0.0 \n",
318
+ "13 0.1 \n",
319
+ "14 0.2 \n",
320
+ "15 0.3 \n",
321
+ "16 0.4 \n",
322
+ "17 0.5 \n",
323
+ "18 0.0 \n",
324
+ "19 0.1 \n",
325
+ "20 0.2 \n",
326
+ "21 0.3 \n",
327
+ "22 0.4 \n",
328
+ "23 0.5 \n",
329
+ "24 0.0 \n",
330
+ "25 0.1 \n",
331
+ "26 0.2 \n",
332
+ "27 0.3 \n",
333
+ "28 0.4 \n",
334
+ "29 0.5 "
335
+ ]
336
+ },
337
+ "execution_count": 32,
338
+ "metadata": {},
339
+ "output_type": "execute_result"
340
+ }
341
+ ],
342
+ "source": [
343
+ "df"
344
+ ]
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "id": "d726a5ae",
350
+ "metadata": {},
351
+ "outputs": [],
352
+ "source": []
353
+ }
354
+ ],
355
+ "metadata": {
356
+ "kernelspec": {
357
+ "display_name": "Python 3 (ipykernel)",
358
+ "language": "python",
359
+ "name": "python3"
360
+ },
361
+ "language_info": {
362
+ "codemirror_mode": {
363
+ "name": "ipython",
364
+ "version": 3
365
+ },
366
+ "file_extension": ".py",
367
+ "mimetype": "text/x-python",
368
+ "name": "python",
369
+ "nbconvert_exporter": "python",
370
+ "pygments_lexer": "ipython3",
371
+ "version": "3.9.7"
372
+ }
373
+ },
374
+ "nbformat": 4,
375
+ "nbformat_minor": 5
376
+ }
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [],
3
+ "metadata": {},
4
+ "nbformat": 4,
5
+ "nbformat_minor": 5
6
+ }
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb ADDED
@@ -0,0 +1,818 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 4,
6
+ "id": "4827785f",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "data": {
11
+ "text/html": [
12
+ "<div>\n",
13
+ "<style scoped>\n",
14
+ " .dataframe tbody tr th:only-of-type {\n",
15
+ " vertical-align: middle;\n",
16
+ " }\n",
17
+ "\n",
18
+ " .dataframe tbody tr th {\n",
19
+ " vertical-align: top;\n",
20
+ " }\n",
21
+ "\n",
22
+ " .dataframe thead th {\n",
23
+ " text-align: right;\n",
24
+ " }\n",
25
+ "</style>\n",
26
+ "<table border=\"1\" class=\"dataframe\">\n",
27
+ " <thead>\n",
28
+ " <tr style=\"text-align: right;\">\n",
29
+ " <th></th>\n",
30
+ " <th>Name</th>\n",
31
+ " <th>Short description</th>\n",
32
+ " <th>data link</th>\n",
33
+ " <th>challenge link</th>\n",
34
+ " <th>Citations (Stand Februar 2023)</th>\n",
35
+ " <th>Publications</th>\n",
36
+ " <th>Process Discovery/ Declarative</th>\n",
37
+ " <th>Conformance Checking / Alignment / Replay</th>\n",
38
+ " <th>Online / Streaming / Realtime</th>\n",
39
+ " <th>Performance (Analysis) / Temporal / Time</th>\n",
40
+ " <th>Predict(ive)/ Monitoring/ Prescriptive</th>\n",
41
+ " <th>Trace clustering / Clustering</th>\n",
42
+ " <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
43
+ " <th>Further keywords:</th>\n",
44
+ " </tr>\n",
45
+ " </thead>\n",
46
+ " <tbody>\n",
47
+ " <tr>\n",
48
+ " <th>0</th>\n",
49
+ " <td>Sepsis Cases - Event Log</td>\n",
50
+ " <td>This real-life event log contains events of se...</td>\n",
51
+ " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
52
+ " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
53
+ " <td>61</td>\n",
54
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
55
+ " <td>17</td>\n",
56
+ " <td>7</td>\n",
57
+ " <td>4</td>\n",
58
+ " <td>1</td>\n",
59
+ " <td>8</td>\n",
60
+ " <td>2</td>\n",
61
+ " <td>2</td>\n",
62
+ " <td>(machine) learning, (online process) monitorin...</td>\n",
63
+ " </tr>\n",
64
+ " <tr>\n",
65
+ " <th>1</th>\n",
66
+ " <td>BPI 2017 - Offer Log</td>\n",
67
+ " <td>Contains data from a financial institute inclu...</td>\n",
68
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
69
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
70
+ " <td>4</td>\n",
71
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
72
+ " <td>1</td>\n",
73
+ " <td>0</td>\n",
74
+ " <td>0</td>\n",
75
+ " <td>1</td>\n",
76
+ " <td>1</td>\n",
77
+ " <td>0</td>\n",
78
+ " <td>0</td>\n",
79
+ " <td>(machine) learning, cloud computing</td>\n",
80
+ " </tr>\n",
81
+ " <tr>\n",
82
+ " <th>2</th>\n",
83
+ " <td>Road Traffic Fine Management Process (not BPI)</td>\n",
84
+ " <td>A real-life event log taken from an informatio...</td>\n",
85
+ " <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
86
+ " <td>NaN</td>\n",
87
+ " <td>95</td>\n",
88
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
89
+ " <td>32</td>\n",
90
+ " <td>9</td>\n",
91
+ " <td>4</td>\n",
92
+ " <td>8</td>\n",
93
+ " <td>15</td>\n",
94
+ " <td>1</td>\n",
95
+ " <td>2</td>\n",
96
+ " <td>alarm-based prescriptive process monitoring, b...</td>\n",
97
+ " </tr>\n",
98
+ " <tr>\n",
99
+ " <th>3</th>\n",
100
+ " <td>BPI 2011</td>\n",
101
+ " <td>Contains data from from a Dutch Academic Hospi...</td>\n",
102
+ " <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
103
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
104
+ " <td>57</td>\n",
105
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
106
+ " <td>13</td>\n",
107
+ " <td>1</td>\n",
108
+ " <td>3</td>\n",
109
+ " <td>4</td>\n",
110
+ " <td>12</td>\n",
111
+ " <td>4</td>\n",
112
+ " <td>1</td>\n",
113
+ " <td>(compliance) monitoring, (machine) learning, d...</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>4</th>\n",
117
+ " <td>BPI 2012</td>\n",
118
+ " <td>Contains the event log of an application proce...</td>\n",
119
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
120
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
121
+ " <td>151</td>\n",
122
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
123
+ " <td>40</td>\n",
124
+ " <td>15</td>\n",
125
+ " <td>4</td>\n",
126
+ " <td>13</td>\n",
127
+ " <td>46</td>\n",
128
+ " <td>0</td>\n",
129
+ " <td>1</td>\n",
130
+ " <td>(in)frequent patterns in process models, (mach...</td>\n",
131
+ " </tr>\n",
132
+ " <tr>\n",
133
+ " <th>5</th>\n",
134
+ " <td>BPI 2013 - Open Problems</td>\n",
135
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
136
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
137
+ " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
138
+ " <td>6</td>\n",
139
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
140
+ " <td>1</td>\n",
141
+ " <td>0</td>\n",
142
+ " <td>0</td>\n",
143
+ " <td>0</td>\n",
144
+ " <td>1</td>\n",
145
+ " <td>0</td>\n",
146
+ " <td>0</td>\n",
147
+ " <td>(in)frequent patterns in process models, (mach...</td>\n",
148
+ " </tr>\n",
149
+ " <tr>\n",
150
+ " <th>6</th>\n",
151
+ " <td>BPI 2013 - Closed Problems</td>\n",
152
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
153
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
154
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
155
+ " <td>12</td>\n",
156
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
157
+ " <td>3</td>\n",
158
+ " <td>2</td>\n",
159
+ " <td>1</td>\n",
160
+ " <td>2</td>\n",
161
+ " <td>0</td>\n",
162
+ " <td>0</td>\n",
163
+ " <td>3</td>\n",
164
+ " <td>(in)frequent patterns in process models</td>\n",
165
+ " </tr>\n",
166
+ " <tr>\n",
167
+ " <th>7</th>\n",
168
+ " <td>BPI 2013 - Incidents</td>\n",
169
+ " <td>The log contains events from an incident and p...</td>\n",
170
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
171
+ " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
172
+ " <td>36</td>\n",
173
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
174
+ " <td>14</td>\n",
175
+ " <td>5</td>\n",
176
+ " <td>1</td>\n",
177
+ " <td>1</td>\n",
178
+ " <td>7</td>\n",
179
+ " <td>0</td>\n",
180
+ " <td>2</td>\n",
181
+ " <td>(machine) learning, rule mining</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>8</th>\n",
185
+ " <td>BPI 2014 - Incident Records</td>\n",
186
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
187
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
188
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
189
+ " <td>5</td>\n",
190
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
191
+ " <td>1</td>\n",
192
+ " <td>0</td>\n",
193
+ " <td>0</td>\n",
194
+ " <td>0</td>\n",
195
+ " <td>0</td>\n",
196
+ " <td>0</td>\n",
197
+ " <td>0</td>\n",
198
+ " <td>privacy preservation, security</td>\n",
199
+ " </tr>\n",
200
+ " <tr>\n",
201
+ " <th>9</th>\n",
202
+ " <td>BPI 2014 - Interaction Records</td>\n",
203
+ " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
204
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
205
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
206
+ " <td>1</td>\n",
207
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
208
+ " <td>0</td>\n",
209
+ " <td>0</td>\n",
210
+ " <td>0</td>\n",
211
+ " <td>0</td>\n",
212
+ " <td>0</td>\n",
213
+ " <td>0</td>\n",
214
+ " <td>0</td>\n",
215
+ " <td>(machine) learning, hidden Markov models</td>\n",
216
+ " </tr>\n",
217
+ " <tr>\n",
218
+ " <th>10</th>\n",
219
+ " <td>BPI 2015 - Log 3</td>\n",
220
+ " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
221
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
222
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
223
+ " <td>1</td>\n",
224
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
225
+ " <td>0</td>\n",
226
+ " <td>0</td>\n",
227
+ " <td>0</td>\n",
228
+ " <td>0</td>\n",
229
+ " <td>1</td>\n",
230
+ " <td>0</td>\n",
231
+ " <td>0</td>\n",
232
+ " <td>specification-driven predictive business proce...</td>\n",
233
+ " </tr>\n",
234
+ " <tr>\n",
235
+ " <th>11</th>\n",
236
+ " <td>BPI 2015 - Log 1</td>\n",
237
+ " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
238
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
239
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
240
+ " <td>8</td>\n",
241
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
242
+ " <td>1</td>\n",
243
+ " <td>1</td>\n",
244
+ " <td>0</td>\n",
245
+ " <td>0</td>\n",
246
+ " <td>3</td>\n",
247
+ " <td>0</td>\n",
248
+ " <td>3</td>\n",
249
+ " <td>(machine) learning</td>\n",
250
+ " </tr>\n",
251
+ " <tr>\n",
252
+ " <th>12</th>\n",
253
+ " <td>BPI 2016 - Clicks Logged In</td>\n",
254
+ " <td>Contains clicks of users that are logged in fr...</td>\n",
255
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
256
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
257
+ " <td>1</td>\n",
258
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
259
+ " <td>1</td>\n",
260
+ " <td>0</td>\n",
261
+ " <td>1</td>\n",
262
+ " <td>0</td>\n",
263
+ " <td>0</td>\n",
264
+ " <td>0</td>\n",
265
+ " <td>0</td>\n",
266
+ " <td>automation</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>13</th>\n",
270
+ " <td>BPI 2017 - Application Log</td>\n",
271
+ " <td>Contains data from a financial institute inclu...</td>\n",
272
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
273
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
274
+ " <td>73</td>\n",
275
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
276
+ " <td>11</td>\n",
277
+ " <td>5</td>\n",
278
+ " <td>2</td>\n",
279
+ " <td>14</td>\n",
280
+ " <td>23</td>\n",
281
+ " <td>1</td>\n",
282
+ " <td>1</td>\n",
283
+ " <td>(machine) learning, alarm-based prescriptive p...</td>\n",
284
+ " </tr>\n",
285
+ " <tr>\n",
286
+ " <th>14</th>\n",
287
+ " <td>BPI 2018</td>\n",
288
+ " <td>The process covers the handling of application...</td>\n",
289
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
290
+ " <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
291
+ " <td>26</td>\n",
292
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
293
+ " <td>7</td>\n",
294
+ " <td>1</td>\n",
295
+ " <td>2</td>\n",
296
+ " <td>0</td>\n",
297
+ " <td>8</td>\n",
298
+ " <td>0</td>\n",
299
+ " <td>2</td>\n",
300
+ " <td>(machine) learning, automation</td>\n",
301
+ " </tr>\n",
302
+ " <tr>\n",
303
+ " <th>15</th>\n",
304
+ " <td>BPI 2020 - Travel Permits</td>\n",
305
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
306
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
307
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
308
+ " <td>2</td>\n",
309
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
310
+ " <td>0</td>\n",
311
+ " <td>0</td>\n",
312
+ " <td>0</td>\n",
313
+ " <td>1</td>\n",
314
+ " <td>0</td>\n",
315
+ " <td>0</td>\n",
316
+ " <td>0</td>\n",
317
+ " <td>stage-based process performance analysis</td>\n",
318
+ " </tr>\n",
319
+ " <tr>\n",
320
+ " <th>16</th>\n",
321
+ " <td>BPI 2019</td>\n",
322
+ " <td>Contains the purchase order handling process o...</td>\n",
323
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
324
+ " <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
325
+ " <td>35</td>\n",
326
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
327
+ " <td>3</td>\n",
328
+ " <td>1</td>\n",
329
+ " <td>6</td>\n",
330
+ " <td>6</td>\n",
331
+ " <td>9</td>\n",
332
+ " <td>4</td>\n",
333
+ " <td>1</td>\n",
334
+ " <td>(online process) monitoring, remaining time pr...</td>\n",
335
+ " </tr>\n",
336
+ " <tr>\n",
337
+ " <th>17</th>\n",
338
+ " <td>BPI 2020 - International Declarations</td>\n",
339
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
340
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
341
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
342
+ " <td>2</td>\n",
343
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
344
+ " <td>0</td>\n",
345
+ " <td>0</td>\n",
346
+ " <td>0</td>\n",
347
+ " <td>1</td>\n",
348
+ " <td>2</td>\n",
349
+ " <td>0</td>\n",
350
+ " <td>0</td>\n",
351
+ " <td>(machine) learning, remaining time prediction</td>\n",
352
+ " </tr>\n",
353
+ " <tr>\n",
354
+ " <th>18</th>\n",
355
+ " <td>BPI 2020 - Domestic Declarations</td>\n",
356
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
357
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
358
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
359
+ " <td>7</td>\n",
360
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
361
+ " <td>0</td>\n",
362
+ " <td>2</td>\n",
363
+ " <td>2</td>\n",
364
+ " <td>2</td>\n",
365
+ " <td>3</td>\n",
366
+ " <td>0</td>\n",
367
+ " <td>0</td>\n",
368
+ " <td>(machine) learning, remaining time prediction</td>\n",
369
+ " </tr>\n",
370
+ " <tr>\n",
371
+ " <th>19</th>\n",
372
+ " <td>BPI 2020 - Prepaid Travel Cost</td>\n",
373
+ " <td>Contains 2 years of data from the reimbursemen...</td>\n",
374
+ " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
375
+ " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
376
+ " <td>2</td>\n",
377
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
378
+ " <td>0</td>\n",
379
+ " <td>0</td>\n",
380
+ " <td>0</td>\n",
381
+ " <td>0</td>\n",
382
+ " <td>0</td>\n",
383
+ " <td>0</td>\n",
384
+ " <td>0</td>\n",
385
+ " <td>multi-perspective</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>20</th>\n",
389
+ " <td>Helpdesk</td>\n",
390
+ " <td>Ticketing management process of the Help desk ...</td>\n",
391
+ " <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
392
+ " <td>NaN</td>\n",
393
+ " <td>20</td>\n",
394
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
395
+ " <td>4</td>\n",
396
+ " <td>1</td>\n",
397
+ " <td>3</td>\n",
398
+ " <td>1</td>\n",
399
+ " <td>8</td>\n",
400
+ " <td>0</td>\n",
401
+ " <td>0</td>\n",
402
+ " <td>(machine) learning, drift detection</td>\n",
403
+ " </tr>\n",
404
+ " <tr>\n",
405
+ " <th>21</th>\n",
406
+ " <td>Receipt phase of an environmental permit appli...</td>\n",
407
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
408
+ " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
409
+ " <td>NaN</td>\n",
410
+ " <td>15</td>\n",
411
+ " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
412
+ " <td>-1</td>\n",
413
+ " <td>-1</td>\n",
414
+ " <td>-1</td>\n",
415
+ " <td>-1</td>\n",
416
+ " <td>-1</td>\n",
417
+ " <td>-1</td>\n",
418
+ " <td>-1</td>\n",
419
+ " <td>NaN</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <th>22</th>\n",
423
+ " <td>Environmental permit application process (‘WAB...</td>\n",
424
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
425
+ " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
426
+ " <td>NaN</td>\n",
427
+ " <td>2</td>\n",
428
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
429
+ " <td>0</td>\n",
430
+ " <td>0</td>\n",
431
+ " <td>0</td>\n",
432
+ " <td>0</td>\n",
433
+ " <td>1</td>\n",
434
+ " <td>0</td>\n",
435
+ " <td>0</td>\n",
436
+ " <td>predictions with a-priori knowledge</td>\n",
437
+ " </tr>\n",
438
+ " <tr>\n",
439
+ " <th>23</th>\n",
440
+ " <td>Environmental permit application process (‘WAB...</td>\n",
441
+ " <td>Data originates from the CoSeLoG project where...</td>\n",
442
+ " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
443
+ " <td>NaN</td>\n",
444
+ " <td>2</td>\n",
445
+ " <td>https://app.dimensions.ai/discover/publication...</td>\n",
446
+ " <td>1</td>\n",
447
+ " <td>0</td>\n",
448
+ " <td>0</td>\n",
449
+ " <td>0</td>\n",
450
+ " <td>0</td>\n",
451
+ " <td>0</td>\n",
452
+ " <td>0</td>\n",
453
+ " <td>multidimensional process mining, process cubes</td>\n",
454
+ " </tr>\n",
455
+ " <tr>\n",
456
+ " <th>24</th>\n",
457
+ " <td>NaN</td>\n",
458
+ " <td>NaN</td>\n",
459
+ " <td>NaN</td>\n",
460
+ " <td>NaN</td>\n",
461
+ " <td>NaN</td>\n",
462
+ " <td>NaN</td>\n",
463
+ " <td>NaN</td>\n",
464
+ " <td>NaN</td>\n",
465
+ " <td>NaN</td>\n",
466
+ " <td>NaN</td>\n",
467
+ " <td>NaN</td>\n",
468
+ " <td>NaN</td>\n",
469
+ " <td>NaN</td>\n",
470
+ " <td>NaN</td>\n",
471
+ " </tr>\n",
472
+ " </tbody>\n",
473
+ "</table>\n",
474
+ "</div>"
475
+ ],
476
+ "text/plain": [
477
+ " Name \\\n",
478
+ "0 Sepsis Cases - Event Log \n",
479
+ "1 BPI 2017 - Offer Log \n",
480
+ "2 Road Traffic Fine Management Process (not BPI) \n",
481
+ "3 BPI 2011 \n",
482
+ "4 BPI 2012 \n",
483
+ "5 BPI 2013 - Open Problems \n",
484
+ "6 BPI 2013 - Closed Problems \n",
485
+ "7 BPI 2013 - Incidents \n",
486
+ "8 BPI 2014 - Incident Records \n",
487
+ "9 BPI 2014 - Interaction Records \n",
488
+ "10 BPI 2015 - Log 3 \n",
489
+ "11 BPI 2015 - Log 1 \n",
490
+ "12 BPI 2016 - Clicks Logged In \n",
491
+ "13 BPI 2017 - Application Log \n",
492
+ "14 BPI 2018 \n",
493
+ "15 BPI 2020 - Travel Permits \n",
494
+ "16 BPI 2019 \n",
495
+ "17 BPI 2020 - International Declarations \n",
496
+ "18 BPI 2020 - Domestic Declarations \n",
497
+ "19 BPI 2020 - Prepaid Travel Cost \n",
498
+ "20 Helpdesk \n",
499
+ "21 Receipt phase of an environmental permit appli... \n",
500
+ "22 Environmental permit application process (‘WAB... \n",
501
+ "23 Environmental permit application process (‘WAB... \n",
502
+ "24 NaN \n",
503
+ "\n",
504
+ " Short description \\\n",
505
+ "0 This real-life event log contains events of se... \n",
506
+ "1 Contains data from a financial institute inclu... \n",
507
+ "2 A real-life event log taken from an informatio... \n",
508
+ "3 Contains data from from a Dutch Academic Hospi... \n",
509
+ "4 Contains the event log of an application proce... \n",
510
+ "5 Rabobank Group ICT implemented ITIL processes ... \n",
511
+ "6 Rabobank Group ICT implemented ITIL processes ... \n",
512
+ "7 The log contains events from an incident and p... \n",
513
+ "8 Rabobank Group ICT implemented ITIL processes ... \n",
514
+ "9 Rabobank Group ICT implemented ITIL processes ... \n",
515
+ "10 Provided by 5 Dutch municipalities. The data c... \n",
516
+ "11 Provided by 5 Dutch municipalities. The data c... \n",
517
+ "12 Contains clicks of users that are logged in fr... \n",
518
+ "13 Contains data from a financial institute inclu... \n",
519
+ "14 The process covers the handling of application... \n",
520
+ "15 Contains 2 years of data from the reimbursemen... \n",
521
+ "16 Contains the purchase order handling process o... \n",
522
+ "17 Contains 2 years of data from the reimbursemen... \n",
523
+ "18 Contains 2 years of data from the reimbursemen... \n",
524
+ "19 Contains 2 years of data from the reimbursemen... \n",
525
+ "20 Ticketing management process of the Help desk ... \n",
526
+ "21 Data originates from the CoSeLoG project where... \n",
527
+ "22 Data originates from the CoSeLoG project where... \n",
528
+ "23 Data originates from the CoSeLoG project where... \n",
529
+ "24 NaN \n",
530
+ "\n",
531
+ " data link \\\n",
532
+ "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
533
+ "1 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
534
+ "2 https://data.4tu.nl/articles/dataset/Road_Traf... \n",
535
+ "3 https://data.4tu.nl/articles/dataset/Real-life... \n",
536
+ "4 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
537
+ "5 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
538
+ "6 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
539
+ "7 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
540
+ "8 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
541
+ "9 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
542
+ "10 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
543
+ "11 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
544
+ "12 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
545
+ "13 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
546
+ "14 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
547
+ "15 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
548
+ "16 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
549
+ "17 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
550
+ "18 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
551
+ "19 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
552
+ "20 https://data.4tu.nl/articles/dataset/Dataset_b... \n",
553
+ "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
554
+ "22 https://data.4tu.nl/articles/dataset/Environme... \n",
555
+ "23 https://data.4tu.nl/articles/dataset/Environme... \n",
556
+ "24 NaN \n",
557
+ "\n",
558
+ " challenge link \\\n",
559
+ "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
560
+ "1 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
561
+ "2 NaN \n",
562
+ "3 https://www.win.tue.nl/bpi/doku.php?id=2011:ch... \n",
563
+ "4 https://www.win.tue.nl/bpi/doku.php?id=2012:ch... \n",
564
+ "5 https://www.win.tue.nl/bpi/2013/challenge.html \n",
565
+ "6 https://www.win.tue.nl/bpi/doku.php?id=2013:ch... \n",
566
+ "7 https://www.win.tue.nl/bpi/2013/challenge.html \n",
567
+ "8 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
568
+ "9 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
569
+ "10 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
570
+ "11 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
571
+ "12 https://www.win.tue.nl/bpi/doku.php?id=2016:ch... \n",
572
+ "13 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
573
+ "14 https://www.win.tue.nl/bpi/doku.php?id=2018:ch... \n",
574
+ "15 https://icpmconference.org/2020/bpi-challenge/ \n",
575
+ "16 https://icpmconference.org/2019/icpm-2019/cont... \n",
576
+ "17 https://icpmconference.org/2020/bpi-challenge/ \n",
577
+ "18 https://icpmconference.org/2020/bpi-challenge/ \n",
578
+ "19 https://icpmconference.org/2020/bpi-challenge/ \n",
579
+ "20 NaN \n",
580
+ "21 NaN \n",
581
+ "22 NaN \n",
582
+ "23 NaN \n",
583
+ "24 NaN \n",
584
+ "\n",
585
+ " Citations (Stand Februar 2023) \\\n",
586
+ "0 61 \n",
587
+ "1 4 \n",
588
+ "2 95 \n",
589
+ "3 57 \n",
590
+ "4 151 \n",
591
+ "5 6 \n",
592
+ "6 12 \n",
593
+ "7 36 \n",
594
+ "8 5 \n",
595
+ "9 1 \n",
596
+ "10 1 \n",
597
+ "11 8 \n",
598
+ "12 1 \n",
599
+ "13 73 \n",
600
+ "14 26 \n",
601
+ "15 2 \n",
602
+ "16 35 \n",
603
+ "17 2 \n",
604
+ "18 7 \n",
605
+ "19 2 \n",
606
+ "20 20 \n",
607
+ "21 15 \n",
608
+ "22 2 \n",
609
+ "23 2 \n",
610
+ "24 NaN \n",
611
+ "\n",
612
+ " Publications \\\n",
613
+ "0 https://app.dimensions.ai/discover/publication... \n",
614
+ "1 https://app.dimensions.ai/discover/publication... \n",
615
+ "2 https://app.dimensions.ai/discover/publication... \n",
616
+ "3 https://app.dimensions.ai/discover/publication... \n",
617
+ "4 https://app.dimensions.ai/discover/publication... \n",
618
+ "5 https://app.dimensions.ai/discover/publication... \n",
619
+ "6 https://app.dimensions.ai/discover/publication... \n",
620
+ "7 https://app.dimensions.ai/discover/publication... \n",
621
+ "8 https://app.dimensions.ai/discover/publication... \n",
622
+ "9 https://app.dimensions.ai/discover/publication... \n",
623
+ "10 https://app.dimensions.ai/discover/publication... \n",
624
+ "11 https://app.dimensions.ai/discover/publication... \n",
625
+ "12 https://app.dimensions.ai/discover/publication... \n",
626
+ "13 https://app.dimensions.ai/discover/publication... \n",
627
+ "14 https://app.dimensions.ai/discover/publication... \n",
628
+ "15 https://app.dimensions.ai/discover/publication... \n",
629
+ "16 https://app.dimensions.ai/discover/publication... \n",
630
+ "17 https://app.dimensions.ai/discover/publication... \n",
631
+ "18 https://app.dimensions.ai/discover/publication... \n",
632
+ "19 https://app.dimensions.ai/discover/publication... \n",
633
+ "20 https://app.dimensions.ai/discover/publication... \n",
634
+ "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
635
+ "22 https://app.dimensions.ai/discover/publication... \n",
636
+ "23 https://app.dimensions.ai/discover/publication... \n",
637
+ "24 NaN \n",
638
+ "\n",
639
+ " Process Discovery/ Declarative Conformance Checking / Alignment / Replay \\\n",
640
+ "0 17 7 \n",
641
+ "1 1 0 \n",
642
+ "2 32 9 \n",
643
+ "3 13 1 \n",
644
+ "4 40 15 \n",
645
+ "5 1 0 \n",
646
+ "6 3 2 \n",
647
+ "7 14 5 \n",
648
+ "8 1 0 \n",
649
+ "9 0 0 \n",
650
+ "10 0 0 \n",
651
+ "11 1 1 \n",
652
+ "12 1 0 \n",
653
+ "13 11 5 \n",
654
+ "14 7 1 \n",
655
+ "15 0 0 \n",
656
+ "16 3 1 \n",
657
+ "17 0 0 \n",
658
+ "18 0 2 \n",
659
+ "19 0 0 \n",
660
+ "20 4 1 \n",
661
+ "21 -1 -1 \n",
662
+ "22 0 0 \n",
663
+ "23 1 0 \n",
664
+ "24 NaN NaN \n",
665
+ "\n",
666
+ " Online / Streaming / Realtime Performance (Analysis) / Temporal / Time \\\n",
667
+ "0 4 1 \n",
668
+ "1 0 1 \n",
669
+ "2 4 8 \n",
670
+ "3 3 4 \n",
671
+ "4 4 13 \n",
672
+ "5 0 0 \n",
673
+ "6 1 2 \n",
674
+ "7 1 1 \n",
675
+ "8 0 0 \n",
676
+ "9 0 0 \n",
677
+ "10 0 0 \n",
678
+ "11 0 0 \n",
679
+ "12 1 0 \n",
680
+ "13 2 14 \n",
681
+ "14 2 0 \n",
682
+ "15 0 1 \n",
683
+ "16 6 6 \n",
684
+ "17 0 1 \n",
685
+ "18 2 2 \n",
686
+ "19 0 0 \n",
687
+ "20 3 1 \n",
688
+ "21 -1 -1 \n",
689
+ "22 0 0 \n",
690
+ "23 0 0 \n",
691
+ "24 NaN NaN \n",
692
+ "\n",
693
+ " Predict(ive)/ Monitoring/ Prescriptive Trace clustering / Clustering \\\n",
694
+ "0 8 2 \n",
695
+ "1 1 0 \n",
696
+ "2 15 1 \n",
697
+ "3 12 4 \n",
698
+ "4 46 0 \n",
699
+ "5 1 0 \n",
700
+ "6 0 0 \n",
701
+ "7 7 0 \n",
702
+ "8 0 0 \n",
703
+ "9 0 0 \n",
704
+ "10 1 0 \n",
705
+ "11 3 0 \n",
706
+ "12 0 0 \n",
707
+ "13 23 1 \n",
708
+ "14 8 0 \n",
709
+ "15 0 0 \n",
710
+ "16 9 4 \n",
711
+ "17 2 0 \n",
712
+ "18 3 0 \n",
713
+ "19 0 0 \n",
714
+ "20 8 0 \n",
715
+ "21 -1 -1 \n",
716
+ "22 1 0 \n",
717
+ "23 0 0 \n",
718
+ "24 NaN NaN \n",
719
+ "\n",
720
+ " Preprocessing / Event Abstraction / Event Data Correlation \\\n",
721
+ "0 2 \n",
722
+ "1 0 \n",
723
+ "2 2 \n",
724
+ "3 1 \n",
725
+ "4 1 \n",
726
+ "5 0 \n",
727
+ "6 3 \n",
728
+ "7 2 \n",
729
+ "8 0 \n",
730
+ "9 0 \n",
731
+ "10 0 \n",
732
+ "11 3 \n",
733
+ "12 0 \n",
734
+ "13 1 \n",
735
+ "14 2 \n",
736
+ "15 0 \n",
737
+ "16 1 \n",
738
+ "17 0 \n",
739
+ "18 0 \n",
740
+ "19 0 \n",
741
+ "20 0 \n",
742
+ "21 -1 \n",
743
+ "22 0 \n",
744
+ "23 0 \n",
745
+ "24 NaN \n",
746
+ "\n",
747
+ " Further keywords: \n",
748
+ "0 (machine) learning, (online process) monitorin... \n",
749
+ "1 (machine) learning, cloud computing \n",
750
+ "2 alarm-based prescriptive process monitoring, b... \n",
751
+ "3 (compliance) monitoring, (machine) learning, d... \n",
752
+ "4 (in)frequent patterns in process models, (mach... \n",
753
+ "5 (in)frequent patterns in process models, (mach... \n",
754
+ "6 (in)frequent patterns in process models \n",
755
+ "7 (machine) learning, rule mining \n",
756
+ "8 privacy preservation, security \n",
757
+ "9 (machine) learning, hidden Markov models \n",
758
+ "10 specification-driven predictive business proce... \n",
759
+ "11 (machine) learning \n",
760
+ "12 automation \n",
761
+ "13 (machine) learning, alarm-based prescriptive p... \n",
762
+ "14 (machine) learning, automation \n",
763
+ "15 stage-based process performance analysis \n",
764
+ "16 (online process) monitoring, remaining time pr... \n",
765
+ "17 (machine) learning, remaining time prediction \n",
766
+ "18 (machine) learning, remaining time prediction \n",
767
+ "19 multi-perspective \n",
768
+ "20 (machine) learning, drift detection \n",
769
+ "21 NaN \n",
770
+ "22 predictions with a-priori knowledge \n",
771
+ "23 multidimensional process mining, process cubes \n",
772
+ "24 NaN "
773
+ ]
774
+ },
775
+ "execution_count": 4,
776
+ "metadata": {},
777
+ "output_type": "execute_result"
778
+ }
779
+ ],
780
+ "source": [
781
+ "#import pm4py\n",
782
+ "import pandas as pd\n",
783
+ "INPUT_PATH = \"../data/mappings.csv\"\n",
784
+ "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
785
+ "df"
786
+ ]
787
+ },
788
+ {
789
+ "cell_type": "code",
790
+ "execution_count": null,
791
+ "id": "04a97f37",
792
+ "metadata": {},
793
+ "outputs": [],
794
+ "source": []
795
+ }
796
+ ],
797
+ "metadata": {
798
+ "kernelspec": {
799
+ "display_name": "Python 3 (ipykernel)",
800
+ "language": "python",
801
+ "name": "python3"
802
+ },
803
+ "language_info": {
804
+ "codemirror_mode": {
805
+ "name": "ipython",
806
+ "version": 3
807
+ },
808
+ "file_extension": ".py",
809
+ "mimetype": "text/x-python",
810
+ "name": "python",
811
+ "nbconvert_exporter": "python",
812
+ "pygments_lexer": "ipython3",
813
+ "version": "3.10.7"
814
+ }
815
+ },
816
+ "nbformat": 4,
817
+ "nbformat_minor": 5
818
+ }
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/benchmarking_process_discovery.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/bpic_generability_pdm.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/experiment_generator.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_distributions.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_exploration.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_performance_similarity.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/feature_selection.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_representativeness.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/configspace.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hyperparameters": [
3
+ {
4
+ "name": "choice",
5
+ "type": "uniform_float",
6
+ "log": false,
7
+ "lower": 0.01,
8
+ "upper": 1.0,
9
+ "default": 0.505,
10
+ "q": null
11
+ },
12
+ {
13
+ "name": "duplicate",
14
+ "type": "constant",
15
+ "value": 0
16
+ },
17
+ {
18
+ "name": "loop",
19
+ "type": "uniform_float",
20
+ "log": false,
21
+ "lower": 0.01,
22
+ "upper": 1.0,
23
+ "default": 0.505,
24
+ "q": null
25
+ },
26
+ {
27
+ "name": "lt_dependency",
28
+ "type": "uniform_float",
29
+ "log": false,
30
+ "lower": 0.01,
31
+ "upper": 1.0,
32
+ "default": 0.505,
33
+ "q": null
34
+ },
35
+ {
36
+ "name": "mode",
37
+ "type": "uniform_int",
38
+ "log": false,
39
+ "lower": 5,
40
+ "upper": 40,
41
+ "default": 22,
42
+ "q": null
43
+ },
44
+ {
45
+ "name": "num_traces",
46
+ "type": "uniform_int",
47
+ "log": false,
48
+ "lower": 100,
49
+ "upper": 1001,
50
+ "default": 550,
51
+ "q": null
52
+ },
53
+ {
54
+ "name": "or",
55
+ "type": "constant",
56
+ "value": 0
57
+ },
58
+ {
59
+ "name": "parallel",
60
+ "type": "uniform_float",
61
+ "log": false,
62
+ "lower": 0.01,
63
+ "upper": 1.0,
64
+ "default": 0.505,
65
+ "q": null
66
+ },
67
+ {
68
+ "name": "sequence",
69
+ "type": "uniform_float",
70
+ "log": false,
71
+ "lower": 0.01,
72
+ "upper": 1.0,
73
+ "default": 0.505,
74
+ "q": null
75
+ },
76
+ {
77
+ "name": "silent",
78
+ "type": "uniform_float",
79
+ "log": false,
80
+ "lower": 0.01,
81
+ "upper": 1.0,
82
+ "default": 0.505,
83
+ "q": null
84
+ }
85
+ ],
86
+ "conditions": [],
87
+ "forbiddens": [],
88
+ "python_module_version": "0.6.1",
89
+ "json_format_version": 0.4
90
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/intensifier.json ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "incumbent_ids": [
3
+ 2,
4
+ 4,
5
+ 5
6
+ ],
7
+ "rejected_config_ids": [
8
+ 1,
9
+ 3
10
+ ],
11
+ "incumbents_changed": 5,
12
+ "trajectory": [
13
+ {
14
+ "config_ids": [
15
+ 1
16
+ ],
17
+ "costs": [
18
+ [
19
+ 0.24521995849967804,
20
+ 0.03375422897872052
21
+ ]
22
+ ],
23
+ "trial": 1,
24
+ "walltime": 1.7022318840026855
25
+ },
26
+ {
27
+ "config_ids": [
28
+ 1,
29
+ 2
30
+ ],
31
+ "costs": [
32
+ [
33
+ 0.24521995849967804,
34
+ 0.03375422897872052
35
+ ],
36
+ [
37
+ 0.18052799504097683,
38
+ 0.036256498795491074
39
+ ]
40
+ ],
41
+ "trial": 2,
42
+ "walltime": 1.8762779235839844
43
+ },
44
+ {
45
+ "config_ids": [
46
+ 2,
47
+ 3
48
+ ],
49
+ "costs": [
50
+ [
51
+ 0.18052799504097683,
52
+ 0.036256498795491074
53
+ ],
54
+ [
55
+ 0.23787143561833685,
56
+ 0.007951827349262967
57
+ ]
58
+ ],
59
+ "trial": 3,
60
+ "walltime": 2.9373037815093994
61
+ },
62
+ {
63
+ "config_ids": [
64
+ 2,
65
+ 4
66
+ ],
67
+ "costs": [
68
+ [
69
+ 0.18052799504097683,
70
+ 0.036256498795491074
71
+ ],
72
+ [
73
+ 0.22916682187677923,
74
+ 0.005954652831009627
75
+ ]
76
+ ],
77
+ "trial": 4,
78
+ "walltime": 4.301177978515625
79
+ },
80
+ {
81
+ "config_ids": [
82
+ 2,
83
+ 4,
84
+ 5
85
+ ],
86
+ "costs": [
87
+ [
88
+ 0.18052799504097683,
89
+ 0.036256498795491074
90
+ ],
91
+ [
92
+ 0.22916682187677923,
93
+ 0.005954652831009627
94
+ ],
95
+ [
96
+ 0.22057271745765183,
97
+ 0.021149784678614475
98
+ ]
99
+ ],
100
+ "trial": 5,
101
+ "walltime": 4.334147930145264
102
+ }
103
+ ],
104
+ "state": {
105
+ "queue": [
106
+ [
107
+ 5,
108
+ 1
109
+ ]
110
+ ]
111
+ }
112
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/optimization.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "used_walltime": 4.335055828094482,
3
+ "used_target_function_walltime": 4.198468208312988,
4
+ "last_update": 1706650671.728907,
5
+ "finished": true
6
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/runhistory.json ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stats": {
3
+ "submitted": 5,
4
+ "finished": 5,
5
+ "running": 0
6
+ },
7
+ "data": [
8
+ [
9
+ 1,
10
+ null,
11
+ 209652396,
12
+ null,
13
+ [
14
+ 0.24521995849967804,
15
+ 0.03375422897872052
16
+ ],
17
+ 1.7000741958618164,
18
+ 1,
19
+ 1706650667.394413,
20
+ 1706650669.0954418,
21
+ {}
22
+ ],
23
+ [
24
+ 2,
25
+ null,
26
+ 209652396,
27
+ null,
28
+ [
29
+ 0.18052799504097683,
30
+ 0.036256498795491074
31
+ ],
32
+ 0.05061626434326172,
33
+ 1,
34
+ 1706650669.2182012,
35
+ 1706650669.2693639,
36
+ {}
37
+ ],
38
+ [
39
+ 3,
40
+ null,
41
+ 209652396,
42
+ null,
43
+ [
44
+ 0.23787143561833685,
45
+ 0.007951827349262967
46
+ ],
47
+ 1.0581114292144775,
48
+ 1,
49
+ 1706650669.2713861,
50
+ 1706650670.3300622,
51
+ {}
52
+ ],
53
+ [
54
+ 4,
55
+ null,
56
+ 209652396,
57
+ null,
58
+ [
59
+ 0.22916682187677923,
60
+ 0.005954652831009627
61
+ ],
62
+ 1.3603012561798096,
63
+ 1,
64
+ 1706650670.332902,
65
+ 1706650671.6938748,
66
+ {}
67
+ ],
68
+ [
69
+ 5,
70
+ null,
71
+ 209652396,
72
+ null,
73
+ [
74
+ 0.22057271745765183,
75
+ 0.021149784678614475
76
+ ],
77
+ 0.029365062713623047,
78
+ 1,
79
+ 1706650671.696975,
80
+ 1706650671.7269728,
81
+ {}
82
+ ]
83
+ ],
84
+ "configs": {
85
+ "1": {
86
+ "choice": 0.27420973585918546,
87
+ "duplicate": 0,
88
+ "loop": 0.6243590484932066,
89
+ "lt_dependency": 0.11643280289135872,
90
+ "mode": 12,
91
+ "num_traces": 876,
92
+ "or": 0,
93
+ "parallel": 0.5005605065450072,
94
+ "sequence": 0.9603757594153285,
95
+ "silent": 0.9968127990048379
96
+ },
97
+ "2": {
98
+ "choice": 0.892855270774259,
99
+ "duplicate": 0,
100
+ "loop": 0.09625800670452529,
101
+ "lt_dependency": 0.7827238845235909,
102
+ "mode": 32,
103
+ "num_traces": 715,
104
+ "or": 0,
105
+ "parallel": 0.1376370346783048,
106
+ "sequence": 0.1696964227061463,
107
+ "silent": 0.14680112183512767
108
+ },
109
+ "3": {
110
+ "choice": 0.7180374727086953,
111
+ "duplicate": 0,
112
+ "loop": 0.38960710363751994,
113
+ "lt_dependency": 0.8342936470924586,
114
+ "mode": 28,
115
+ "num_traces": 612,
116
+ "or": 0,
117
+ "parallel": 0.442661634261348,
118
+ "sequence": 0.3700736632331964,
119
+ "silent": 0.2607586865143843
120
+ },
121
+ "4": {
122
+ "choice": 0.6494351719359895,
123
+ "duplicate": 0,
124
+ "loop": 0.9263406719097345,
125
+ "lt_dependency": 0.8011669785745563,
126
+ "mode": 19,
127
+ "num_traces": 656,
128
+ "or": 0,
129
+ "parallel": 0.6739314909219778,
130
+ "sequence": 0.11102436264054778,
131
+ "silent": 0.11927138975266208
132
+ },
133
+ "5": {
134
+ "choice": 0.6067357423109274,
135
+ "duplicate": 0,
136
+ "loop": 0.7938077877018379,
137
+ "lt_dependency": 0.780375183440352,
138
+ "mode": 10,
139
+ "num_traces": 116,
140
+ "or": 0,
141
+ "parallel": 0.7006548839679922,
142
+ "sequence": 0.5744948027137008,
143
+ "silent": 0.47164766512774325
144
+ }
145
+ },
146
+ "config_origins": {
147
+ "1": "Initial Design: Sobol",
148
+ "2": "Acquisition Function Maximizer: Random Search (sorted)",
149
+ "3": "Acquisition Function Maximizer: Random Search (sorted)",
150
+ "4": "Acquisition Function Maximizer: Random Search (sorted)",
151
+ "5": "Acquisition Function Maximizer: Random Search (sorted)"
152
+ }
153
+ }
smac3_output/006b6145d4159dd008d68ca2cd3a4441/0/scenario.json ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "006b6145d4159dd008d68ca2cd3a4441",
3
+ "deterministic": true,
4
+ "objectives": [
5
+ "normalized_sequence_entropy_exponential_forgetting",
6
+ "normalized_sequence_entropy_linear_forgetting"
7
+ ],
8
+ "crash_cost": Infinity,
9
+ "termination_cost_threshold": Infinity,
10
+ "walltime_limit": Infinity,
11
+ "cputime_limit": Infinity,
12
+ "trial_walltime_limit": null,
13
+ "trial_memory_limit": null,
14
+ "n_trials": 5,
15
+ "use_default_config": false,
16
+ "instances": null,
17
+ "instance_features": null,
18
+ "min_budget": null,
19
+ "max_budget": null,
20
+ "seed": 0,
21
+ "n_workers": -1,
22
+ "_meta": {
23
+ "facade": {
24
+ "name": "HyperparameterOptimizationFacade"
25
+ },
26
+ "runner": {
27
+ "name": "TargetFunctionRunner",
28
+ "code": "b't\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x01\\x19\\x00|\\x01d\\x02\\x19\\x00|\\x01d\\x03\\x19\\x00|\\x01d\\x04\\x19\\x00|\\x01d\\x05\\x19\\x00|\\x01d\\x06\\x19\\x00|\\x01d\\x07\\x19\\x00|\\x01d\\x08\\x19\\x00|\\x01d\\t\\x19\\x00d\\nd\\x0b\\x9c\\x0cd\\x0c\\x8d\\x01}\\x03t\\x01|\\x03d\\r|\\x01d\\r\\x19\\x00i\\x01d\\x0c\\x8d\\x02}\\x04|\\x00\\xa0\\x02|\\x04\\xa1\\x01S\\x00'"
29
+ },
30
+ "model": {
31
+ "name": "RandomForest",
32
+ "types": [
33
+ 0,
34
+ 0,
35
+ 0,
36
+ 0,
37
+ 0,
38
+ 0,
39
+ 0,
40
+ 0,
41
+ 0,
42
+ 0
43
+ ],
44
+ "bounds": [
45
+ [
46
+ 0,
47
+ 1.0
48
+ ],
49
+ [
50
+ 0,
51
+ NaN
52
+ ],
53
+ [
54
+ 0,
55
+ 1.0
56
+ ],
57
+ [
58
+ 0,
59
+ 1.0
60
+ ],
61
+ [
62
+ 0,
63
+ 1.0
64
+ ],
65
+ [
66
+ 0,
67
+ 1.0
68
+ ],
69
+ [
70
+ 0,
71
+ NaN
72
+ ],
73
+ [
74
+ 0,
75
+ 1.0
76
+ ],
77
+ [
78
+ 0,
79
+ 1.0
80
+ ],
81
+ [
82
+ 0,
83
+ 1.0
84
+ ]
85
+ ],
86
+ "pca_components": 7,
87
+ "n_trees": 10,
88
+ "n_points_per_tree": -1,
89
+ "ratio_features": 1.0,
90
+ "min_samples_split": 2,
91
+ "min_samples_leaf": 1,
92
+ "max_depth": 1048576,
93
+ "eps_purity": 1e-08,
94
+ "max_nodes": 1048576,
95
+ "bootstrapping": true
96
+ },
97
+ "acquisition_maximizer": {
98
+ "name": "LocalAndSortedRandomSearch",
99
+ "acquisition_function": {
100
+ "name": "EI",
101
+ "xi": 0.0,
102
+ "log": true
103
+ },
104
+ "challengers": 10000,
105
+ "seed": 0,
106
+ "random_search": {
107
+ "name": "RandomSearch",
108
+ "acquisition_function": {
109
+ "name": "EI",
110
+ "xi": 0.0,
111
+ "log": true
112
+ },
113
+ "challengers": 5000,
114
+ "seed": 0
115
+ },
116
+ "local_search": {
117
+ "name": "LocalSearch",
118
+ "acquisition_function": {
119
+ "name": "EI",
120
+ "xi": 0.0,
121
+ "log": true
122
+ },
123
+ "challengers": 5000,
124
+ "seed": 0,
125
+ "max_steps": null,
126
+ "n_steps_plateau_walk": 10,
127
+ "vectorization_min_obtain": 2,
128
+ "vectorization_max_obtain": 64
129
+ }
130
+ },
131
+ "acquisition_function": {
132
+ "name": "EI",
133
+ "xi": 0.0,
134
+ "log": true
135
+ },
136
+ "intensifier": {
137
+ "name": "Intensifier",
138
+ "max_incumbents": 10,
139
+ "seed": 0
140
+ },
141
+ "initial_design": {
142
+ "name": "SobolInitialDesign",
143
+ "n_configs": 1,
144
+ "n_configs_per_hyperparameter": 10,
145
+ "additional_configs": [],
146
+ "seed": 0
147
+ },
148
+ "random_design": {
149
+ "name": "ProbabilityRandomDesign",
150
+ "seed": 0,
151
+ "probability": 0.2
152
+ },
153
+ "runhistory_encoder": {
154
+ "name": "RunHistoryLogScaledEncoder",
155
+ "considered_states": [
156
+ 1,
157
+ 2,
158
+ 4
159
+ ],
160
+ "lower_budget_states": [],
161
+ "scale_percentage": 5,
162
+ "seed": 0
163
+ },
164
+ "multi_objective_algorithm": {
165
+ "name": "MeanAggregationStrategy",
166
+ "objective_weights": [
167
+ 1,
168
+ 1
169
+ ]
170
+ },
171
+ "config_selector": {
172
+ "name": "ConfigSelector",
173
+ "retrain_after": 8,
174
+ "retries": 16,
175
+ "min_trials": 1
176
+ },
177
+ "version": "2.0.2"
178
+ },
179
+ "output_directory": "smac3_output/006b6145d4159dd008d68ca2cd3a4441/0"
180
+ }
smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/configspace.json ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "hyperparameters": [
3
+ {
4
+ "name": "choice",
5
+ "type": "uniform_float",
6
+ "log": false,
7
+ "lower": 0.01,
8
+ "upper": 1.0,
9
+ "default": 0.505,
10
+ "q": null
11
+ },
12
+ {
13
+ "name": "duplicate",
14
+ "type": "constant",
15
+ "value": 0
16
+ },
17
+ {
18
+ "name": "loop",
19
+ "type": "uniform_float",
20
+ "log": false,
21
+ "lower": 0.01,
22
+ "upper": 1.0,
23
+ "default": 0.505,
24
+ "q": null
25
+ },
26
+ {
27
+ "name": "lt_dependency",
28
+ "type": "uniform_float",
29
+ "log": false,
30
+ "lower": 0.01,
31
+ "upper": 1.0,
32
+ "default": 0.505,
33
+ "q": null
34
+ },
35
+ {
36
+ "name": "mode",
37
+ "type": "uniform_int",
38
+ "log": false,
39
+ "lower": 5,
40
+ "upper": 20,
41
+ "default": 12,
42
+ "q": null
43
+ },
44
+ {
45
+ "name": "num_traces",
46
+ "type": "uniform_int",
47
+ "log": false,
48
+ "lower": 10,
49
+ "upper": 10001,
50
+ "default": 5006,
51
+ "q": null
52
+ },
53
+ {
54
+ "name": "or",
55
+ "type": "constant",
56
+ "value": 0
57
+ },
58
+ {
59
+ "name": "parallel",
60
+ "type": "uniform_float",
61
+ "log": false,
62
+ "lower": 0.01,
63
+ "upper": 1.0,
64
+ "default": 0.505,
65
+ "q": null
66
+ },
67
+ {
68
+ "name": "sequence",
69
+ "type": "uniform_float",
70
+ "log": false,
71
+ "lower": 0.01,
72
+ "upper": 1.0,
73
+ "default": 0.505,
74
+ "q": null
75
+ },
76
+ {
77
+ "name": "silent",
78
+ "type": "uniform_float",
79
+ "log": false,
80
+ "lower": 0.01,
81
+ "upper": 1.0,
82
+ "default": 0.505,
83
+ "q": null
84
+ }
85
+ ],
86
+ "conditions": [],
87
+ "forbiddens": [],
88
+ "python_module_version": "0.6.1",
89
+ "json_format_version": 0.4
90
+ }
smac3_output/07ecbfc3dc7bfceedce234fe2b508af8/0/intensifier.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "incumbent_ids": [
3
+ 1
4
+ ],
5
+ "rejected_config_ids": [],
6
+ "incumbents_changed": 1,
7
+ "trajectory": [
8
+ {
9
+ "config_ids": [
10
+ 1
11
+ ],
12
+ "costs": [
13
+ 0.148762785609154
14
+ ],
15
+ "trial": 1,
16
+ "walltime": 7.8146021366119385
17
+ }
18
+ ],
19
+ "state": {
20
+ "queue": [
21
+ [
22
+ 20,
23
+ 1
24
+ ]
25
+ ]
26
+ }
27
+ }