Andrea Maldonado committed on
Commit 99bcc04 · 1 Parent(s): 9d90e72

Refactoring tag to gedi

Files changed (39)
  1. config.py +1 -1
  2. execute_grid_experiments.py +1 -1
  3. {tag → gedi}/analyser.py +3 -3
  4. {tag → gedi}/augmentation.py +1 -1
  5. {tag → gedi}/benchmark.py +3 -3
  6. {tag → gedi}/features.py +1 -1
  7. {tag → gedi}/generator.py +2 -2
  8. {tag → gedi}/plotter.py +3 -3
  9. {tag → gedi}/utils/algorithms/__init__.py +0 -0
  10. {tag → gedi}/utils/algorithms/tsne.py +0 -0
  11. {tag → gedi}/utils/array_tools.py +0 -0
  12. {tag → gedi}/utils/io_helpers.py +0 -0
  13. {tag → gedi}/utils/matrix_tools.py +0 -0
  14. main.py +9 -9
  15. notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
  16. notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +2 -2
  17. notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +1 -1
  18. notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
  19. notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +49 -49
  20. notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +1 -1
  21. notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +1 -1
  22. notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
  23. notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +1 -1
  24. notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
  25. notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +1 -1
  26. notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +0 -376
  27. notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +0 -6
  28. notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +0 -6
  29. notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
  30. notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +0 -818
  31. notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
  32. notebooks/benchmarking_process_discovery.ipynb +2 -2
  33. notebooks/bpic_generability_pdm.ipynb +1 -1
  34. notebooks/experiment_generator.ipynb +2 -2
  35. notebooks/feature_distributions.ipynb +1 -1
  36. notebooks/feature_exploration.ipynb +1 -1
  37. notebooks/feature_performance_similarity.ipynb +3 -3
  38. notebooks/feature_selection.ipynb +1 -1
  39. notebooks/gedi_representativeness.ipynb +0 -0
config.py CHANGED
@@ -2,7 +2,7 @@ import json
  import os
  import warnings
 
- from tag.utils.io_helpers import sort_files
+ from gedi.utils.io_helpers import sort_files
  from tqdm import tqdm
  from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
 
execute_grid_experiments.py CHANGED
@@ -2,7 +2,7 @@ import multiprocessing
  import os
 
  from datetime import datetime as dt
- from tag.utils.io_helpers import sort_files
+ from gedi.utils.io_helpers import sort_files
  from tqdm import tqdm
 
  #TODO: Pass i properly
{tag → gedi}/analyser.py RENAMED
@@ -4,9 +4,9 @@ import warnings
  from sklearn.decomposition import FastICA, PCA
  from sklearn.manifold import TSNE
  from sklearn.preprocessing import Normalizer, StandardScaler
- from tag.features import EventLogFeatures
- from tag.plotter import ModelResultPlotter
- from tag.utils.matrix_tools import insert_missing_data
+ from gedi.features import EventLogFeatures
+ from gedi.plotter import ModelResultPlotter
+ from gedi.utils.matrix_tools import insert_missing_data
  # TODO: Call param_keys explicitly e.g. import INPUT_PATH
  from utils.param_keys import *
  from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY
{tag → gedi}/augmentation.py RENAMED
@@ -3,7 +3,7 @@ from collections import Counter
  from datetime import datetime as dt
  from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
  from sklearn.preprocessing import Normalizer
- from tag.utils.matrix_tools import insert_missing_data
+ from gedi.utils.matrix_tools import insert_missing_data
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
  from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
 
{tag → gedi}/benchmark.py RENAMED
@@ -16,7 +16,7 @@ from pm4py.algo.evaluation.generalization import algorithm as generalization_eva
  from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
  from pm4py.objects.bpmn.obj import BPMN
  from pm4py.objects.log.importer.xes import importer as xes_importer
- from tag.utils.io_helpers import dump_features_json
+ from gedi.utils.io_helpers import dump_features_json
  from tqdm import tqdm
  from utils.param_keys import INPUT_PATH, OUTPUT_PATH
  from utils.param_keys.benchmark import MINERS
@@ -113,14 +113,14 @@ class BenchmarkTest:
  return
 
  def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
- jar_path = os.path.join("tag","libs","split-miner-1.7.1-all.jar")
+ jar_path = os.path.join("gedi","libs","split-miner-1.7.1-all.jar")
  filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
  bpmn_path = os.path.join("output", "bpmns_split", filename)
  os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
  command = [
  "java",
  "-cp",
- f"{os.getcwd()}/tag/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
+ f"{os.getcwd()}/gedi/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
  "au.edu.unimelb.services.ServiceProvider",
  "SM2",
  f"{os.getcwd()}/{log_path}",
{tag → gedi}/features.py RENAMED
@@ -11,7 +11,7 @@ from pathlib import Path, PurePath
  from sklearn.impute import SimpleImputer
  from utils.param_keys import INPUT_PATH
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
- from tag.utils.io_helpers import dump_features_json
+ from gedi.utils.io_helpers import dump_features_json
 
  def get_sortby_parameter(elem):
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
{tag → gedi}/generator.py RENAMED
@@ -20,7 +20,7 @@ from pm4py.sim import play_out
  from smac import HyperparameterOptimizationFacade, Scenario
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
- from tag.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
+ from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
 
 
 
@@ -73,7 +73,7 @@ def get_tasks(experiment, output_path="", reference_feature=None):
  return tasks, output_path
 
  class GenerateEventLogs():
- # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/tag/blob/main/notebooks/grid_objectives.ipynb)
+ # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
  def __init__(self, params):
  print("=========================== Generator ==========================")
  print(f"INFO: Running with {params}")
{tag → gedi}/plotter.py RENAMED
@@ -20,9 +20,9 @@ from collections import defaultdict
  from sklearn.preprocessing import Normalizer, StandardScaler
  from sklearn.decomposition import PCA
  from sklearn.metrics.pairwise import euclidean_distances
- from tag.generator import get_tasks
- from tag.utils.io_helpers import get_keys_abbreviation
- from tag.utils.io_helpers import read_csvs, select_instance
+ from gedi.generator import get_tasks
+ from gedi.utils.io_helpers import get_keys_abbreviation
+ from gedi.utils.io_helpers import read_csvs, select_instance
 
  def insert_newlines(string, every=140):
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
{tag → gedi}/utils/algorithms/__init__.py RENAMED
File without changes
{tag → gedi}/utils/algorithms/tsne.py RENAMED
File without changes
{tag → gedi}/utils/array_tools.py RENAMED
File without changes
{tag → gedi}/utils/io_helpers.py RENAMED
File without changes
{tag → gedi}/utils/matrix_tools.py RENAMED
File without changes
main.py CHANGED
@@ -1,12 +1,12 @@
  import config
  import pandas as pd
  from datetime import datetime as dt
- from tag.generator import GenerateEventLogs
- from tag.features import EventLogFeatures
- from tag.analyser import FeatureAnalyser
- from tag.augmentation import InstanceAugmentator
- from tag.benchmark import BenchmarkTest
- from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
+ from gedi.generator import GenerateEventLogs
+ from gedi.features import EventLogFeatures
+ from gedi.analyser import FeatureAnalyser
+ from gedi.augmentation import InstanceAugmentator
+ from gedi.benchmark import BenchmarkTest
+ from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
  from utils.default_argparse import ArgParser
  from utils.param_keys import *
  from utils.param_keys.run_options import *
@@ -57,8 +57,8 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
 
 
  if __name__=='__main__':
- start_tag = dt.now()
- print(f'INFO: TAG starting {start_tag}')
+ start_gedi = dt.now()
+ print(f'INFO: GEDI starting {start_gedi}')
 
  args = ArgParser().parse('GEDI main')
  run_params = config.get_run_params(args.run_params_json)
@@ -70,4 +70,4 @@ if __name__=='__main__':
  else:
  load(args.result_load_files, kwargs)
 
- print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')
+ print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb CHANGED
@@ -1277,7 +1277,7 @@
  "\n",
  "import sys\n",
  "import os\n",
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
  "from io_helpers import get_keys_abbreviation\n",
  "\n",
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.9.19"
  }
  },
  "nbformat": 4,
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb CHANGED
@@ -1223,7 +1223,7 @@
  "from scipy.stats import pearsonr\n",
  "import sys\n",
  "import os\n",
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
  "from io_helpers import get_keys_abbreviation\n",
  "\n",
  "\n",
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb CHANGED
@@ -64,7 +64,7 @@
64
  },
65
  {
66
  "cell_type": "code",
67
- "execution_count": 4,
68
  "id": "2be119c8",
69
  "metadata": {},
70
  "outputs": [
@@ -74,48 +74,48 @@
74
  "text": [
75
  "21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('ratio_most_common_variant', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n",
76
  "121\n",
77
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enself_rt10v.csv\n",
78
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enself_rt10v.json\n",
79
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enseef_rutpt.csv\n",
80
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enseef_rutpt.json\n",
81
- "Saved experiment in ../data/grid_experiments/grid_2objectives_rt10v_rutpt.csv\n",
82
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_rt10v_rutpt.json\n",
83
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_rmcv.csv\n",
84
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_rmcv.json\n",
85
- "Saved experiment in ../data/grid_experiments/grid_2objectives_rmcv_rt10v.csv\n",
86
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_rmcv_rt10v.json\n",
87
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_enself.csv\n",
88
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_enself.json\n",
89
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_enve.csv\n",
90
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_enve.json\n",
91
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enseef_rmcv.csv\n",
92
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enseef_rmcv.json\n",
93
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enve_rt10v.csv\n",
94
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enve_rt10v.json\n",
95
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enseef_enself.csv\n",
96
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enseef_enself.json\n",
97
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enseef_enve.csv\n",
98
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enseef_enve.json\n",
99
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enself_rutpt.csv\n",
100
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enself_rutpt.json\n",
101
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_rt10v.csv\n",
102
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_rt10v.json\n",
103
- "Saved experiment in ../data/grid_experiments/grid_2objectives_rmcv_rutpt.csv\n",
104
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_rmcv_rutpt.json\n",
105
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enself_rmcv.csv\n",
106
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enself_rmcv.json\n",
107
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enseef_rt10v.csv\n",
108
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enseef_rt10v.json\n",
109
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enself_enve.csv\n",
110
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enself_enve.json\n",
111
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enve_rutpt.csv\n",
112
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enve_rutpt.json\n",
113
- "Saved experiment in ../data/grid_experiments/grid_2objectives_enve_rmcv.csv\n",
114
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_enve_rmcv.json\n",
115
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_enseef.csv\n",
116
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_enseef.json\n",
117
- "Saved experiment in ../data/grid_experiments/grid_2objectives_ense_rutpt.csv\n",
118
- "Saved experiment config in ../config_files/algorithm/grid_experiments/generator_grid_2objectives_ense_rutpt.json\n",
119
  "None\n"
120
  ]
121
  }
@@ -128,7 +128,7 @@
128
  " experiment = [\n",
129
  " {\n",
130
  " 'pipeline_step': 'event_logs_generation',\n",
131
- " 'output_path':'output/generated',\n",
132
  " 'generator_params': {\n",
133
  " \"experiment\": {\"input_path\": experiment_path[3:],\n",
134
  " \"objectives\": objectives},\n",
@@ -149,7 +149,7 @@
149
  " },\n",
150
  " {\n",
151
  " 'pipeline_step': 'feature_extraction',\n",
152
- " 'input_path': os.path.join('output','features', 'generated', first_dir, second_dir),\n",
153
  " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
154
  " 'output_path': 'output/plots',\n",
155
  " 'real_eventlog_path': 'data/34_bpic_features.csv',\n",
@@ -158,7 +158,7 @@
158
  " ]\n",
159
  "\n",
160
  " #print(\"EXPERIMENT:\", experiment[1]['input_path'])\n",
161
- " output_path = os.path.join('..', 'config_files','algorithm','grid_experiments')\n",
162
  " os.makedirs(output_path, exist_ok=True)\n",
163
  " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
164
  " with open(output_path, 'w') as f:\n",
@@ -182,7 +182,7 @@
182
  " print(len(tasks))\n",
183
  " for exp in experiments:\n",
184
  " df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n",
185
- " experiment_path = os.path.join('..','data', 'grid_experiments')\n",
186
  " os.makedirs(experiment_path, exist_ok=True)\n",
187
  " experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n",
188
  " df.to_csv(experiment_path, index=False)\n",
@@ -2225,7 +2225,7 @@
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
- "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
- "version": "3.9.7"
3106
  }
3107
  },
3108
  "nbformat": 4,
 
64
  },
65
  {
66
  "cell_type": "code",
67
+ "execution_count": 6,
68
  "id": "2be119c8",
69
  "metadata": {},
70
  "outputs": [
 
74
  "text": [
75
  "21 [('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_unique_traces_per_trace'), ('ratio_top_10_variants', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_most_common_variant'), ('ratio_most_common_variant', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_most_common_variant'), ('epa_normalized_variant_entropy', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_sequence_entropy_linear_forgetting'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy', 'ratio_top_10_variants'), ('ratio_most_common_variant', 'ratio_unique_traces_per_trace'), ('epa_normalized_sequence_entropy_linear_forgetting', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy_exponential_forgetting', 'ratio_top_10_variants'), ('epa_normalized_sequence_entropy_linear_forgetting', 'epa_normalized_variant_entropy'), ('epa_normalized_variant_entropy', 'ratio_unique_traces_per_trace'), ('epa_normalized_variant_entropy', 'ratio_most_common_variant'), ('epa_normalized_sequence_entropy', 'epa_normalized_sequence_entropy_exponential_forgetting'), ('epa_normalized_sequence_entropy', 'ratio_unique_traces_per_trace')]\n",
76
  "121\n",
77
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rt10v.csv\n",
78
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rt10v.json\n",
79
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rutpt.csv\n",
80
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rutpt.json\n",
81
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rt10v_rutpt.csv\n",
82
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rt10v_rutpt.json\n",
83
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rmcv.csv\n",
84
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rmcv.json\n",
85
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rt10v.csv\n",
86
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rt10v.json\n",
87
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enself.csv\n",
88
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enself.json\n",
89
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enve.csv\n",
90
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enve.json\n",
91
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rmcv.csv\n",
92
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rmcv.json\n",
93
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rt10v.csv\n",
94
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rt10v.json\n",
95
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enself.csv\n",
96
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enself.json\n",
97
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_enve.csv\n",
98
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_enve.json\n",
99
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rutpt.csv\n",
100
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rutpt.json\n",
101
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rt10v.csv\n",
102
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rt10v.json\n",
103
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_rmcv_rutpt.csv\n",
104
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_rmcv_rutpt.json\n",
105
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_rmcv.csv\n",
106
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_rmcv.json\n",
107
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enseef_rt10v.csv\n",
108
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enseef_rt10v.json\n",
109
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enself_enve.csv\n",
110
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enself_enve.json\n",
111
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rutpt.csv\n",
112
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rutpt.json\n",
113
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_enve_rmcv.csv\n",
114
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_enve_rmcv.json\n",
115
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_enseef.csv\n",
116
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_enseef.json\n",
117
+ "Saved experiment in ../data/grid_2obj/grid_2objectives_ense_rutpt.csv\n",
118
+ "Saved experiment config in ../config_files/algorithm/grid_2obj/generator_grid_2objectives_ense_rutpt.json\n",
119
  "None\n"
120
  ]
121
  }
 
128
  " experiment = [\n",
129
  " {\n",
130
  " 'pipeline_step': 'event_logs_generation',\n",
131
+ " 'output_path':'output/generated/grid_2obj',\n",
132
  " 'generator_params': {\n",
133
  " \"experiment\": {\"input_path\": experiment_path[3:],\n",
134
  " \"objectives\": objectives},\n",
 
149
  " },\n",
150
  " {\n",
151
  " 'pipeline_step': 'feature_extraction',\n",
152
+ " 'input_path': os.path.join('output','features', 'generated', 'grid_2obj', first_dir, second_dir),\n",
153
  " 'feature_params': {'feature_set':['simple_stats', 'trace_length', 'trace_variant', 'activities', 'start_activities', 'end_activities', 'eventropies', 'epa_based']},\n",
154
  " 'output_path': 'output/plots',\n",
155
  " 'real_eventlog_path': 'data/34_bpic_features.csv',\n",
 
158
  " ]\n",
159
  "\n",
160
  " #print(\"EXPERIMENT:\", experiment[1]['input_path'])\n",
161
+ " output_path = os.path.join('..', 'config_files','algorithm','grid_2obj')\n",
162
  " os.makedirs(output_path, exist_ok=True)\n",
163
  " output_path = os.path.join(output_path, f'generator_{os.path.split(experiment_path)[-1].split(\".\")[0]}.json') \n",
164
  " with open(output_path, 'w') as f:\n",
 
182
  " print(len(tasks))\n",
183
  " for exp in experiments:\n",
184
  " df = pd.DataFrame(data=tasks, columns=[\"task\", *exp])\n",
185
+ " experiment_path = os.path.join('..','data', 'grid_2obj')\n",
186
  " os.makedirs(experiment_path, exist_ok=True)\n",
187
  " experiment_path = os.path.join(experiment_path, f\"grid_{len(df.columns)-1}objectives_{abbrev_obj_keys(exp)}.csv\") \n",
188
  " df.to_csv(experiment_path, index=False)\n",
 
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
+ "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
 
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
+ "version": "3.9.19"
3106
  }
3107
  },
3108
  "nbformat": 4,
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb CHANGED
@@ -1847,7 +1847,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.9.19"
  }
  },
  "nbformat": 4,
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb CHANGED
@@ -3810,7 +3810,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.9.12"
+ "version": "3.9.19"
  }
  },
  "nbformat": 4,
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb CHANGED
@@ -1928,7 +1928,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.9.7"
+ "version": "3.9.19"
  }
  },
  "nbformat": 4,
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb CHANGED
@@ -386,7 +386,7 @@
  "if module_path not in sys.path:\n",
  " sys.path.append(module_path)\n",
  "\n",
- "from tag.plotter import FeaturesPlotter"
+ "from gedi.plotter import FeaturesPlotter"
  ]
  },
  {
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb DELETED
@@ -1,376 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 9,
6
- "id": "e5aa7223",
7
- "metadata": {},
8
- "outputs": [],
9
- "source": [
10
- "import pandas as pd\n",
11
- "import numpy as np"
12
- ]
13
- },
14
- {
15
- "cell_type": "code",
16
- "execution_count": 10,
17
- "id": "dfd1a302",
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"]) "
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 28,
27
- "id": "218946b7",
28
- "metadata": {},
29
- "outputs": [],
30
- "source": [
31
- "k=0\n",
32
- "for i in np.arange(0.2, 1.1,0.2):\n",
33
- " for j in np.arange(0,0.55,0.1):\n",
34
- " k+=1\n",
35
- " new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
36
- " \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
37
- " df = pd.concat([\n",
38
- " df, \n",
39
- " pd.DataFrame([new_entry], columns=new_entry.index)]\n",
40
- " ).reset_index(drop=True)\n",
41
- " "
42
- ]
43
- },
44
- {
45
- "cell_type": "code",
46
- "execution_count": 31,
47
- "id": "b1e3bb5a",
48
- "metadata": {},
49
- "outputs": [],
50
- "source": [
51
- "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
52
- ]
53
- },
54
- {
55
- "cell_type": "code",
56
- "execution_count": 32,
57
- "id": "5de45389",
58
- "metadata": {},
59
- "outputs": [
60
- {
61
- "data": {
62
- "text/html": [
63
- "<div>\n",
64
- "<style scoped>\n",
65
- " .dataframe tbody tr th:only-of-type {\n",
66
- " vertical-align: middle;\n",
67
- " }\n",
68
- "\n",
69
- " .dataframe tbody tr th {\n",
70
- " vertical-align: top;\n",
71
- " }\n",
72
- "\n",
73
- " .dataframe thead th {\n",
74
- " text-align: right;\n",
75
- " }\n",
76
- "</style>\n",
77
- "<table border=\"1\" class=\"dataframe\">\n",
78
- " <thead>\n",
79
- " <tr style=\"text-align: right;\">\n",
80
- " <th></th>\n",
81
- " <th>log</th>\n",
82
- " <th>ratio_top_20_variants</th>\n",
83
- " <th>normalized_sequence_entropy_linear_forgetting</th>\n",
84
- " </tr>\n",
85
- " </thead>\n",
86
- " <tbody>\n",
87
- " <tr>\n",
88
- " <th>0</th>\n",
89
- " <td>objective_1</td>\n",
90
- " <td>0.2</td>\n",
91
- " <td>0.0</td>\n",
92
- " </tr>\n",
93
- " <tr>\n",
94
- " <th>1</th>\n",
95
- " <td>objective_2</td>\n",
96
- " <td>0.2</td>\n",
97
- " <td>0.1</td>\n",
98
- " </tr>\n",
99
- " <tr>\n",
100
- " <th>2</th>\n",
101
- " <td>objective_3</td>\n",
102
- " <td>0.2</td>\n",
103
- " <td>0.2</td>\n",
104
- " </tr>\n",
105
- " <tr>\n",
106
- " <th>3</th>\n",
107
- " <td>objective_4</td>\n",
108
- " <td>0.2</td>\n",
109
- " <td>0.3</td>\n",
110
- " </tr>\n",
111
- " <tr>\n",
112
- " <th>4</th>\n",
113
- " <td>objective_5</td>\n",
114
- " <td>0.2</td>\n",
115
- " <td>0.4</td>\n",
116
- " </tr>\n",
117
- " <tr>\n",
118
- " <th>5</th>\n",
119
- " <td>objective_6</td>\n",
120
- " <td>0.2</td>\n",
121
- " <td>0.5</td>\n",
122
- " </tr>\n",
123
- " <tr>\n",
124
- " <th>6</th>\n",
125
- " <td>objective_7</td>\n",
126
- " <td>0.4</td>\n",
127
- " <td>0.0</td>\n",
128
- " </tr>\n",
129
- " <tr>\n",
130
- " <th>7</th>\n",
131
- " <td>objective_8</td>\n",
132
- " <td>0.4</td>\n",
133
- " <td>0.1</td>\n",
134
- " </tr>\n",
135
- " <tr>\n",
136
- " <th>8</th>\n",
137
- " <td>objective_9</td>\n",
138
- " <td>0.4</td>\n",
139
- " <td>0.2</td>\n",
140
- " </tr>\n",
141
- " <tr>\n",
142
- " <th>9</th>\n",
143
- " <td>objective_10</td>\n",
144
- " <td>0.4</td>\n",
145
- " <td>0.3</td>\n",
146
- " </tr>\n",
147
- " <tr>\n",
148
- " <th>10</th>\n",
149
- " <td>objective_11</td>\n",
150
- " <td>0.4</td>\n",
151
- " <td>0.4</td>\n",
152
- " </tr>\n",
153
- " <tr>\n",
154
- " <th>11</th>\n",
155
- " <td>objective_12</td>\n",
156
- " <td>0.4</td>\n",
157
- " <td>0.5</td>\n",
158
- " </tr>\n",
159
- " <tr>\n",
160
- " <th>12</th>\n",
161
- " <td>objective_13</td>\n",
162
- " <td>0.6</td>\n",
163
- " <td>0.0</td>\n",
164
- " </tr>\n",
165
- " <tr>\n",
166
- " <th>13</th>\n",
167
- " <td>objective_14</td>\n",
168
- " <td>0.6</td>\n",
169
- " <td>0.1</td>\n",
170
- " </tr>\n",
171
- " <tr>\n",
172
- " <th>14</th>\n",
173
- " <td>objective_15</td>\n",
174
- " <td>0.6</td>\n",
175
- " <td>0.2</td>\n",
176
- " </tr>\n",
177
- " <tr>\n",
178
- " <th>15</th>\n",
179
- " <td>objective_16</td>\n",
180
- " <td>0.6</td>\n",
181
- " <td>0.3</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>16</th>\n",
185
- " <td>objective_17</td>\n",
186
- " <td>0.6</td>\n",
187
- " <td>0.4</td>\n",
188
- " </tr>\n",
189
- " <tr>\n",
190
- " <th>17</th>\n",
191
- " <td>objective_18</td>\n",
192
- " <td>0.6</td>\n",
193
- " <td>0.5</td>\n",
194
- " </tr>\n",
195
- " <tr>\n",
196
- " <th>18</th>\n",
197
- " <td>objective_19</td>\n",
198
- " <td>0.8</td>\n",
199
- " <td>0.0</td>\n",
200
- " </tr>\n",
201
- " <tr>\n",
202
- " <th>19</th>\n",
203
- " <td>objective_20</td>\n",
204
- " <td>0.8</td>\n",
205
- " <td>0.1</td>\n",
206
- " </tr>\n",
207
- " <tr>\n",
208
- " <th>20</th>\n",
209
- " <td>objective_21</td>\n",
210
- " <td>0.8</td>\n",
211
- " <td>0.2</td>\n",
212
- " </tr>\n",
213
- " <tr>\n",
214
- " <th>21</th>\n",
215
- " <td>objective_22</td>\n",
216
- " <td>0.8</td>\n",
217
- " <td>0.3</td>\n",
218
- " </tr>\n",
219
- " <tr>\n",
220
- " <th>22</th>\n",
221
- " <td>objective_23</td>\n",
222
- " <td>0.8</td>\n",
223
- " <td>0.4</td>\n",
224
- " </tr>\n",
225
- " <tr>\n",
226
- " <th>23</th>\n",
227
- " <td>objective_24</td>\n",
228
- " <td>0.8</td>\n",
229
- " <td>0.5</td>\n",
230
- " </tr>\n",
231
- " <tr>\n",
232
- " <th>24</th>\n",
233
- " <td>objective_25</td>\n",
234
- " <td>1.0</td>\n",
235
- " <td>0.0</td>\n",
236
- " </tr>\n",
237
- " <tr>\n",
238
- " <th>25</th>\n",
239
- " <td>objective_26</td>\n",
240
- " <td>1.0</td>\n",
241
- " <td>0.1</td>\n",
242
- " </tr>\n",
243
- " <tr>\n",
244
- " <th>26</th>\n",
245
- " <td>objective_27</td>\n",
246
- " <td>1.0</td>\n",
247
- " <td>0.2</td>\n",
248
- " </tr>\n",
249
- " <tr>\n",
250
- " <th>27</th>\n",
251
- " <td>objective_28</td>\n",
252
- " <td>1.0</td>\n",
253
- " <td>0.3</td>\n",
254
- " </tr>\n",
255
- " <tr>\n",
256
- " <th>28</th>\n",
257
- " <td>objective_29</td>\n",
258
- " <td>1.0</td>\n",
259
- " <td>0.4</td>\n",
260
- " </tr>\n",
261
- " <tr>\n",
262
- " <th>29</th>\n",
263
- " <td>objective_30</td>\n",
264
- " <td>1.0</td>\n",
265
- " <td>0.5</td>\n",
266
- " </tr>\n",
267
- " </tbody>\n",
268
- "</table>\n",
269
- "</div>"
270
- ],
271
- "text/plain": [
272
- " log ratio_top_20_variants \n",
273
- "0 objective_1 0.2 \\\n",
274
- "1 objective_2 0.2 \n",
275
- "2 objective_3 0.2 \n",
276
- "3 objective_4 0.2 \n",
277
- "4 objective_5 0.2 \n",
278
- "5 objective_6 0.2 \n",
279
- "6 objective_7 0.4 \n",
280
- "7 objective_8 0.4 \n",
281
- "8 objective_9 0.4 \n",
282
- "9 objective_10 0.4 \n",
283
- "10 objective_11 0.4 \n",
284
- "11 objective_12 0.4 \n",
285
- "12 objective_13 0.6 \n",
286
- "13 objective_14 0.6 \n",
287
- "14 objective_15 0.6 \n",
288
- "15 objective_16 0.6 \n",
289
- "16 objective_17 0.6 \n",
290
- "17 objective_18 0.6 \n",
291
- "18 objective_19 0.8 \n",
292
- "19 objective_20 0.8 \n",
293
- "20 objective_21 0.8 \n",
294
- "21 objective_22 0.8 \n",
295
- "22 objective_23 0.8 \n",
296
- "23 objective_24 0.8 \n",
297
- "24 objective_25 1.0 \n",
298
- "25 objective_26 1.0 \n",
299
- "26 objective_27 1.0 \n",
300
- "27 objective_28 1.0 \n",
301
- "28 objective_29 1.0 \n",
302
- "29 objective_30 1.0 \n",
303
- "\n",
304
- " normalized_sequence_entropy_linear_forgetting \n",
305
- "0 0.0 \n",
306
- "1 0.1 \n",
307
- "2 0.2 \n",
308
- "3 0.3 \n",
309
- "4 0.4 \n",
310
- "5 0.5 \n",
311
- "6 0.0 \n",
312
- "7 0.1 \n",
313
- "8 0.2 \n",
314
- "9 0.3 \n",
315
- "10 0.4 \n",
316
- "11 0.5 \n",
317
- "12 0.0 \n",
318
- "13 0.1 \n",
319
- "14 0.2 \n",
320
- "15 0.3 \n",
321
- "16 0.4 \n",
322
- "17 0.5 \n",
323
- "18 0.0 \n",
324
- "19 0.1 \n",
325
- "20 0.2 \n",
326
- "21 0.3 \n",
327
- "22 0.4 \n",
328
- "23 0.5 \n",
329
- "24 0.0 \n",
330
- "25 0.1 \n",
331
- "26 0.2 \n",
332
- "27 0.3 \n",
333
- "28 0.4 \n",
334
- "29 0.5 "
335
- ]
336
- },
337
- "execution_count": 32,
338
- "metadata": {},
339
- "output_type": "execute_result"
340
- }
341
- ],
342
- "source": [
343
- "df"
344
- ]
345
- },
346
- {
347
- "cell_type": "code",
348
- "execution_count": null,
349
- "id": "d726a5ae",
350
- "metadata": {},
351
- "outputs": [],
352
- "source": []
353
- }
354
- ],
355
- "metadata": {
356
- "kernelspec": {
357
- "display_name": "Python 3 (ipykernel)",
358
- "language": "python",
359
- "name": "python3"
360
- },
361
- "language_info": {
362
- "codemirror_mode": {
363
- "name": "ipython",
364
- "version": 3
365
- },
366
- "file_extension": ".py",
367
- "mimetype": "text/x-python",
368
- "name": "python",
369
- "nbconvert_exporter": "python",
370
- "pygments_lexer": "ipython3",
371
- "version": "3.9.7"
372
- }
373
- },
374
- "nbformat": 4,
375
- "nbformat_minor": 5
376
- }
 
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {},
4
- "nbformat": 4,
5
- "nbformat_minor": 5
6
- }
 
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb DELETED
@@ -1,818 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": 4,
6
- "id": "4827785f",
7
- "metadata": {},
8
- "outputs": [
9
- {
10
- "data": {
11
- "text/html": [
12
- "<div>\n",
13
- "<style scoped>\n",
14
- " .dataframe tbody tr th:only-of-type {\n",
15
- " vertical-align: middle;\n",
16
- " }\n",
17
- "\n",
18
- " .dataframe tbody tr th {\n",
19
- " vertical-align: top;\n",
20
- " }\n",
21
- "\n",
22
- " .dataframe thead th {\n",
23
- " text-align: right;\n",
24
- " }\n",
25
- "</style>\n",
26
- "<table border=\"1\" class=\"dataframe\">\n",
27
- " <thead>\n",
28
- " <tr style=\"text-align: right;\">\n",
29
- " <th></th>\n",
30
- " <th>Name</th>\n",
31
- " <th>Short description</th>\n",
32
- " <th>data link</th>\n",
33
- " <th>challenge link</th>\n",
34
- " <th>Citations (Stand Februar 2023)</th>\n",
35
- " <th>Publications</th>\n",
36
- " <th>Process Discovery/ Declarative</th>\n",
37
- " <th>Conformance Checking / Alignment / Replay</th>\n",
38
- " <th>Online / Streaming / Realtime</th>\n",
39
- " <th>Performance (Analysis) / Temporal / Time</th>\n",
40
- " <th>Predict(ive)/ Monitoring/ Prescriptive</th>\n",
41
- " <th>Trace clustering / Clustering</th>\n",
42
- " <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
43
- " <th>Further keywords:</th>\n",
44
- " </tr>\n",
45
- " </thead>\n",
46
- " <tbody>\n",
47
- " <tr>\n",
48
- " <th>0</th>\n",
49
- " <td>Sepsis Cases - Event Log</td>\n",
50
- " <td>This real-life event log contains events of se...</td>\n",
51
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
52
- " <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
53
- " <td>61</td>\n",
54
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
55
- " <td>17</td>\n",
56
- " <td>7</td>\n",
57
- " <td>4</td>\n",
58
- " <td>1</td>\n",
59
- " <td>8</td>\n",
60
- " <td>2</td>\n",
61
- " <td>2</td>\n",
62
- " <td>(machine) learning, (online process) monitorin...</td>\n",
63
- " </tr>\n",
64
- " <tr>\n",
65
- " <th>1</th>\n",
66
- " <td>BPI 2017 - Offer Log</td>\n",
67
- " <td>Contains data from a financial institute inclu...</td>\n",
68
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
69
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
70
- " <td>4</td>\n",
71
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
72
- " <td>1</td>\n",
73
- " <td>0</td>\n",
74
- " <td>0</td>\n",
75
- " <td>1</td>\n",
76
- " <td>1</td>\n",
77
- " <td>0</td>\n",
78
- " <td>0</td>\n",
79
- " <td>(machine) learning, cloud computing</td>\n",
80
- " </tr>\n",
81
- " <tr>\n",
82
- " <th>2</th>\n",
83
- " <td>Road Traffic Fine Management Process (not BPI)</td>\n",
84
- " <td>A real-life event log taken from an informatio...</td>\n",
85
- " <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
86
- " <td>NaN</td>\n",
87
- " <td>95</td>\n",
88
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
89
- " <td>32</td>\n",
90
- " <td>9</td>\n",
91
- " <td>4</td>\n",
92
- " <td>8</td>\n",
93
- " <td>15</td>\n",
94
- " <td>1</td>\n",
95
- " <td>2</td>\n",
96
- " <td>alarm-based prescriptive process monitoring, b...</td>\n",
97
- " </tr>\n",
98
- " <tr>\n",
99
- " <th>3</th>\n",
100
- " <td>BPI 2011</td>\n",
101
- " <td>Contains data from from a Dutch Academic Hospi...</td>\n",
102
- " <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
103
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
104
- " <td>57</td>\n",
105
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
106
- " <td>13</td>\n",
107
- " <td>1</td>\n",
108
- " <td>3</td>\n",
109
- " <td>4</td>\n",
110
- " <td>12</td>\n",
111
- " <td>4</td>\n",
112
- " <td>1</td>\n",
113
- " <td>(compliance) monitoring, (machine) learning, d...</td>\n",
114
- " </tr>\n",
115
- " <tr>\n",
116
- " <th>4</th>\n",
117
- " <td>BPI 2012</td>\n",
118
- " <td>Contains the event log of an application proce...</td>\n",
119
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
120
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
121
- " <td>151</td>\n",
122
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
123
- " <td>40</td>\n",
124
- " <td>15</td>\n",
125
- " <td>4</td>\n",
126
- " <td>13</td>\n",
127
- " <td>46</td>\n",
128
- " <td>0</td>\n",
129
- " <td>1</td>\n",
130
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
131
- " </tr>\n",
132
- " <tr>\n",
133
- " <th>5</th>\n",
134
- " <td>BPI 2013 - Open Problems</td>\n",
135
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
136
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
137
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
138
- " <td>6</td>\n",
139
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
140
- " <td>1</td>\n",
141
- " <td>0</td>\n",
142
- " <td>0</td>\n",
143
- " <td>0</td>\n",
144
- " <td>1</td>\n",
145
- " <td>0</td>\n",
146
- " <td>0</td>\n",
147
- " <td>(in)frequent patterns in process models, (mach...</td>\n",
148
- " </tr>\n",
149
- " <tr>\n",
150
- " <th>6</th>\n",
151
- " <td>BPI 2013 - Closed Problems</td>\n",
152
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
153
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
154
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
155
- " <td>12</td>\n",
156
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
157
- " <td>3</td>\n",
158
- " <td>2</td>\n",
159
- " <td>1</td>\n",
160
- " <td>2</td>\n",
161
- " <td>0</td>\n",
162
- " <td>0</td>\n",
163
- " <td>3</td>\n",
164
- " <td>(in)frequent patterns in process models</td>\n",
165
- " </tr>\n",
166
- " <tr>\n",
167
- " <th>7</th>\n",
168
- " <td>BPI 2013 - Incidents</td>\n",
169
- " <td>The log contains events from an incident and p...</td>\n",
170
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
171
- " <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
172
- " <td>36</td>\n",
173
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
174
- " <td>14</td>\n",
175
- " <td>5</td>\n",
176
- " <td>1</td>\n",
177
- " <td>1</td>\n",
178
- " <td>7</td>\n",
179
- " <td>0</td>\n",
180
- " <td>2</td>\n",
181
- " <td>(machine) learning, rule mining</td>\n",
182
- " </tr>\n",
183
- " <tr>\n",
184
- " <th>8</th>\n",
185
- " <td>BPI 2014 - Incident Records</td>\n",
186
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
187
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
188
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
189
- " <td>5</td>\n",
190
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
191
- " <td>1</td>\n",
192
- " <td>0</td>\n",
193
- " <td>0</td>\n",
194
- " <td>0</td>\n",
195
- " <td>0</td>\n",
196
- " <td>0</td>\n",
197
- " <td>0</td>\n",
198
- " <td>privacy preservation, security</td>\n",
199
- " </tr>\n",
200
- " <tr>\n",
201
- " <th>9</th>\n",
202
- " <td>BPI 2014 - Interaction Records</td>\n",
203
- " <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
204
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
205
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
206
- " <td>1</td>\n",
207
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
208
- " <td>0</td>\n",
209
- " <td>0</td>\n",
210
- " <td>0</td>\n",
211
- " <td>0</td>\n",
212
- " <td>0</td>\n",
213
- " <td>0</td>\n",
214
- " <td>0</td>\n",
215
- " <td>(machine) learning, hidden Markov models</td>\n",
216
- " </tr>\n",
217
- " <tr>\n",
218
- " <th>10</th>\n",
219
- " <td>BPI 2015 - Log 3</td>\n",
220
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
221
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
222
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
223
- " <td>1</td>\n",
224
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
225
- " <td>0</td>\n",
226
- " <td>0</td>\n",
227
- " <td>0</td>\n",
228
- " <td>0</td>\n",
229
- " <td>1</td>\n",
230
- " <td>0</td>\n",
231
- " <td>0</td>\n",
232
- " <td>specification-driven predictive business proce...</td>\n",
233
- " </tr>\n",
234
- " <tr>\n",
235
- " <th>11</th>\n",
236
- " <td>BPI 2015 - Log 1</td>\n",
237
- " <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
238
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
239
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
240
- " <td>8</td>\n",
241
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
242
- " <td>1</td>\n",
243
- " <td>1</td>\n",
244
- " <td>0</td>\n",
245
- " <td>0</td>\n",
246
- " <td>3</td>\n",
247
- " <td>0</td>\n",
248
- " <td>3</td>\n",
249
- " <td>(machine) learning</td>\n",
250
- " </tr>\n",
251
- " <tr>\n",
252
- " <th>12</th>\n",
253
- " <td>BPI 2016 - Clicks Logged In</td>\n",
254
- " <td>Contains clicks of users that are logged in fr...</td>\n",
255
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
256
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
257
- " <td>1</td>\n",
258
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
259
- " <td>1</td>\n",
260
- " <td>0</td>\n",
261
- " <td>1</td>\n",
262
- " <td>0</td>\n",
263
- " <td>0</td>\n",
264
- " <td>0</td>\n",
265
- " <td>0</td>\n",
266
- " <td>automation</td>\n",
267
- " </tr>\n",
268
- " <tr>\n",
269
- " <th>13</th>\n",
270
- " <td>BPI 2017 - Application Log</td>\n",
271
- " <td>Contains data from a financial institute inclu...</td>\n",
272
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
273
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
274
- " <td>73</td>\n",
275
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
276
- " <td>11</td>\n",
277
- " <td>5</td>\n",
278
- " <td>2</td>\n",
279
- " <td>14</td>\n",
280
- " <td>23</td>\n",
281
- " <td>1</td>\n",
282
- " <td>1</td>\n",
283
- " <td>(machine) learning, alarm-based prescriptive p...</td>\n",
284
- " </tr>\n",
285
- " <tr>\n",
286
- " <th>14</th>\n",
287
- " <td>BPI 2018</td>\n",
288
- " <td>The process covers the handling of application...</td>\n",
289
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
290
- " <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
291
- " <td>26</td>\n",
292
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
293
- " <td>7</td>\n",
294
- " <td>1</td>\n",
295
- " <td>2</td>\n",
296
- " <td>0</td>\n",
297
- " <td>8</td>\n",
298
- " <td>0</td>\n",
299
- " <td>2</td>\n",
300
- " <td>(machine) learning, automation</td>\n",
301
- " </tr>\n",
302
- " <tr>\n",
303
- " <th>15</th>\n",
304
- " <td>BPI 2020 - Travel Permits</td>\n",
305
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
306
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
307
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
308
- " <td>2</td>\n",
309
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
310
- " <td>0</td>\n",
311
- " <td>0</td>\n",
312
- " <td>0</td>\n",
313
- " <td>1</td>\n",
314
- " <td>0</td>\n",
315
- " <td>0</td>\n",
316
- " <td>0</td>\n",
317
- " <td>stage-based process performance analysis</td>\n",
318
- " </tr>\n",
319
- " <tr>\n",
320
- " <th>16</th>\n",
321
- " <td>BPI 2019</td>\n",
322
- " <td>Contains the purchase order handling process o...</td>\n",
323
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
324
- " <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
325
- " <td>35</td>\n",
326
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
327
- " <td>3</td>\n",
328
- " <td>1</td>\n",
329
- " <td>6</td>\n",
330
- " <td>6</td>\n",
331
- " <td>9</td>\n",
332
- " <td>4</td>\n",
333
- " <td>1</td>\n",
334
- " <td>(online process) monitoring, remaining time pr...</td>\n",
335
- " </tr>\n",
336
- " <tr>\n",
337
- " <th>17</th>\n",
338
- " <td>BPI 2020 - International Declarations</td>\n",
339
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
340
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
341
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
342
- " <td>2</td>\n",
343
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
344
- " <td>0</td>\n",
345
- " <td>0</td>\n",
346
- " <td>0</td>\n",
347
- " <td>1</td>\n",
348
- " <td>2</td>\n",
349
- " <td>0</td>\n",
350
- " <td>0</td>\n",
351
- " <td>(machine) learning, remaining time prediction</td>\n",
352
- " </tr>\n",
353
- " <tr>\n",
354
- " <th>18</th>\n",
355
- " <td>BPI 2020 - Domestic Declarations</td>\n",
356
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
357
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
358
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
359
- " <td>7</td>\n",
360
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
361
- " <td>0</td>\n",
362
- " <td>2</td>\n",
363
- " <td>2</td>\n",
364
- " <td>2</td>\n",
365
- " <td>3</td>\n",
366
- " <td>0</td>\n",
367
- " <td>0</td>\n",
368
- " <td>(machine) learning, remaining time prediction</td>\n",
369
- " </tr>\n",
370
- " <tr>\n",
371
- " <th>19</th>\n",
372
- " <td>BPI 2020 - Prepaid Travel Cost</td>\n",
373
- " <td>Contains 2 years of data from the reimbursemen...</td>\n",
374
- " <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
375
- " <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
376
- " <td>2</td>\n",
377
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
378
- " <td>0</td>\n",
379
- " <td>0</td>\n",
380
- " <td>0</td>\n",
381
- " <td>0</td>\n",
382
- " <td>0</td>\n",
383
- " <td>0</td>\n",
384
- " <td>0</td>\n",
385
- " <td>multi-perspective</td>\n",
386
- " </tr>\n",
387
- " <tr>\n",
388
- " <th>20</th>\n",
389
- " <td>Helpdesk</td>\n",
390
- " <td>Ticketing management process of the Help desk ...</td>\n",
391
- " <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
392
- " <td>NaN</td>\n",
393
- " <td>20</td>\n",
394
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
395
- " <td>4</td>\n",
396
- " <td>1</td>\n",
397
- " <td>3</td>\n",
398
- " <td>1</td>\n",
399
- " <td>8</td>\n",
400
- " <td>0</td>\n",
401
- " <td>0</td>\n",
402
- " <td>(machine) learning, drift detection</td>\n",
403
- " </tr>\n",
404
- " <tr>\n",
405
- " <th>21</th>\n",
406
- " <td>Receipt phase of an environmental permit appli...</td>\n",
407
- " <td>Data originates from the CoSeLoG project where...</td>\n",
408
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
409
- " <td>NaN</td>\n",
410
- " <td>15</td>\n",
411
- " <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
412
- " <td>-1</td>\n",
413
- " <td>-1</td>\n",
414
- " <td>-1</td>\n",
415
- " <td>-1</td>\n",
416
- " <td>-1</td>\n",
417
- " <td>-1</td>\n",
418
- " <td>-1</td>\n",
419
- " <td>NaN</td>\n",
420
- " </tr>\n",
421
- " <tr>\n",
422
- " <th>22</th>\n",
423
- " <td>Environmental permit application process (‘WAB...</td>\n",
424
- " <td>Data originates from the CoSeLoG project where...</td>\n",
425
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
426
- " <td>NaN</td>\n",
427
- " <td>2</td>\n",
428
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
429
- " <td>0</td>\n",
430
- " <td>0</td>\n",
431
- " <td>0</td>\n",
432
- " <td>0</td>\n",
433
- " <td>1</td>\n",
434
- " <td>0</td>\n",
435
- " <td>0</td>\n",
436
- " <td>predictions with a-priori knowledge</td>\n",
437
- " </tr>\n",
438
- " <tr>\n",
439
- " <th>23</th>\n",
440
- " <td>Environmental permit application process (‘WAB...</td>\n",
441
- " <td>Data originates from the CoSeLoG project where...</td>\n",
442
- " <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
443
- " <td>NaN</td>\n",
444
- " <td>2</td>\n",
445
- " <td>https://app.dimensions.ai/discover/publication...</td>\n",
446
- " <td>1</td>\n",
447
- " <td>0</td>\n",
448
- " <td>0</td>\n",
449
- " <td>0</td>\n",
450
- " <td>0</td>\n",
451
- " <td>0</td>\n",
452
- " <td>0</td>\n",
453
- " <td>multidimensional process mining, process cubes</td>\n",
454
- " </tr>\n",
455
- " <tr>\n",
456
- " <th>24</th>\n",
457
- " <td>NaN</td>\n",
458
- " <td>NaN</td>\n",
459
- " <td>NaN</td>\n",
460
- " <td>NaN</td>\n",
461
- " <td>NaN</td>\n",
462
- " <td>NaN</td>\n",
463
- " <td>NaN</td>\n",
464
- " <td>NaN</td>\n",
465
- " <td>NaN</td>\n",
466
- " <td>NaN</td>\n",
467
- " <td>NaN</td>\n",
468
- " <td>NaN</td>\n",
469
- " <td>NaN</td>\n",
470
- " <td>NaN</td>\n",
471
- " </tr>\n",
472
- " </tbody>\n",
473
- "</table>\n",
474
- "</div>"
475
- ],
476
- "text/plain": [
477
- " Name \\\n",
478
- "0 Sepsis Cases - Event Log \n",
479
- "1 BPI 2017 - Offer Log \n",
480
- "2 Road Traffic Fine Management Process (not BPI) \n",
481
- "3 BPI 2011 \n",
482
- "4 BPI 2012 \n",
483
- "5 BPI 2013 - Open Problems \n",
484
- "6 BPI 2013 - Closed Problems \n",
485
- "7 BPI 2013 - Incidents \n",
486
- "8 BPI 2014 - Incident Records \n",
487
- "9 BPI 2014 - Interaction Records \n",
488
- "10 BPI 2015 - Log 3 \n",
489
- "11 BPI 2015 - Log 1 \n",
490
- "12 BPI 2016 - Clicks Logged In \n",
491
- "13 BPI 2017 - Application Log \n",
492
- "14 BPI 2018 \n",
493
- "15 BPI 2020 - Travel Permits \n",
494
- "16 BPI 2019 \n",
495
- "17 BPI 2020 - International Declarations \n",
496
- "18 BPI 2020 - Domestic Declarations \n",
497
- "19 BPI 2020 - Prepaid Travel Cost \n",
498
- "20 Helpdesk \n",
499
- "21 Receipt phase of an environmental permit appli... \n",
500
- "22 Environmental permit application process (‘WAB... \n",
501
- "23 Environmental permit application process (‘WAB... \n",
502
- "24 NaN \n",
503
- "\n",
504
- " Short description \\\n",
505
- "0 This real-life event log contains events of se... \n",
506
- "1 Contains data from a financial institute inclu... \n",
507
- "2 A real-life event log taken from an informatio... \n",
508
- "3 Contains data from from a Dutch Academic Hospi... \n",
509
- "4 Contains the event log of an application proce... \n",
510
- "5 Rabobank Group ICT implemented ITIL processes ... \n",
511
- "6 Rabobank Group ICT implemented ITIL processes ... \n",
512
- "7 The log contains events from an incident and p... \n",
513
- "8 Rabobank Group ICT implemented ITIL processes ... \n",
514
- "9 Rabobank Group ICT implemented ITIL processes ... \n",
515
- "10 Provided by 5 Dutch municipalities. The data c... \n",
516
- "11 Provided by 5 Dutch municipalities. The data c... \n",
517
- "12 Contains clicks of users that are logged in fr... \n",
518
- "13 Contains data from a financial institute inclu... \n",
519
- "14 The process covers the handling of application... \n",
520
- "15 Contains 2 years of data from the reimbursemen... \n",
521
- "16 Contains the purchase order handling process o... \n",
522
- "17 Contains 2 years of data from the reimbursemen... \n",
523
- "18 Contains 2 years of data from the reimbursemen... \n",
524
- "19 Contains 2 years of data from the reimbursemen... \n",
525
- "20 Ticketing management process of the Help desk ... \n",
526
- "21 Data originates from the CoSeLoG project where... \n",
527
- "22 Data originates from the CoSeLoG project where... \n",
528
- "23 Data originates from the CoSeLoG project where... \n",
529
- "24 NaN \n",
530
- "\n",
531
- " data link \\\n",
532
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
533
- "1 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
534
- "2 https://data.4tu.nl/articles/dataset/Road_Traf... \n",
535
- "3 https://data.4tu.nl/articles/dataset/Real-life... \n",
536
- "4 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
537
- "5 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
538
- "6 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
539
- "7 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
540
- "8 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
541
- "9 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
542
- "10 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
543
- "11 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
544
- "12 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
545
- "13 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
546
- "14 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
547
- "15 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
548
- "16 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
549
- "17 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
550
- "18 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
551
- "19 https://data.4tu.nl/articles/dataset/BPI_Chall... \n",
552
- "20 https://data.4tu.nl/articles/dataset/Dataset_b... \n",
553
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
554
- "22 https://data.4tu.nl/articles/dataset/Environme... \n",
555
- "23 https://data.4tu.nl/articles/dataset/Environme... \n",
556
- "24 NaN \n",
557
- "\n",
558
- " challenge link \\\n",
559
- "0 https://data.4tu.nl/articles/dataset/Sepsis_Ca... \n",
560
- "1 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
561
- "2 NaN \n",
562
- "3 https://www.win.tue.nl/bpi/doku.php?id=2011:ch... \n",
563
- "4 https://www.win.tue.nl/bpi/doku.php?id=2012:ch... \n",
564
- "5 https://www.win.tue.nl/bpi/2013/challenge.html \n",
565
- "6 https://www.win.tue.nl/bpi/doku.php?id=2013:ch... \n",
566
- "7 https://www.win.tue.nl/bpi/2013/challenge.html \n",
567
- "8 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
568
- "9 https://www.win.tue.nl/bpi/doku.php?id=2014:ch... \n",
569
- "10 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
570
- "11 https://www.win.tue.nl/bpi/doku.php?id=2015:ch... \n",
571
- "12 https://www.win.tue.nl/bpi/doku.php?id=2016:ch... \n",
572
- "13 https://www.win.tue.nl/bpi/doku.php?id=2017:ch... \n",
573
- "14 https://www.win.tue.nl/bpi/doku.php?id=2018:ch... \n",
574
- "15 https://icpmconference.org/2020/bpi-challenge/ \n",
575
- "16 https://icpmconference.org/2019/icpm-2019/cont... \n",
576
- "17 https://icpmconference.org/2020/bpi-challenge/ \n",
577
- "18 https://icpmconference.org/2020/bpi-challenge/ \n",
578
- "19 https://icpmconference.org/2020/bpi-challenge/ \n",
579
- "20 NaN \n",
580
- "21 NaN \n",
581
- "22 NaN \n",
582
- "23 NaN \n",
583
- "24 NaN \n",
584
- "\n",
585
- " Citations (Stand Februar 2023) \\\n",
586
- "0 61 \n",
587
- "1 4 \n",
588
- "2 95 \n",
589
- "3 57 \n",
590
- "4 151 \n",
591
- "5 6 \n",
592
- "6 12 \n",
593
- "7 36 \n",
594
- "8 5 \n",
595
- "9 1 \n",
596
- "10 1 \n",
597
- "11 8 \n",
598
- "12 1 \n",
599
- "13 73 \n",
600
- "14 26 \n",
601
- "15 2 \n",
602
- "16 35 \n",
603
- "17 2 \n",
604
- "18 7 \n",
605
- "19 2 \n",
606
- "20 20 \n",
607
- "21 15 \n",
608
- "22 2 \n",
609
- "23 2 \n",
610
- "24 NaN \n",
611
- "\n",
612
- " Publications \\\n",
613
- "0 https://app.dimensions.ai/discover/publication... \n",
614
- "1 https://app.dimensions.ai/discover/publication... \n",
615
- "2 https://app.dimensions.ai/discover/publication... \n",
616
- "3 https://app.dimensions.ai/discover/publication... \n",
617
- "4 https://app.dimensions.ai/discover/publication... \n",
618
- "5 https://app.dimensions.ai/discover/publication... \n",
619
- "6 https://app.dimensions.ai/discover/publication... \n",
620
- "7 https://app.dimensions.ai/discover/publication... \n",
621
- "8 https://app.dimensions.ai/discover/publication... \n",
622
- "9 https://app.dimensions.ai/discover/publication... \n",
623
- "10 https://app.dimensions.ai/discover/publication... \n",
624
- "11 https://app.dimensions.ai/discover/publication... \n",
625
- "12 https://app.dimensions.ai/discover/publication... \n",
626
- "13 https://app.dimensions.ai/discover/publication... \n",
627
- "14 https://app.dimensions.ai/discover/publication... \n",
628
- "15 https://app.dimensions.ai/discover/publication... \n",
629
- "16 https://app.dimensions.ai/discover/publication... \n",
630
- "17 https://app.dimensions.ai/discover/publication... \n",
631
- "18 https://app.dimensions.ai/discover/publication... \n",
632
- "19 https://app.dimensions.ai/discover/publication... \n",
633
- "20 https://app.dimensions.ai/discover/publication... \n",
634
- "21 https://data.4tu.nl/articles/dataset/Receipt_p... \n",
635
- "22 https://app.dimensions.ai/discover/publication... \n",
636
- "23 https://app.dimensions.ai/discover/publication... \n",
637
- "24 NaN \n",
638
- "\n",
639
- " Process Discovery/ Declarative Conformance Checking / Alignment / Replay \\\n",
640
- "0 17 7 \n",
641
- "1 1 0 \n",
642
- "2 32 9 \n",
643
- "3 13 1 \n",
644
- "4 40 15 \n",
645
- "5 1 0 \n",
646
- "6 3 2 \n",
647
- "7 14 5 \n",
648
- "8 1 0 \n",
649
- "9 0 0 \n",
650
- "10 0 0 \n",
651
- "11 1 1 \n",
652
- "12 1 0 \n",
653
- "13 11 5 \n",
654
- "14 7 1 \n",
655
- "15 0 0 \n",
656
- "16 3 1 \n",
657
- "17 0 0 \n",
658
- "18 0 2 \n",
659
- "19 0 0 \n",
660
- "20 4 1 \n",
661
- "21 -1 -1 \n",
662
- "22 0 0 \n",
663
- "23 1 0 \n",
664
- "24 NaN NaN \n",
665
- "\n",
666
- " Online / Streaming / Realtime Performance (Analysis) / Temporal / Time \\\n",
667
- "0 4 1 \n",
668
- "1 0 1 \n",
669
- "2 4 8 \n",
670
- "3 3 4 \n",
671
- "4 4 13 \n",
672
- "5 0 0 \n",
673
- "6 1 2 \n",
674
- "7 1 1 \n",
675
- "8 0 0 \n",
676
- "9 0 0 \n",
677
- "10 0 0 \n",
678
- "11 0 0 \n",
679
- "12 1 0 \n",
680
- "13 2 14 \n",
681
- "14 2 0 \n",
682
- "15 0 1 \n",
683
- "16 6 6 \n",
684
- "17 0 1 \n",
685
- "18 2 2 \n",
686
- "19 0 0 \n",
687
- "20 3 1 \n",
688
- "21 -1 -1 \n",
689
- "22 0 0 \n",
690
- "23 0 0 \n",
691
- "24 NaN NaN \n",
692
- "\n",
693
- " Predict(ive)/ Monitoring/ Prescriptive Trace clustering / Clustering \\\n",
694
- "0 8 2 \n",
695
- "1 1 0 \n",
696
- "2 15 1 \n",
697
- "3 12 4 \n",
698
- "4 46 0 \n",
699
- "5 1 0 \n",
700
- "6 0 0 \n",
701
- "7 7 0 \n",
702
- "8 0 0 \n",
703
- "9 0 0 \n",
704
- "10 1 0 \n",
705
- "11 3 0 \n",
706
- "12 0 0 \n",
707
- "13 23 1 \n",
708
- "14 8 0 \n",
709
- "15 0 0 \n",
710
- "16 9 4 \n",
711
- "17 2 0 \n",
712
- "18 3 0 \n",
713
- "19 0 0 \n",
714
- "20 8 0 \n",
715
- "21 -1 -1 \n",
716
- "22 1 0 \n",
717
- "23 0 0 \n",
718
- "24 NaN NaN \n",
719
- "\n",
720
- " Preprocessing / Event Abstraction / Event Data Correlation \\\n",
721
- "0 2 \n",
722
- "1 0 \n",
723
- "2 2 \n",
724
- "3 1 \n",
725
- "4 1 \n",
726
- "5 0 \n",
727
- "6 3 \n",
728
- "7 2 \n",
729
- "8 0 \n",
730
- "9 0 \n",
731
- "10 0 \n",
732
- "11 3 \n",
733
- "12 0 \n",
734
- "13 1 \n",
735
- "14 2 \n",
736
- "15 0 \n",
737
- "16 1 \n",
738
- "17 0 \n",
739
- "18 0 \n",
740
- "19 0 \n",
741
- "20 0 \n",
742
- "21 -1 \n",
743
- "22 0 \n",
744
- "23 0 \n",
745
- "24 NaN \n",
746
- "\n",
747
- " Further keywords: \n",
748
- "0 (machine) learning, (online process) monitorin... \n",
749
- "1 (machine) learning, cloud computing \n",
750
- "2 alarm-based prescriptive process monitoring, b... \n",
751
- "3 (compliance) monitoring, (machine) learning, d... \n",
752
- "4 (in)frequent patterns in process models, (mach... \n",
753
- "5 (in)frequent patterns in process models, (mach... \n",
754
- "6 (in)frequent patterns in process models \n",
755
- "7 (machine) learning, rule mining \n",
756
- "8 privacy preservation, security \n",
757
- "9 (machine) learning, hidden Markov models \n",
758
- "10 specification-driven predictive business proce... \n",
759
- "11 (machine) learning \n",
760
- "12 automation \n",
761
- "13 (machine) learning, alarm-based prescriptive p... \n",
762
- "14 (machine) learning, automation \n",
763
- "15 stage-based process performance analysis \n",
764
- "16 (online process) monitoring, remaining time pr... \n",
765
- "17 (machine) learning, remaining time prediction \n",
766
- "18 (machine) learning, remaining time prediction \n",
767
- "19 multi-perspective \n",
768
- "20 (machine) learning, drift detection \n",
769
- "21 NaN \n",
770
- "22 predictions with a-priori knowledge \n",
771
- "23 multidimensional process mining, process cubes \n",
772
- "24 NaN "
773
- ]
774
- },
775
- "execution_count": 4,
776
- "metadata": {},
777
- "output_type": "execute_result"
778
- }
779
- ],
780
- "source": [
781
- "#import pm4py\n",
782
- "import pandas as pd\n",
783
- "INPUT_PATH = \"../data/mappings.csv\"\n",
784
- "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
785
- "df"
786
- ]
787
- },
788
- {
789
- "cell_type": "code",
790
- "execution_count": null,
791
- "id": "04a97f37",
792
- "metadata": {},
793
- "outputs": [],
794
- "source": []
795
- }
796
- ],
797
- "metadata": {
798
- "kernelspec": {
799
- "display_name": "Python 3 (ipykernel)",
800
- "language": "python",
801
- "name": "python3"
802
- },
803
- "language_info": {
804
- "codemirror_mode": {
805
- "name": "ipython",
806
- "version": 3
807
- },
808
- "file_extension": ".py",
809
- "mimetype": "text/x-python",
810
- "name": "python",
811
- "nbconvert_exporter": "python",
812
- "pygments_lexer": "ipython3",
813
- "version": "3.10.7"
814
- }
815
- },
816
- "nbformat": 4,
817
- "nbformat_minor": 5
818
- }
 
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb DELETED
The diff for this file is too large to render. See raw diff
 
notebooks/benchmarking_process_discovery.ipynb CHANGED
@@ -1277,7 +1277,7 @@
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
- "version": "3.9.7"
1426
  }
1427
  },
1428
  "nbformat": 4,
 
1277
  "\n",
1278
  "import sys\n",
1279
  "import os\n",
1280
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1281
  "from io_helpers import get_keys_abbreviation\n",
1282
  "\n",
1283
  "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
 
1422
  "name": "python",
1423
  "nbconvert_exporter": "python",
1424
  "pygments_lexer": "ipython3",
1425
+ "version": "3.9.19"
1426
  }
1427
  },
1428
  "nbformat": 4,
notebooks/bpic_generability_pdm.ipynb CHANGED
@@ -1223,7 +1223,7 @@
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
 
1223
  "from scipy.stats import pearsonr\n",
1224
  "import sys\n",
1225
  "import os\n",
1226
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1227
  "from io_helpers import get_keys_abbreviation\n",
1228
  "\n",
1229
  "\n",
notebooks/experiment_generator.ipynb CHANGED
@@ -2225,7 +2225,7 @@
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
- "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
- "version": "3.9.7"
3106
  }
3107
  },
3108
  "nbformat": 4,
 
2225
  ],
2226
  "source": [
2227
  "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
2228
+ "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
2229
  "\n",
2230
  "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
2231
  "print(bpic_features.shape)\n",
 
3102
  "name": "python",
3103
  "nbconvert_exporter": "python",
3104
  "pygments_lexer": "ipython3",
3105
+ "version": "3.9.19"
3106
  }
3107
  },
3108
  "nbformat": 4,
notebooks/feature_distributions.ipynb CHANGED
@@ -1847,7 +1847,7 @@
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
- "version": "3.9.12"
1851
  }
1852
  },
1853
  "nbformat": 4,
 
1847
  "name": "python",
1848
  "nbconvert_exporter": "python",
1849
  "pygments_lexer": "ipython3",
1850
+ "version": "3.9.19"
1851
  }
1852
  },
1853
  "nbformat": 4,
notebooks/feature_exploration.ipynb CHANGED
@@ -3810,7 +3810,7 @@
3810
  "name": "python",
3811
  "nbconvert_exporter": "python",
3812
  "pygments_lexer": "ipython3",
3813
- "version": "3.9.12"
3814
  }
3815
  },
3816
  "nbformat": 4,
 
3810
  "name": "python",
3811
  "nbconvert_exporter": "python",
3812
  "pygments_lexer": "ipython3",
3813
+ "version": "3.9.19"
3814
  }
3815
  },
3816
  "nbformat": 4,
notebooks/feature_performance_similarity.ipynb CHANGED
@@ -319,7 +319,7 @@
319
  "from scipy.stats import pearsonr\n",
320
  "import sys\n",
321
  "import os\n",
322
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
323
  "from io_helpers import get_keys_abbreviation\n",
324
  "\n",
325
  "\n",
@@ -1833,7 +1833,7 @@
1833
  "from scipy.stats import pearsonr\n",
1834
  "import sys\n",
1835
  "import os\n",
1836
- "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
1837
  "from io_helpers import get_keys_abbreviation\n",
1838
  "\n",
1839
  "\n",
@@ -2133,7 +2133,7 @@
2133
  "name": "python",
2134
  "nbconvert_exporter": "python",
2135
  "pygments_lexer": "ipython3",
2136
- "version": "3.9.12"
2137
  }
2138
  },
2139
  "nbformat": 4,
 
319
  "from scipy.stats import pearsonr\n",
320
  "import sys\n",
321
  "import os\n",
322
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
323
  "from io_helpers import get_keys_abbreviation\n",
324
  "\n",
325
  "\n",
 
1833
  "from scipy.stats import pearsonr\n",
1834
  "import sys\n",
1835
  "import os\n",
1836
+ "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
1837
  "from io_helpers import get_keys_abbreviation\n",
1838
  "\n",
1839
  "\n",
 
2133
  "name": "python",
2134
  "nbconvert_exporter": "python",
2135
  "pygments_lexer": "ipython3",
2136
+ "version": "3.9.19"
2137
  }
2138
  },
2139
  "nbformat": 4,
notebooks/feature_selection.ipynb CHANGED
@@ -1928,7 +1928,7 @@
1928
  "name": "python",
1929
  "nbconvert_exporter": "python",
1930
  "pygments_lexer": "ipython3",
1931
- "version": "3.9.7"
1932
  }
1933
  },
1934
  "nbformat": 4,
 
1928
  "name": "python",
1929
  "nbconvert_exporter": "python",
1930
  "pygments_lexer": "ipython3",
1931
+ "version": "3.9.19"
1932
  }
1933
  },
1934
  "nbformat": 4,
notebooks/gedi_representativeness.ipynb CHANGED
The diff for this file is too large to render. See raw diff
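
Several of the notebook hunks above rewrite the same sys.path shim so that io_helpers is loaded from gedi/ instead of tag/. The lines below are a minimal sketch of that pattern after the rename, not part of the commit itself; they assume the notebook's working directory is notebooks/ (so ../gedi points at the renamed package) and they do not call get_keys_abbreviation, whose signature is not shown in this diff.

# Sketch only (assumption: executed from notebooks/, with gedi/ one level up).
import os
import sys

# Put gedi/utils on the module search path; dirname("../gedi/utils/io_helpers.py")
# evaluates to "../gedi/utils", so io_helpers becomes importable as a top-level module.
sys.path.append(os.path.dirname("../gedi/utils/io_helpers.py"))
from io_helpers import get_keys_abbreviation

# Equivalent package-style import, if the repository root is on sys.path instead:
# from gedi.utils.io_helpers import get_keys_abbreviation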