Spaces:

andreamalhera
/

igedi

Sleeping

App Files Files Community

Andrea MH committed on Apr 21, 2024

Commit

761e409

unverified ·

2 Parent(s): 8742124 973f5db

Merge pull request #6 from lmu-dbs/5-automation-test-gedi-automatically

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.github/workflows/test_gedi.yml +162 -0
.gitignore +5 -1
README.md +1 -1
config.py +3 -3
config_files/algorithm/benchmark.json +1 -2
config_files/algorithm/evaluation_plotter.json +7 -5
config_files/algorithm/experiment_test.json +7 -7
config_files/algorithm/feature_extraction.json +1 -1
config_files/algorithm/generation.json +2 -5
data/2_grid_test.csv +3 -3
data/{test_2 → test}/gen_el_168.xes +0 -0
data/{test_2 → test}/gen_el_169.xes +0 -0
data/test/grid_feat.csv +3 -0
data/test/plotter/1_enve_feat.csv +12 -0
data/test/plotter/grid_1objectives_enve.csv +12 -0
execute_grid_experiments.py +1 -1
gedi/__init__.py +8 -0
{tag → gedi}/analyser.py +3 -3
{tag → gedi}/augmentation.py +1 -1
{tag → gedi}/benchmark.py +3 -3
{tag → gedi}/features.py +1 -1
{tag → gedi}/generator.py +2 -2
{tag → gedi}/plotter.py +6 -5
{tag → gedi}/utils/algorithms/__init__.py +0 -0
{tag → gedi}/utils/algorithms/tsne.py +0 -0
{tag → gedi}/utils/array_tools.py +0 -0
{tag → gedi}/utils/io_helpers.py +0 -0
{tag → gedi}/utils/matrix_tools.py +0 -0
main.py +9 -9
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +0 -376
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +0 -6
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +0 -6
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +0 -818
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
notebooks/benchmarking_process_discovery.ipynb +2 -2
notebooks/bpic_generability_pdm.ipynb +1 -1
notebooks/experiment_generator.ipynb +2 -2
notebooks/feature_distributions.ipynb +1 -1

.github/workflows/test_gedi.yml ADDED Viewed

	@@ -0,0 +1,162 @@

+name: GEDI Test
+# Specifies when the action should run
+on:
+  pull_request:
+    branches:
+      - main
+# Specifies the jobs that are to be run
+jobs:
+  test_feature-extraction:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install the checked-out package, and run the feature-extraction pipeline step
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
+    - name: Compare output
+      run: diff data/test_feat.csv data/test_feat.csv  # NOTE(review): both operands are the same path, so this diff always succeeds; confirm the intended reference file
+  test_generation:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install system build packages and the checked-out package, and run the generation pipeline step
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install dependencies
+      run: |
+        sudo apt-get install build-essential python3 python3-dev
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
+    - name: Compare output
+      run: diff output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json  # NOTE(review): both operands are the same path, so this diff always succeeds; confirm the intended reference file
+  test_benchmark:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install the checked-out package, and run the benchmark pipeline step
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
+    - name: Compare output
+      run: diff output/benchmark/test_benchmark.csv output/benchmark/test_benchmark.csv  # NOTE(review): both operands are the same path, so this diff always succeeds; confirm the intended reference file
+  test_augmentation:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install the checked-out package, and run the augmentation pipeline step (this job has no output-comparison step)
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
+  test_evaluation-plotter:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install system build packages and the checked-out package, and run the evaluation-plotter pipeline step (this job has no output-comparison step)
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install dependencies
+      run: |
+        sudo apt-get install build-essential python3 python3-dev
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
+  test_integration:
+    runs-on: ubuntu-latest
+    # Set up a Python environment, install system build packages and the checked-out package, and run the pipeline with the experiment_test.json configuration (this job has no output-comparison step)
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+    - name: Install dependencies
+      run: |
+        sudo apt-get install build-essential python3 python3-dev
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run:
+        python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json

.gitignore CHANGED Viewed

@@ -1,3 +1,7 @@
 smac3_output/
 data/
-output/

 smac3_output/
 data/
+output/
+.ipynb_checkpoints/
+notebooks/.ipynb_checkpoints/*
+gedi.egg-info/
+build/

README.md CHANGED Viewed

@@ -32,7 +32,7 @@ python main.py -o config_files/options/baseline.json -a config_files/algorithm/e
 ## Usage
 Our pipeline offers several pipeline steps, which can be run sequentially or partially:
 - feature_extraction
-- event_logs_generation
 - benchmark
 - evaluation_plotter

 ## Usage
 Our pipeline offers several pipeline steps, which can be run sequentially or partially:
 - feature_extraction
+- generation
 - benchmark
 - evaluation_plotter

config.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 import os
 import warnings
-from tag.utils.io_helpers import sort_files
 from tqdm import tqdm
 from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
@@ -63,8 +63,8 @@ def get_files_and_kwargs(params: dict):
     #TODO: generate parent directories if they don't exist
     if input_name == 'test':
-        filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
     elif input_name == 'realLogs':
         filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}

 import os
 import warnings
+from gedi.utils.io_helpers import sort_files
 from tqdm import tqdm
 from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
     #TODO: generate parent directories if they don't exist
     if input_name == 'test':
+        filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
+        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
     elif input_name == 'realLogs':
         filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}

config_files/algorithm/benchmark.json CHANGED Viewed

@@ -2,8 +2,7 @@
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
-    "input_path":"data/test_2",
-    "input_path":"data/test_2/gen_el_168.xes",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
+    "input_path":"data/test",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

config_files/algorithm/evaluation_plotter.json CHANGED Viewed

@@ -2,16 +2,18 @@
     {
         "pipeline_step": "evaluation_plotter",
         "input_path": "output/features/generated/34_bpic_features/",
-        "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
         "input_path": "output/features/generated/grid_2obj/",
         "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
-        "output_path": "output/plots",
         "reference_feature": "epa_normalized_sequence_entropy",
-        "reference_feature": "epa_normalized_variant_entropy",
         "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
         "targets": "data/34_bpic_features.csv",
-        "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
         "targets": "data/grid_experiments/grid_2obj/",
-        "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
     }
 ]

     {
         "pipeline_step": "evaluation_plotter",
         "input_path": "output/features/generated/34_bpic_features/",
         "input_path": "output/features/generated/grid_2obj/",
         "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
+        "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
+        "input_path": "data/test/plotter/1_enve_feat.csv",
         "reference_feature": "epa_normalized_sequence_entropy",
         "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
+        "reference_feature": "epa_normalized_variant_entropy",
         "targets": "data/34_bpic_features.csv",
         "targets": "data/grid_experiments/grid_2obj/",
+        "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
+        "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
+        "targets": "data/test/plotter/grid_1objectives_enve.csv",
+        "output_path": "output/plots"
     }
 ]

config_files/algorithm/experiment_test.json CHANGED Viewed

@@ -9,16 +9,16 @@
   {
     "pipeline_step": "event_logs_generation",
     "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
-    "output_path": "data/test_2",
     "generator_params": {
       "experiment": "data/grid_objectives.csv",
       "experiment": {"input_path": "data/2_bpic_features.csv",
         "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
       "experiment": [
-          {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
-          {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
         ],
-      "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
       "config_space": {
         "mode": [5, 20],
         "sequence": [0.01, 1],
@@ -27,7 +27,7 @@
         "loop": [0.01, 1],
         "silent": [0.01, 1],
         "lt_dependency": [0.01, 1],
-        "num_traces": [100, 10001],
         "duplicate": [0],
         "or": [0]
       },
@@ -36,7 +36,7 @@
   },
   {
     "pipeline_step": "feature_extraction",
-    "input_path": "data/test_2",
     "feature_params": {"feature_set":["trace_length"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",
@@ -45,7 +45,7 @@
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
-    "input_path":"data/test_2",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

   {
     "pipeline_step": "event_logs_generation",
     "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
+    "output_path": "data/test",
     "generator_params": {
       "experiment": "data/grid_objectives.csv",
       "experiment": {"input_path": "data/2_bpic_features.csv",
         "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
       "experiment": [
+          {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
+          {"epa_normalized_sequence_entropy_linear_forgetting": 0.4, "ratio_top_20_variants": 0.7}
         ],
+      "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
       "config_space": {
         "mode": [5, 20],
         "sequence": [0.01, 1],
         "loop": [0.01, 1],
         "silent": [0.01, 1],
         "lt_dependency": [0.01, 1],
+        "num_traces": [10, 100],
         "duplicate": [0],
         "or": [0]
       },
   },
   {
     "pipeline_step": "feature_extraction",
+    "input_path": "data/test",
     "feature_params": {"feature_set":["trace_length"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
+    "input_path":"data/test",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

config_files/algorithm/feature_extraction.json CHANGED Viewed

@@ -1,7 +1,7 @@
 [
   {
     "pipeline_step": "feature_extraction",
-    "input_path": "data/test_2",
     "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",

 [
   {
     "pipeline_step": "feature_extraction",
+    "input_path": "data/test",
     "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",

config_files/algorithm/generation.json CHANGED Viewed

@@ -3,11 +3,8 @@
     "pipeline_step": "event_logs_generation",
     "output_path": "output",
     "generator_params": {
-      "experiment": {
-        "input_path": "data/grid_objectives.csv",
-        "objectives": ["epa_normalized_variant_entropy"],
-        "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
-        "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
       },
       "config_space": {
         "mode": [5, 20],

     "pipeline_step": "event_logs_generation",
     "output_path": "output",
     "generator_params": {
+      "experiment": {"input_path": "data/test/grid_feat.csv",
+        "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
       },
       "config_space": {
         "mode": [5, 20],

data/2_grid_test.csv CHANGED Viewed

@@ -1,3 +1,3 @@
-task,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting
-task_1,0.0,0.0
-task_2,0.0,0.1

+log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
+experiment1,0.2,0.4
+experiment2,0.4,0.7

data/{test_2 → test}/gen_el_168.xes RENAMED Viewed

File without changes

data/{test_2 → test}/gen_el_169.xes RENAMED Viewed

File without changes

data/test/grid_feat.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
+experiment1,0.2,0.4
+experiment2,0.4,0.7

data/test/plotter/1_enve_feat.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+epa_normalized_variant_entropy,log
+0.41202322946059605,task_5
+0.79999386158591,task_9
+0.8925919422394111,task_10
+0.493812449168448,task_6
+0.20299577565110202,task_3
+0.337263992015401,task_4
+0.0,task_1
+0.102184538023266,task_2
+0.600006599245775,task_7
+0.6999779396851361,task_8
+0.8796185572534461,task_11

data/test/plotter/grid_1objectives_enve.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+task,epa_normalized_variant_entropy
+task_1,0.0
+task_2,0.1
+task_3,0.2
+task_4,0.3
+task_5,0.4
+task_6,0.5
+task_7,0.6
+task_8,0.7
+task_9,0.8
+task_10,0.9
+task_11,1.0

execute_grid_experiments.py CHANGED Viewed

@@ -2,7 +2,7 @@ import multiprocessing
 import os
 from datetime import datetime as dt
-from tag.utils.io_helpers import sort_files
 from tqdm import tqdm
 #TODO: Pass i properly

 import os
 from datetime import datetime as dt
+from gedi.utils.io_helpers import sort_files
 from tqdm import tqdm
 #TODO: Pass i properly

gedi/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from .generator import GenerateEventLogs
+from .features import EventLogFeatures
+from .analyser import FeatureAnalyser
+from .augmentation import InstanceAugmentator
+from .benchmark import BenchmarkTest
+from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
+__all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']

{tag → gedi}/analyser.py RENAMED Viewed

@@ -4,9 +4,9 @@ import warnings
 from sklearn.decomposition import FastICA, PCA
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import Normalizer, StandardScaler
-from tag.features import EventLogFeatures
-from tag.plotter import ModelResultPlotter
-from tag.utils.matrix_tools import insert_missing_data
 # TODO: Call param_keys explicitly e.g. import INPUT_PATH
 from utils.param_keys import *
 from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY

 from sklearn.decomposition import FastICA, PCA
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import Normalizer, StandardScaler
+from gedi.features import EventLogFeatures
+from gedi.plotter import ModelResultPlotter
+from gedi.utils.matrix_tools import insert_missing_data
 # TODO: Call param_keys explicitly e.g. import INPUT_PATH
 from utils.param_keys import *
 from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY

{tag → gedi}/augmentation.py RENAMED Viewed

@@ -3,7 +3,7 @@ from collections import Counter
 from datetime import datetime as dt
 from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
 from sklearn.preprocessing import Normalizer
-from tag.utils.matrix_tools import insert_missing_data
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD

 from datetime import datetime as dt
 from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
 from sklearn.preprocessing import Normalizer
+from gedi.utils.matrix_tools import insert_missing_data
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD

{tag → gedi}/benchmark.py RENAMED Viewed

@@ -16,7 +16,7 @@ from pm4py.algo.evaluation.generalization import algorithm as generalization_eva
 from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
 from pm4py.objects.bpmn.obj import BPMN
 from pm4py.objects.log.importer.xes import importer as xes_importer
-from tag.utils.io_helpers import dump_features_json
 from tqdm import tqdm
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.benchmark import MINERS
@@ -113,14 +113,14 @@ class BenchmarkTest:
         return
     def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
-        jar_path = os.path.join("tag","libs","split-miner-1.7.1-all.jar")
         filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
         bpmn_path = os.path.join("output", "bpmns_split", filename)
         os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
         command = [
                 "java",
                 "-cp",
-                f"{os.getcwd()}/tag/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
                 "au.edu.unimelb.services.ServiceProvider",
                 "SM2",
                 f"{os.getcwd()}/{log_path}",

 from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
 from pm4py.objects.bpmn.obj import BPMN
 from pm4py.objects.log.importer.xes import importer as xes_importer
+from gedi.utils.io_helpers import dump_features_json
 from tqdm import tqdm
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.benchmark import MINERS
         return
     def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
+        jar_path = os.path.join("gedi","libs","split-miner-1.7.1-all.jar")
         filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
         bpmn_path = os.path.join("output", "bpmns_split", filename)
         os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
         command = [
                 "java",
                 "-cp",
+                f"{os.getcwd()}/gedi/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
                 "au.edu.unimelb.services.ServiceProvider",
                 "SM2",
                 f"{os.getcwd()}/{log_path}",

{tag → gedi}/features.py RENAMED Viewed

@@ -11,7 +11,7 @@ from pathlib import Path, PurePath
 from sklearn.impute import SimpleImputer
 from utils.param_keys import INPUT_PATH
 from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
-from tag.utils.io_helpers import dump_features_json
 def get_sortby_parameter(elem):
     number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])

 from sklearn.impute import SimpleImputer
 from utils.param_keys import INPUT_PATH
 from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
+from gedi.utils.io_helpers import dump_features_json
 def get_sortby_parameter(elem):
     number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])

{tag → gedi}/generator.py RENAMED Viewed

@@ -20,7 +20,7 @@ from pm4py.sim import play_out
 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
-from tag.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
@@ -73,7 +73,7 @@ def get_tasks(experiment, output_path="", reference_feature=None):
     return tasks, output_path
 class GenerateEventLogs():
-    # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/tag/blob/main/notebooks/grid_objectives.ipynb)
     def __init__(self, params):
         print("=========================== Generator ==========================")
         print(f"INFO: Running with {params}")

 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
+from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
     return tasks, output_path
 class GenerateEventLogs():
+    # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
     def __init__(self, params):
         print("=========================== Generator ==========================")
         print(f"INFO: Running with {params}")

{tag → gedi}/plotter.py RENAMED Viewed

@@ -20,9 +20,9 @@ from collections import defaultdict
 from sklearn.preprocessing import Normalizer, StandardScaler
 from sklearn.decomposition import PCA
 from sklearn.metrics.pairwise import euclidean_distances
-from tag.generator import get_tasks
-from tag.utils.io_helpers import get_keys_abbreviation
-from tag.utils.io_helpers import read_csvs, select_instance
 def insert_newlines(string, every=140):
     return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
@@ -331,6 +331,7 @@ class FeaturesPlotter:
             fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
         if output_path != None:
             fig.savefig(output_path)
             print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
@@ -617,7 +618,7 @@ class AugmentationPlotter(object):
         if output_path != None:
             output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
             fig.savefig(output_path)
-            print("SUCCESS: Saved augmentation pca plot at {output_path}")
 class GenerationPlotter(object):
@@ -672,7 +673,7 @@ class GenerationPlotter(object):
                 targets = orig_targets.copy()
             elif isinstance(orig_targets, defaultdict):
                 if k not in orig_targets:
-                    print("[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
                     continue
                 targets = orig_targets[k].copy()
             else:

 from sklearn.preprocessing import Normalizer, StandardScaler
 from sklearn.decomposition import PCA
 from sklearn.metrics.pairwise import euclidean_distances
+from gedi.generator import get_tasks
+from gedi.utils.io_helpers import get_keys_abbreviation
+from gedi.utils.io_helpers import read_csvs, select_instance
 def insert_newlines(string, every=140):
     return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
             fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
         if output_path != None:
+            os.makedirs(os.path.split(output_path)[0], exist_ok=True)
             fig.savefig(output_path)
             print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
         if output_path != None:
             output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
             fig.savefig(output_path)
+            print(f"SUCCESS: Saved augmentation pca plot at {output_path}")
 class GenerationPlotter(object):
                 targets = orig_targets.copy()
             elif isinstance(orig_targets, defaultdict):
                 if k not in orig_targets:
+                    print(f"[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
                     continue
                 targets = orig_targets[k].copy()
             else:

{tag → gedi}/utils/algorithms/__init__.py RENAMED Viewed

File without changes

{tag → gedi}/utils/algorithms/tsne.py RENAMED Viewed

File without changes

{tag → gedi}/utils/array_tools.py RENAMED Viewed

File without changes

{tag → gedi}/utils/io_helpers.py RENAMED Viewed

File without changes

{tag → gedi}/utils/matrix_tools.py RENAMED Viewed

File without changes

main.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import config
 import pandas as pd
 from datetime import datetime as dt
-from tag.generator import GenerateEventLogs
-from tag.features import EventLogFeatures
-from tag.analyser import FeatureAnalyser
-from tag.augmentation import InstanceAugmentator
-from tag.benchmark import BenchmarkTest
-from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
 from utils.param_keys.run_options import *
@@ -57,8 +57,8 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
 if __name__=='__main__':
-    start_tag = dt.now()
-    print(f'INFO: TAG starting {start_tag}')
     args = ArgParser().parse('GEDI main')
     run_params = config.get_run_params(args.run_params_json)
@@ -70,4 +70,4 @@ if __name__=='__main__':
     else:
         load(args.result_load_files, kwargs)
-    print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')

 import config
 import pandas as pd
 from datetime import datetime as dt
+from gedi.generator import GenerateEventLogs
+from gedi.features import EventLogFeatures
+from gedi.analyser import FeatureAnalyser
+from gedi.augmentation import InstanceAugmentator
+from gedi.benchmark import BenchmarkTest
+from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
 from utils.param_keys.run_options import *
 if __name__=='__main__':
+    start_gedi = dt.now()
+    print(f'INFO: GEDI starting {start_gedi}')
     args = ArgParser().parse('GEDI main')
     run_params = config.get_run_params(args.run_params_json)
     else:
         load(args.result_load_files, kwargs)
+    print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')

notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb DELETED Viewed

@@ -1,376 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "e5aa7223",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "dfd1a302",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"])    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "218946b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "k=0\n",
-    "for i in np.arange(0.2, 1.1,0.2):\n",
-    "    for j in np.arange(0,0.55,0.1):\n",
-    "        k+=1\n",
-    "        new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
-    "                   \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
-    "        df = pd.concat([\n",
-    "                df, \n",
-    "                pd.DataFrame([new_entry], columns=new_entry.index)]\n",
-    "           ).reset_index(drop=True)\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "b1e3bb5a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "5de45389",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>log</th>\n",
-       "      <th>ratio_top_20_variants</th>\n",
-       "      <th>normalized_sequence_entropy_linear_forgetting</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>objective_1</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>objective_2</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>objective_3</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>objective_4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>objective_5</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>objective_6</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>objective_7</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>objective_8</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>objective_9</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>objective_10</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>objective_11</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>objective_12</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>objective_13</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>objective_14</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>objective_15</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>objective_16</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>objective_17</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>objective_18</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>objective_19</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>objective_20</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>objective_21</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>objective_22</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>objective_23</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>objective_24</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>objective_25</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25</th>\n",
-       "      <td>objective_26</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>objective_27</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>objective_28</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>objective_29</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>29</th>\n",
-       "      <td>objective_30</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             log  ratio_top_20_variants   \n",
-       "0    objective_1                    0.2  \\\n",
-       "1    objective_2                    0.2   \n",
-       "2    objective_3                    0.2   \n",
-       "3    objective_4                    0.2   \n",
-       "4    objective_5                    0.2   \n",
-       "5    objective_6                    0.2   \n",
-       "6    objective_7                    0.4   \n",
-       "7    objective_8                    0.4   \n",
-       "8    objective_9                    0.4   \n",
-       "9   objective_10                    0.4   \n",
-       "10  objective_11                    0.4   \n",
-       "11  objective_12                    0.4   \n",
-       "12  objective_13                    0.6   \n",
-       "13  objective_14                    0.6   \n",
-       "14  objective_15                    0.6   \n",
-       "15  objective_16                    0.6   \n",
-       "16  objective_17                    0.6   \n",
-       "17  objective_18                    0.6   \n",
-       "18  objective_19                    0.8   \n",
-       "19  objective_20                    0.8   \n",
-       "20  objective_21                    0.8   \n",
-       "21  objective_22                    0.8   \n",
-       "22  objective_23                    0.8   \n",
-       "23  objective_24                    0.8   \n",
-       "24  objective_25                    1.0   \n",
-       "25  objective_26                    1.0   \n",
-       "26  objective_27                    1.0   \n",
-       "27  objective_28                    1.0   \n",
-       "28  objective_29                    1.0   \n",
-       "29  objective_30                    1.0   \n",
-       "\n",
-       "    normalized_sequence_entropy_linear_forgetting  \n",
-       "0                                             0.0  \n",
-       "1                                             0.1  \n",
-       "2                                             0.2  \n",
-       "3                                             0.3  \n",
-       "4                                             0.4  \n",
-       "5                                             0.5  \n",
-       "6                                             0.0  \n",
-       "7                                             0.1  \n",
-       "8                                             0.2  \n",
-       "9                                             0.3  \n",
-       "10                                            0.4  \n",
-       "11                                            0.5  \n",
-       "12                                            0.0  \n",
-       "13                                            0.1  \n",
-       "14                                            0.2  \n",
-       "15                                            0.3  \n",
-       "16                                            0.4  \n",
-       "17                                            0.5  \n",
-       "18                                            0.0  \n",
-       "19                                            0.1  \n",
-       "20                                            0.2  \n",
-       "21                                            0.3  \n",
-       "22                                            0.4  \n",
-       "23                                            0.5  \n",
-       "24                                            0.0  \n",
-       "25                                            0.1  \n",
-       "26                                            0.2  \n",
-       "27                                            0.3  \n",
-       "28                                            0.4  \n",
-       "29                                            0.5  "
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d726a5ae",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb DELETED Viewed

@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb DELETED Viewed

@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb DELETED Viewed

@@ -1,818 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "4827785f",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Name</th>\n",
-       "      <th>Short description</th>\n",
-       "      <th>data link</th>\n",
-       "      <th>challenge link</th>\n",
-       "      <th>Citations (Stand Februar 2023)</th>\n",
-       "      <th>Publications</th>\n",
-       "      <th>Process Discovery/ Declarative</th>\n",
-       "      <th>Conformance Checking / Alignment / Replay</th>\n",
-       "      <th>Online / Streaming / Realtime</th>\n",
-       "      <th>Performance (Analysis) / Temporal / Time</th>\n",
-       "      <th>Predict(ive)/  Monitoring/ Prescriptive</th>\n",
-       "      <th>Trace clustering / Clustering</th>\n",
-       "      <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
-       "      <th>Further keywords:</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Sepsis Cases - Event Log</td>\n",
-       "      <td>This real-life event log contains events of se...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
-       "      <td>61</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>17</td>\n",
-       "      <td>7</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>8</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, (online process) monitorin...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>BPI 2017 - Offer Log</td>\n",
-       "      <td>Contains data from a financial institute inclu...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, cloud computing</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Road Traffic Fine Management Process (not BPI)</td>\n",
-       "      <td>A real-life event log taken from an informatio...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>95</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>32</td>\n",
-       "      <td>9</td>\n",
-       "      <td>4</td>\n",
-       "      <td>8</td>\n",
-       "      <td>15</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>alarm-based prescriptive process monitoring, b...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>BPI 2011</td>\n",
-       "      <td>Contains data from from a Dutch Academic Hospi...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
-       "      <td>57</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>13</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>12</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(compliance) monitoring, (machine) learning, d...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>BPI 2012</td>\n",
-       "      <td>Contains the event log of an application proce...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
-       "      <td>151</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>40</td>\n",
-       "      <td>15</td>\n",
-       "      <td>4</td>\n",
-       "      <td>13</td>\n",
-       "      <td>46</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(in)frequent patterns in process models, (mach...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>BPI 2013 - Open Problems</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
-       "      <td>6</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(in)frequent patterns in process models, (mach...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>BPI 2013 - Closed Problems</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
-       "      <td>12</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(in)frequent patterns in process models</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>BPI 2013 - Incidents</td>\n",
-       "      <td>The log contains events from an incident and p...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
-       "      <td>36</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>14</td>\n",
-       "      <td>5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>7</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, rule mining</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>BPI 2014 - Incident Records</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>privacy preservation, security</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>BPI 2014 - Interaction Records</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, hidden Markov models</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>BPI 2015 - Log 3</td>\n",
-       "      <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>specification-driven predictive business proce...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>BPI 2015 - Log 1</td>\n",
-       "      <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(machine) learning</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>BPI 2016 - Clicks Logged In</td>\n",
-       "      <td>Contains clicks of users that are logged in fr...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>automation</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>BPI 2017 - Application Log</td>\n",
-       "      <td>Contains data from a financial institute inclu...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
-       "      <td>73</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>11</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(machine) learning, alarm-based prescriptive p...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>BPI 2018</td>\n",
-       "      <td>The process covers the handling of application...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
-       "      <td>26</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>7</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, automation</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>BPI 2020 - Travel Permits</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>stage-based process performance analysis</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>BPI 2019</td>\n",
-       "      <td>Contains the purchase order handling process o...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
-       "      <td>35</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>6</td>\n",
-       "      <td>6</td>\n",
-       "      <td>9</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(online process) monitoring, remaining time pr...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>BPI 2020 - International Declarations</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, remaining time prediction</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>BPI 2020 - Domestic Declarations</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>7</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, remaining time prediction</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>BPI 2020 - Prepaid Travel Cost</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>multi-perspective</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>Helpdesk</td>\n",
-       "      <td>Ticketing management process of the Help desk ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>20</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, drift detection</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>Receipt phase of an environmental permit appli...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>15</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>Environmental permit application process (‘WAB...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>predictions with a-priori knowledge</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>Environmental permit application process (‘WAB...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>multidimensional process mining, process cubes</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                 Name  \\\n",
-       "0                            Sepsis Cases - Event Log   \n",
-       "1                                BPI 2017 - Offer Log   \n",
-       "2      Road Traffic Fine Management Process (not BPI)   \n",
-       "3                                            BPI 2011   \n",
-       "4                                            BPI 2012   \n",
-       "5                            BPI 2013 - Open Problems   \n",
-       "6                          BPI 2013 - Closed Problems   \n",
-       "7                                BPI 2013 - Incidents   \n",
-       "8                         BPI 2014 - Incident Records   \n",
-       "9                      BPI 2014 - Interaction Records   \n",
-       "10                                   BPI 2015 - Log 3   \n",
-       "11                                   BPI 2015 - Log 1   \n",
-       "12                        BPI 2016 - Clicks Logged In   \n",
-       "13                         BPI 2017 - Application Log   \n",
-       "14                                           BPI 2018   \n",
-       "15                          BPI 2020 - Travel Permits   \n",
-       "16                                           BPI 2019   \n",
-       "17              BPI 2020 - International Declarations   \n",
-       "18                   BPI 2020 - Domestic Declarations   \n",
-       "19                     BPI 2020 - Prepaid Travel Cost   \n",
-       "20                                           Helpdesk   \n",
-       "21  Receipt phase of an environmental permit appli...   \n",
-       "22  Environmental permit application process (‘WAB...   \n",
-       "23  Environmental permit application process (‘WAB...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                    Short description  \\\n",
-       "0   This real-life event log contains events of se...   \n",
-       "1   Contains data from a financial institute inclu...   \n",
-       "2   A real-life event log taken from an informatio...   \n",
-       "3   Contains data from from a Dutch Academic Hospi...   \n",
-       "4   Contains the event log of an application proce...   \n",
-       "5   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "6   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "7   The log contains events from an incident and p...   \n",
-       "8   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "9   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "10  Provided by 5 Dutch municipalities. The data c...   \n",
-       "11  Provided by 5 Dutch municipalities. The data c...   \n",
-       "12  Contains clicks of users that are logged in fr...   \n",
-       "13  Contains data from a financial institute inclu...   \n",
-       "14  The process covers the handling of application...   \n",
-       "15  Contains 2 years of data from the reimbursemen...   \n",
-       "16  Contains the purchase order handling process o...   \n",
-       "17  Contains 2 years of data from the reimbursemen...   \n",
-       "18  Contains 2 years of data from the reimbursemen...   \n",
-       "19  Contains 2 years of data from the reimbursemen...   \n",
-       "20  Ticketing management process of the Help desk ...   \n",
-       "21  Data originates from the CoSeLoG project where...   \n",
-       "22  Data originates from the CoSeLoG project where...   \n",
-       "23  Data originates from the CoSeLoG project where...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                            data link  \\\n",
-       "0   https://data.4tu.nl/articles/dataset/Sepsis_Ca...   \n",
-       "1   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "2   https://data.4tu.nl/articles/dataset/Road_Traf...   \n",
-       "3   https://data.4tu.nl/articles/dataset/Real-life...   \n",
-       "4   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "5   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "6   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "7   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "8   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "9   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "10  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "11  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "12  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "13  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "14  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "15  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "16  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "17  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "18  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "19  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "20  https://data.4tu.nl/articles/dataset/Dataset_b...   \n",
-       "21  https://data.4tu.nl/articles/dataset/Receipt_p...   \n",
-       "22  https://data.4tu.nl/articles/dataset/Environme...   \n",
-       "23  https://data.4tu.nl/articles/dataset/Environme...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                       challenge link  \\\n",
-       "0   https://data.4tu.nl/articles/dataset/Sepsis_Ca...   \n",
-       "1   https://www.win.tue.nl/bpi/doku.php?id=2017:ch...   \n",
-       "2                                                 NaN   \n",
-       "3   https://www.win.tue.nl/bpi/doku.php?id=2011:ch...   \n",
-       "4   https://www.win.tue.nl/bpi/doku.php?id=2012:ch...   \n",
-       "5      https://www.win.tue.nl/bpi/2013/challenge.html   \n",
-       "6   https://www.win.tue.nl/bpi/doku.php?id=2013:ch...   \n",
-       "7      https://www.win.tue.nl/bpi/2013/challenge.html   \n",
-       "8   https://www.win.tue.nl/bpi/doku.php?id=2014:ch...   \n",
-       "9   https://www.win.tue.nl/bpi/doku.php?id=2014:ch...   \n",
-       "10  https://www.win.tue.nl/bpi/doku.php?id=2015:ch...   \n",
-       "11  https://www.win.tue.nl/bpi/doku.php?id=2015:ch...   \n",
-       "12  https://www.win.tue.nl/bpi/doku.php?id=2016:ch...   \n",
-       "13  https://www.win.tue.nl/bpi/doku.php?id=2017:ch...   \n",
-       "14  https://www.win.tue.nl/bpi/doku.php?id=2018:ch...   \n",
-       "15     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "16  https://icpmconference.org/2019/icpm-2019/cont...   \n",
-       "17     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "18     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "19     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "20                                                NaN   \n",
-       "21                                                NaN   \n",
-       "22                                                NaN   \n",
-       "23                                                NaN   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "   Citations (Stand Februar 2023)  \\\n",
-       "0                              61   \n",
-       "1                               4   \n",
-       "2                              95   \n",
-       "3                              57   \n",
-       "4                             151   \n",
-       "5                               6   \n",
-       "6                              12   \n",
-       "7                              36   \n",
-       "8                               5   \n",
-       "9                               1   \n",
-       "10                              1   \n",
-       "11                              8   \n",
-       "12                              1   \n",
-       "13                             73   \n",
-       "14                             26   \n",
-       "15                              2   \n",
-       "16                             35   \n",
-       "17                              2   \n",
-       "18                              7   \n",
-       "19                              2   \n",
-       "20                             20   \n",
-       "21                             15   \n",
-       "22                              2   \n",
-       "23                              2   \n",
-       "24                            NaN   \n",
-       "\n",
-       "                                         Publications  \\\n",
-       "0   https://app.dimensions.ai/discover/publication...   \n",
-       "1   https://app.dimensions.ai/discover/publication...   \n",
-       "2   https://app.dimensions.ai/discover/publication...   \n",
-       "3   https://app.dimensions.ai/discover/publication...   \n",
-       "4   https://app.dimensions.ai/discover/publication...   \n",
-       "5   https://app.dimensions.ai/discover/publication...   \n",
-       "6   https://app.dimensions.ai/discover/publication...   \n",
-       "7   https://app.dimensions.ai/discover/publication...   \n",
-       "8   https://app.dimensions.ai/discover/publication...   \n",
-       "9   https://app.dimensions.ai/discover/publication...   \n",
-       "10  https://app.dimensions.ai/discover/publication...   \n",
-       "11  https://app.dimensions.ai/discover/publication...   \n",
-       "12  https://app.dimensions.ai/discover/publication...   \n",
-       "13  https://app.dimensions.ai/discover/publication...   \n",
-       "14  https://app.dimensions.ai/discover/publication...   \n",
-       "15  https://app.dimensions.ai/discover/publication...   \n",
-       "16  https://app.dimensions.ai/discover/publication...   \n",
-       "17  https://app.dimensions.ai/discover/publication...   \n",
-       "18  https://app.dimensions.ai/discover/publication...   \n",
-       "19  https://app.dimensions.ai/discover/publication...   \n",
-       "20  https://app.dimensions.ai/discover/publication...   \n",
-       "21  https://data.4tu.nl/articles/dataset/Receipt_p...   \n",
-       "22  https://app.dimensions.ai/discover/publication...   \n",
-       "23  https://app.dimensions.ai/discover/publication...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "   Process Discovery/ Declarative Conformance Checking / Alignment / Replay  \\\n",
-       "0                              17                                         7   \n",
-       "1                               1                                         0   \n",
-       "2                              32                                         9   \n",
-       "3                              13                                         1   \n",
-       "4                              40                                        15   \n",
-       "5                               1                                         0   \n",
-       "6                               3                                         2   \n",
-       "7                              14                                         5   \n",
-       "8                               1                                         0   \n",
-       "9                               0                                         0   \n",
-       "10                              0                                         0   \n",
-       "11                              1                                         1   \n",
-       "12                              1                                         0   \n",
-       "13                             11                                         5   \n",
-       "14                              7                                         1   \n",
-       "15                              0                                         0   \n",
-       "16                              3                                         1   \n",
-       "17                              0                                         0   \n",
-       "18                              0                                         2   \n",
-       "19                              0                                         0   \n",
-       "20                              4                                         1   \n",
-       "21                             -1                                        -1   \n",
-       "22                              0                                         0   \n",
-       "23                              1                                         0   \n",
-       "24                            NaN                                       NaN   \n",
-       "\n",
-       "   Online / Streaming / Realtime Performance (Analysis) / Temporal / Time  \\\n",
-       "0                              4                                        1   \n",
-       "1                              0                                        1   \n",
-       "2                              4                                        8   \n",
-       "3                              3                                        4   \n",
-       "4                              4                                       13   \n",
-       "5                              0                                        0   \n",
-       "6                              1                                        2   \n",
-       "7                              1                                        1   \n",
-       "8                              0                                        0   \n",
-       "9                              0                                        0   \n",
-       "10                             0                                        0   \n",
-       "11                             0                                        0   \n",
-       "12                             1                                        0   \n",
-       "13                             2                                       14   \n",
-       "14                             2                                        0   \n",
-       "15                             0                                        1   \n",
-       "16                             6                                        6   \n",
-       "17                             0                                        1   \n",
-       "18                             2                                        2   \n",
-       "19                             0                                        0   \n",
-       "20                             3                                        1   \n",
-       "21                            -1                                       -1   \n",
-       "22                             0                                        0   \n",
-       "23                             0                                        0   \n",
-       "24                           NaN                                      NaN   \n",
-       "\n",
-       "   Predict(ive)/  Monitoring/ Prescriptive Trace clustering / Clustering  \\\n",
-       "0                                        8                             2   \n",
-       "1                                        1                             0   \n",
-       "2                                       15                             1   \n",
-       "3                                       12                             4   \n",
-       "4                                       46                             0   \n",
-       "5                                        1                             0   \n",
-       "6                                        0                             0   \n",
-       "7                                        7                             0   \n",
-       "8                                        0                             0   \n",
-       "9                                        0                             0   \n",
-       "10                                       1                             0   \n",
-       "11                                       3                             0   \n",
-       "12                                       0                             0   \n",
-       "13                                      23                             1   \n",
-       "14                                       8                             0   \n",
-       "15                                       0                             0   \n",
-       "16                                       9                             4   \n",
-       "17                                       2                             0   \n",
-       "18                                       3                             0   \n",
-       "19                                       0                             0   \n",
-       "20                                       8                             0   \n",
-       "21                                      -1                            -1   \n",
-       "22                                       1                             0   \n",
-       "23                                       0                             0   \n",
-       "24                                     NaN                           NaN   \n",
-       "\n",
-       "   Preprocessing / Event Abstraction / Event Data Correlation  \\\n",
-       "0                                                   2           \n",
-       "1                                                   0           \n",
-       "2                                                   2           \n",
-       "3                                                   1           \n",
-       "4                                                   1           \n",
-       "5                                                   0           \n",
-       "6                                                   3           \n",
-       "7                                                   2           \n",
-       "8                                                   0           \n",
-       "9                                                   0           \n",
-       "10                                                  0           \n",
-       "11                                                  3           \n",
-       "12                                                  0           \n",
-       "13                                                  1           \n",
-       "14                                                  2           \n",
-       "15                                                  0           \n",
-       "16                                                  1           \n",
-       "17                                                  0           \n",
-       "18                                                  0           \n",
-       "19                                                  0           \n",
-       "20                                                  0           \n",
-       "21                                                 -1           \n",
-       "22                                                  0           \n",
-       "23                                                  0           \n",
-       "24                                                NaN           \n",
-       "\n",
-       "                                    Further keywords:  \n",
-       "0   (machine) learning, (online process) monitorin...  \n",
-       "1                 (machine) learning, cloud computing  \n",
-       "2   alarm-based prescriptive process monitoring, b...  \n",
-       "3   (compliance) monitoring, (machine) learning, d...  \n",
-       "4   (in)frequent patterns in process models, (mach...  \n",
-       "5   (in)frequent patterns in process models, (mach...  \n",
-       "6             (in)frequent patterns in process models  \n",
-       "7                     (machine) learning, rule mining  \n",
-       "8                      privacy preservation, security  \n",
-       "9            (machine) learning, hidden Markov models  \n",
-       "10  specification-driven predictive business proce...  \n",
-       "11                                 (machine) learning  \n",
-       "12                                         automation  \n",
-       "13  (machine) learning, alarm-based prescriptive p...  \n",
-       "14                     (machine) learning, automation  \n",
-       "15           stage-based process performance analysis  \n",
-       "16  (online process) monitoring, remaining time pr...  \n",
-       "17      (machine) learning, remaining time prediction  \n",
-       "18      (machine) learning, remaining time prediction  \n",
-       "19                                  multi-perspective  \n",
-       "20                (machine) learning, drift detection  \n",
-       "21                                                NaN  \n",
-       "22                predictions with a-priori knowledge  \n",
-       "23     multidimensional process mining, process cubes  \n",
-       "24                                                NaN  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#import pm4py\n",
-    "import pandas as pd\n",
-    "INPUT_PATH = \"../data/mappings.csv\"\n",
-    "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "04a97f37",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/benchmarking_process_discovery.ipynb CHANGED Viewed

@@ -1277,7 +1277,7 @@
     "\n",
     "import sys\n",
     "import os\n",
-    "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
   }
  },
  "nbformat": 4,

     "\n",
     "import sys\n",
     "import os\n",
+    "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

notebooks/bpic_generability_pdm.ipynb CHANGED Viewed

@@ -1223,7 +1223,7 @@
     "from scipy.stats import pearsonr\n",
     "import sys\n",
     "import os\n",
-    "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "\n",

     "from scipy.stats import pearsonr\n",
     "import sys\n",
     "import os\n",
+    "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "\n",

notebooks/experiment_generator.ipynb CHANGED Viewed

@@ -2225,7 +2225,7 @@
    ],
    "source": [
     "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
-    "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
     "\n",
     "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
     "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
   }
  },
  "nbformat": 4,

    ],
    "source": [
     "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
+    "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
     "\n",
     "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
     "print(bpic_features.shape)\n",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

notebooks/feature_distributions.ipynb CHANGED Viewed

@@ -1847,7 +1847,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
   }
  },
  "nbformat": 4,

    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,