Spaces:

andreamalhera
/

igedi

Sleeping

Andrea Maldonado committed on Apr 21, 2024

Commit

17e1124

Merge branch '5-automation-test-gedi-automatically' into bpm24

* 5-automation-test-gedi-automatically: (56 commits)
Adds plotter test data
Adds provisory evaluation plotter
Renames test data dir-
Updates gitignore
Moves to data to test dir
Setup generation test with file
Adds multiple experiments to gen
Specifies Python version
Fixes integration ConfigSpace installation for ubuntu
Corrects generation output path
Removes unnecessary conda
Updates github action versions
Adds conda install for ConfigSpace
Removes .checkpoints
Gitignore
Updates github action versions
Fixes test typo
Adds integration test
specifies numpy
Fixes yml
...

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.github/workflows/test_gedi.yml +162 -0
.gitignore +5 -1
README.md +2 -3
config.py +3 -3
config_files/algorithm/benchmark.json +1 -2
config_files/algorithm/evaluation_plotter.json +7 -5
config_files/algorithm/experiment_test.json +7 -7
config_files/algorithm/feature_extraction.json +1 -1
config_files/algorithm/generation.json +2 -5
data/2_grid_test.csv +3 -3
data/{test_2 → test}/gen_el_168.xes +0 -0
data/{test_2 → test}/gen_el_169.xes +0 -0
data/test/grid_feat.csv +3 -0
data/test/plotter/1_enve_feat.csv +12 -0
data/test/plotter/grid_1objectives_enve.csv +12 -0
execute_grid_experiments.py +1 -1
gedi/__init__.py +8 -0
{tag → gedi}/analyser.py +3 -3
{tag → gedi}/augmentation.py +1 -1
{tag → gedi}/benchmark.py +3 -3
{tag → gedi}/features.py +1 -1
{tag → gedi}/generator.py +2 -2
{tag → gedi}/plotter.py +6 -5
{tag → gedi}/utils/algorithms/__init__.py +0 -0
{tag → gedi}/utils/algorithms/tsne.py +0 -0
{tag → gedi}/utils/array_tools.py +0 -0
{tag → gedi}/utils/io_helpers.py +0 -0
{tag → gedi}/utils/matrix_tools.py +0 -0
main.py +9 -9
notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb +0 -376
notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb +0 -6
notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb +0 -6
notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb +0 -0
notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb +0 -818
notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb +0 -0
notebooks/benchmarking_process_discovery.ipynb +2 -2
notebooks/bpic_generability_pdm.ipynb +1 -1
notebooks/experiment_generator.ipynb +2 -2
notebooks/feature_distributions.ipynb +1 -1

.github/workflows/test_gedi.yml ADDED Viewed

	@@ -0,0 +1,162 @@

+# Workflow that runs the GEDI pipeline tests defined in the jobs below.
+name: GEDI Test
+# Specifies when the action should run
+on:
+  # Run for pull requests whose target branch is listed under `branches`.
+  pull_request:
+    branches:
+      - main
+# Specifies the jobs that are to be run
+jobs:
+  # Runs main.py with the feature_extraction algorithm config.
+  test_feature-extraction:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/feature_extraction.json
+    - name: Compare output
+      # NOTE(review): both operands are the same path, so this step only fails
+      # when the file is missing; it cannot detect changed content. Point one
+      # side at a committed reference file once its path is confirmed.
+      run: diff data/test_feat.csv data/test_feat.csv
+  # Runs main.py with the generation algorithm config.
+  test_generation:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install dependencies
+      # Refresh the package index first and pass -y so the install never
+      # waits on a confirmation prompt in the non-interactive runner.
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y build-essential python3 python3-dev
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/generation.json
+    - name: Compare output
+      # NOTE(review): both operands are the same path, so this step only fails
+      # when the file is missing; it cannot detect changed content. Point one
+      # side at a committed reference file once its path is confirmed.
+      run: diff output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
+  # Runs main.py with the benchmark algorithm config.
+  test_benchmark:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/benchmark.json
+    - name: Compare output
+      # NOTE(review): both operands are the same path, so this step only fails
+      # when the file is missing; it cannot detect changed content. Point one
+      # side at a committed reference file once its path is confirmed.
+      run: diff output/benchmark/test_benchmark.csv output/benchmark/test_benchmark.csv
+  # Runs main.py with the augmentation algorithm config; the job passes when
+  # the command exits successfully (no output comparison step).
+  test_augmentation:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/augmentation.json
+  # Runs main.py with the evaluation_plotter algorithm config; the job passes
+  # when the command exits successfully (no output comparison step).
+  test_evaluation-plotter:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install dependencies
+      # Refresh the package index first and pass -y so the install never
+      # waits on a confirmation prompt in the non-interactive runner.
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y build-essential python3 python3-dev
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/evaluation_plotter.json
+  # Runs main.py with the experiment_test algorithm config; the job passes
+  # when the command exits successfully (no output comparison step).
+  test_integration:
+    runs-on: ubuntu-latest
+    # Setting up a Python environment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        # Quoted so YAML keeps the version a string rather than a float.
+        python-version: '3.9'
+    - name: Install dependencies
+      # Refresh the package index first and pass -y so the install never
+      # waits on a confirmation prompt in the non-interactive runner.
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y build-essential python3 python3-dev
+    - name: Install feeed
+      # Upgrades pip, then installs the package found in the repository root.
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+    - name: Run test
+      run: python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json

.gitignore CHANGED Viewed

@@ -1,3 +1,7 @@
 smac3_output/
 data/
-output/

 smac3_output/
 data/
+output/
+.ipynb_checkpoints/
+notebooks/.ipynb_checkpoints/*
+gedi.egg-info/
+build/

README.md CHANGED Viewed

@@ -16,12 +16,11 @@ For MacOS:
 brew install graphviz
 brew install swig
 ```
-## Installation
 - For smac:
 ```console
 conda install pyrfr swig
 ```
 - `conda env create -f .conda.yml`
 - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
@@ -33,7 +32,7 @@ python main.py -o config_files/options/baseline.json -a config_files/algorithm/e
 ## Usage
 Our pipeline offers several pipeline steps, which can be run sequentially or partially:
 - feature_extraction
-- event_logs_generation
 - benchmark
 - evaluation_plotter

 brew install graphviz
 brew install swig
 ```
 - For smac:
 ```console
 conda install pyrfr swig
 ```
+## Installation
 - `conda env create -f .conda.yml`
 - Install [Feature Extractor for Event Data (feeed)](https://github.com/lmu-dbs/feeed) in the newly installed conda environment: `pip install feeed`
 ## Usage
 Our pipeline offers several pipeline steps, which can be run sequentially or partially:
 - feature_extraction
+- generation
 - benchmark
 - evaluation_plotter

config.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 import os
 import warnings
-from tag.utils.io_helpers import sort_files
 from tqdm import tqdm
 from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
@@ -63,8 +63,8 @@ def get_files_and_kwargs(params: dict):
     #TODO: generate parent directories if they don't exist
     if input_name == 'test':
-        filename_list = list(tqdm(sort_files(os.listdir('data/test_2'))))
-        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test_2'}
     elif input_name == 'realLogs':
         filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}

 import os
 import warnings
+from gedi.utils.io_helpers import sort_files
 from tqdm import tqdm
 from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
     #TODO: generate parent directories if they don't exist
     if input_name == 'test':
+        filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
+        kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
     elif input_name == 'realLogs':
         filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}

config_files/algorithm/benchmark.json CHANGED Viewed

@@ -2,8 +2,7 @@
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
-    "input_path":"data/test_2",
-    "input_path":"data/test_2/gen_el_168.xes",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
+    "input_path":"data/test",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

config_files/algorithm/evaluation_plotter.json CHANGED Viewed

@@ -2,16 +2,18 @@
     {
         "pipeline_step": "evaluation_plotter",
         "input_path": "output/features/generated/34_bpic_features/",
-        "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
         "input_path": "output/features/generated/grid_2obj/",
         "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
-        "output_path": "output/plots",
         "reference_feature": "epa_normalized_sequence_entropy",
-        "reference_feature": "epa_normalized_variant_entropy",
         "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
         "targets": "data/34_bpic_features.csv",
-        "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
         "targets": "data/grid_experiments/grid_2obj/",
-        "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"]
     }
 ]

     {
         "pipeline_step": "evaluation_plotter",
         "input_path": "output/features/generated/34_bpic_features/",
         "input_path": "output/features/generated/grid_2obj/",
         "input_path": ["output/features/generated/grid_1obj/", "output/features/generated/grid_2obj/"],
+        "input_path": "output/features/generated/grid_1obj/1_enve_feat.csv",
+        "input_path": "data/test/plotter/1_enve_feat.csv",
         "reference_feature": "epa_normalized_sequence_entropy",
         "reference_feature": "epa_normalized_sequence_entropy_exponential_forgetting",
+        "reference_feature": "epa_normalized_variant_entropy",
         "targets": "data/34_bpic_features.csv",
         "targets": "data/grid_experiments/grid_2obj/",
+        "targets": ["data/grid_experiments/grid_1obj/", "data/grid_experiments/grid_2obj/"],
+        "targets": "data/grid_experiments/grid_1obj/grid_1objectives_enve.csv",
+        "targets": "data/test/plotter/grid_1objectives_enve.csv",
+        "output_path": "output/plots"
     }
 ]

config_files/algorithm/experiment_test.json CHANGED Viewed

@@ -9,16 +9,16 @@
   {
     "pipeline_step": "event_logs_generation",
     "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
-    "output_path": "data/test_2",
     "generator_params": {
       "experiment": "data/grid_objectives.csv",
       "experiment": {"input_path": "data/2_bpic_features.csv",
         "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
       "experiment": [
-          {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
-          {"epa_normalized_sequence_entropy_linear_forgetting": 0.5, "ratio_top_20_variants": 0.04}
         ],
-      "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.05, "ratio_top_20_variants": 0.4},
       "config_space": {
         "mode": [5, 20],
         "sequence": [0.01, 1],
@@ -27,7 +27,7 @@
         "loop": [0.01, 1],
         "silent": [0.01, 1],
         "lt_dependency": [0.01, 1],
-        "num_traces": [100, 10001],
         "duplicate": [0],
         "or": [0]
       },
@@ -36,7 +36,7 @@
   },
   {
     "pipeline_step": "feature_extraction",
-    "input_path": "data/test_2",
     "feature_params": {"feature_set":["trace_length"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",
@@ -45,7 +45,7 @@
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
-    "input_path":"data/test_2",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

   {
     "pipeline_step": "event_logs_generation",
     "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
+    "output_path": "data/test",
     "generator_params": {
       "experiment": "data/grid_objectives.csv",
       "experiment": {"input_path": "data/2_bpic_features.csv",
         "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
       "experiment": [
+          {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
+          {"epa_normalized_sequence_entropy_linear_forgetting": 0.4, "ratio_top_20_variants": 0.7}
         ],
+      "experiment": {"epa_normalized_sequence_entropy_linear_forgetting": 0.2, "ratio_top_20_variants": 0.4},
       "config_space": {
         "mode": [5, 20],
         "sequence": [0.01, 1],
         "loop": [0.01, 1],
         "silent": [0.01, 1],
         "lt_dependency": [0.01, 1],
+        "num_traces": [10, 100],
         "duplicate": [0],
         "or": [0]
       },
   },
   {
     "pipeline_step": "feature_extraction",
+    "input_path": "data/test",
     "feature_params": {"feature_set":["trace_length"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",
   {
     "pipeline_step": "benchmark_test",
     "benchmark_test": "discovery",
+    "input_path":"data/test",
     "output_path":"output",
     "miners" : ["inductive", "heuristics", "imf", "ilp"]
   }

config_files/algorithm/feature_extraction.json CHANGED Viewed

@@ -1,7 +1,7 @@
 [
   {
     "pipeline_step": "feature_extraction",
-    "input_path": "data/test_2",
     "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",

 [
   {
     "pipeline_step": "feature_extraction",
+    "input_path": "data/test",
     "feature_params": {"feature_set":["simple_stats", "trace_length", "trace_variant", "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
     "output_path": "output/plots",
     "real_eventlog_path": "data/bpic_features.csv",

config_files/algorithm/generation.json CHANGED Viewed

@@ -3,11 +3,8 @@
     "pipeline_step": "event_logs_generation",
     "output_path": "output",
     "generator_params": {
-      "experiment": {
-        "input_path": "data/grid_objectives.csv",
-        "objectives": ["epa_normalized_variant_entropy"],
-        "objectives": ["ratio_most_common_variant", "epa_normalized_sequence_entropy"],
-        "objectives": ["ratio_top_20_variants","epa_normalized_sequence_entropy_linear_forgetting"]
       },
       "config_space": {
         "mode": [5, 20],

     "pipeline_step": "event_logs_generation",
     "output_path": "output",
     "generator_params": {
+      "experiment": {"input_path": "data/test/grid_feat.csv",
+        "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]
       },
       "config_space": {
         "mode": [5, 20],

data/2_grid_test.csv CHANGED Viewed

@@ -1,3 +1,3 @@
-task,epa_normalized_sequence_entropy,epa_normalized_sequence_entropy_linear_forgetting
-task_1,0.0,0.0
-task_2,0.0,0.1

+log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
+experiment1,0.2,0.4
+experiment2,0.4,0.7

data/{test_2 → test}/gen_el_168.xes RENAMED Viewed

File without changes

data/{test_2 → test}/gen_el_169.xes RENAMED Viewed

File without changes

data/test/grid_feat.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
+experiment1,0.2,0.4
+experiment2,0.4,0.7

data/test/plotter/1_enve_feat.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+epa_normalized_variant_entropy,log
+0.41202322946059605,task_5
+0.79999386158591,task_9
+0.8925919422394111,task_10
+0.493812449168448,task_6
+0.20299577565110202,task_3
+0.337263992015401,task_4
+0.0,task_1
+0.102184538023266,task_2
+0.600006599245775,task_7
+0.6999779396851361,task_8
+0.8796185572534461,task_11

data/test/plotter/grid_1objectives_enve.csv ADDED Viewed

	@@ -0,0 +1,12 @@

+task,epa_normalized_variant_entropy
+task_1,0.0
+task_2,0.1
+task_3,0.2
+task_4,0.3
+task_5,0.4
+task_6,0.5
+task_7,0.6
+task_8,0.7
+task_9,0.8
+task_10,0.9
+task_11,1.0

execute_grid_experiments.py CHANGED Viewed

@@ -2,7 +2,7 @@ import multiprocessing
 import os
 from datetime import datetime as dt
-from tag.utils.io_helpers import sort_files
 from tqdm import tqdm
 #TODO: Pass i properly

 import os
 from datetime import datetime as dt
+from gedi.utils.io_helpers import sort_files
 from tqdm import tqdm
 #TODO: Pass i properly

gedi/__init__.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from .generator import GenerateEventLogs
+from .features import EventLogFeatures
+from .analyser import FeatureAnalyser
+from .augmentation import InstanceAugmentator
+from .benchmark import BenchmarkTest
+from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
+__all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']

{tag → gedi}/analyser.py RENAMED Viewed

@@ -4,9 +4,9 @@ import warnings
 from sklearn.decomposition import FastICA, PCA
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import Normalizer, StandardScaler
-from tag.features import EventLogFeatures
-from tag.plotter import ModelResultPlotter
-from tag.utils.matrix_tools import insert_missing_data
 # TODO: Call param_keys explicitly e.g. import INPUT_PATH
 from utils.param_keys import *
 from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY

 from sklearn.decomposition import FastICA, PCA
 from sklearn.manifold import TSNE
 from sklearn.preprocessing import Normalizer, StandardScaler
+from gedi.features import EventLogFeatures
+from gedi.plotter import ModelResultPlotter
+from gedi.utils.matrix_tools import insert_missing_data
 # TODO: Call param_keys explicitly e.g. import INPUT_PATH
 from utils.param_keys import *
 from utils.param_keys.analyser import MODEL, INPUT_PARAMS, PERPLEXITY

{tag → gedi}/augmentation.py RENAMED Viewed

@@ -3,7 +3,7 @@ from collections import Counter
 from datetime import datetime as dt
 from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
 from sklearn.preprocessing import Normalizer
-from tag.utils.matrix_tools import insert_missing_data
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD

 from datetime import datetime as dt
 from imblearn.over_sampling import SMOTE, SVMSMOTE, BorderlineSMOTE, KMeansSMOTE
 from sklearn.preprocessing import Normalizer
+from gedi.utils.matrix_tools import insert_missing_data
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD

{tag → gedi}/benchmark.py RENAMED Viewed

@@ -16,7 +16,7 @@ from pm4py.algo.evaluation.generalization import algorithm as generalization_eva
 from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
 from pm4py.objects.bpmn.obj import BPMN
 from pm4py.objects.log.importer.xes import importer as xes_importer
-from tag.utils.io_helpers import dump_features_json
 from tqdm import tqdm
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.benchmark import MINERS
@@ -113,14 +113,14 @@ class BenchmarkTest:
         return
     def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
-        jar_path = os.path.join("tag","libs","split-miner-1.7.1-all.jar")
         filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
         bpmn_path = os.path.join("output", "bpmns_split", filename)
         os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
         command = [
                 "java",
                 "-cp",
-                f"{os.getcwd()}/tag/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
                 "au.edu.unimelb.services.ServiceProvider",
                 "SM2",
                 f"{os.getcwd()}/{log_path}",

 from pm4py.algo.evaluation.simplicity import algorithm as simplicity_evaluator
 from pm4py.objects.bpmn.obj import BPMN
 from pm4py.objects.log.importer.xes import importer as xes_importer
+from gedi.utils.io_helpers import dump_features_json
 from tqdm import tqdm
 from utils.param_keys import INPUT_PATH, OUTPUT_PATH
 from utils.param_keys.benchmark import MINERS
         return
     def split_miner_wrapper(self, log_path="data/real_event_logs/BPI_Challenges/BPI_Challenge_2012.xes"):
+        jar_path = os.path.join("gedi","libs","split-miner-1.7.1-all.jar")
         filename = os.path.split(log_path)[-1].rsplit(".",1)[0]
         bpmn_path = os.path.join("output", "bpmns_split", filename)
         os.makedirs(os.path.split(bpmn_path)[0], exist_ok=True)
         command = [
                 "java",
                 "-cp",
+                f"{os.getcwd()}/gedi/libs/sm2.jar:{os.getcwd()}/tag/libs/lib/*",
                 "au.edu.unimelb.services.ServiceProvider",
                 "SM2",
                 f"{os.getcwd()}/{log_path}",

{tag → gedi}/features.py RENAMED Viewed

@@ -11,7 +11,7 @@ from pathlib import Path, PurePath
 from sklearn.impute import SimpleImputer
 from utils.param_keys import INPUT_PATH
 from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
-from tag.utils.io_helpers import dump_features_json
 def get_sortby_parameter(elem):
     number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])

 from sklearn.impute import SimpleImputer
 from utils.param_keys import INPUT_PATH
 from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
+from gedi.utils.io_helpers import dump_features_json
 def get_sortby_parameter(elem):
     number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])

{tag → gedi}/generator.py RENAMED Viewed

@@ -20,7 +20,7 @@ from pm4py.sim import play_out
 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
-from tag.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
@@ -73,7 +73,7 @@ def get_tasks(experiment, output_path="", reference_feature=None):
     return tasks, output_path
 class GenerateEventLogs():
-    # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/tag/blob/main/notebooks/grid_objectives.ipynb)
     def __init__(self, params):
         print("=========================== Generator ==========================")
         print(f"INFO: Running with {params}")

 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
+from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
     return tasks, output_path
 class GenerateEventLogs():
+    # TODO: Clarify nomenclature: experiment, task, objective as in notebook (https://github.com/lmu-dbs/gedi/blob/main/notebooks/grid_objectives.ipynb)
     def __init__(self, params):
         print("=========================== Generator ==========================")
         print(f"INFO: Running with {params}")

{tag → gedi}/plotter.py RENAMED Viewed

@@ -20,9 +20,9 @@ from collections import defaultdict
 from sklearn.preprocessing import Normalizer, StandardScaler
 from sklearn.decomposition import PCA
 from sklearn.metrics.pairwise import euclidean_distances
-from tag.generator import get_tasks
-from tag.utils.io_helpers import get_keys_abbreviation
-from tag.utils.io_helpers import read_csvs, select_instance
 def insert_newlines(string, every=140):
     return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
@@ -331,6 +331,7 @@ class FeaturesPlotter:
             fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
         if output_path != None:
             fig.savefig(output_path)
             print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
@@ -617,7 +618,7 @@ class AugmentationPlotter(object):
         if output_path != None:
             output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
             fig.savefig(output_path)
-            print("SUCCESS: Saved augmentation pca plot at {output_path}")
 class GenerationPlotter(object):
@@ -672,7 +673,7 @@ class GenerationPlotter(object):
                 targets = orig_targets.copy()
             elif isinstance(orig_targets, defaultdict):
                 if k not in orig_targets:
-                    print("[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
                     continue
                 targets = orig_targets[k].copy()
             else:

 from sklearn.preprocessing import Normalizer, StandardScaler
 from sklearn.decomposition import PCA
 from sklearn.metrics.pairwise import euclidean_distances
+from gedi.generator import get_tasks
+from gedi.utils.io_helpers import get_keys_abbreviation
+from gedi.utils.io_helpers import read_csvs, select_instance
 def insert_newlines(string, every=140):
     return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
             fig, output_path = eval(f"self.plot_violinplot_single(features, output_path, source='{source_name}' {plot_type})")
         if output_path != None:
+            os.makedirs(os.path.split(output_path)[0], exist_ok=True)
             fig.savefig(output_path)
             print(f"SUCCESS: Saved {plot_type} plot in {output_path}")
         if output_path != None:
             output_path += f"/augmentation_pca_{n_features}_{self.sampler}.jpg"
             fig.savefig(output_path)
+            print(f"SUCCESS: Saved augmentation pca plot at {output_path}")
 class GenerationPlotter(object):
                 targets = orig_targets.copy()
             elif isinstance(orig_targets, defaultdict):
                 if k not in orig_targets:
+                    print(f"[WARNING] {k} not in targets. Only in generated features. Will continue with next feature to compare with")
                     continue
                 targets = orig_targets[k].copy()
             else:

{tag → gedi}/utils/algorithms/__init__.py RENAMED Viewed

File without changes

{tag → gedi}/utils/algorithms/tsne.py RENAMED Viewed

File without changes

{tag → gedi}/utils/array_tools.py RENAMED Viewed

File without changes

{tag → gedi}/utils/io_helpers.py RENAMED Viewed

File without changes

{tag → gedi}/utils/matrix_tools.py RENAMED Viewed

File without changes

main.py CHANGED Viewed

@@ -1,12 +1,12 @@
 import config
 import pandas as pd
 from datetime import datetime as dt
-from tag.generator import GenerateEventLogs
-from tag.features import EventLogFeatures
-from tag.analyser import FeatureAnalyser
-from tag.augmentation import InstanceAugmentator
-from tag.benchmark import BenchmarkTest
-from tag.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
 from utils.param_keys.run_options import *
@@ -57,8 +57,8 @@ def run(kwargs:dict, model_paramas_list: list, filename_list:list):
 if __name__=='__main__':
-    start_tag = dt.now()
-    print(f'INFO: TAG starting {start_tag}')
     args = ArgParser().parse('GEDI main')
     run_params = config.get_run_params(args.run_params_json)
@@ -70,4 +70,4 @@ if __name__=='__main__':
     else:
         load(args.result_load_files, kwargs)
-    print(f'SUCCESS: TAG took {dt.now()-start_tag} sec.')

 import config
 import pandas as pd
 from datetime import datetime as dt
+from gedi.generator import GenerateEventLogs
+from gedi.features import EventLogFeatures
+from gedi.analyser import FeatureAnalyser
+from gedi.augmentation import InstanceAugmentator
+from gedi.benchmark import BenchmarkTest
+from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
 from utils.param_keys.run_options import *
 if __name__=='__main__':
+    start_gedi = dt.now()
+    print(f'INFO: GEDI starting {start_gedi}')
     args = ArgParser().parse('GEDI main')
     run_params = config.get_run_params(args.run_params_json)
     else:
         load(args.result_load_files, kwargs)
+    print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')

notebooks/.ipynb_checkpoints/augmentation-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/benchmarking_process_discovery-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/bpic_generability_pdm-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/data_exploration-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/experiment_generator-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_distributions-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_exploration-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_performance_similarity-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_selection-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/feature_variance-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/gedi_representativeness-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/grid_objectives-checkpoint.ipynb DELETED Viewed

@@ -1,376 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "e5aa7223",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "dfd1a302",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df = pd.DataFrame(columns=[\"log\",\"ratio_top_20_variants\", \"normalized_sequence_entropy_linear_forgetting\"])    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "218946b7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "k=0\n",
-    "for i in np.arange(0.2, 1.1,0.2):\n",
-    "    for j in np.arange(0,0.55,0.1):\n",
-    "        k+=1\n",
-    "        new_entry = pd.Series({'log':f\"objective_{k}\", \"ratio_top_20_variants\":round(i,1),\n",
-    "                   \"normalized_sequence_entropy_linear_forgetting\":round(j,1)})\n",
-    "        df = pd.concat([\n",
-    "                df, \n",
-    "                pd.DataFrame([new_entry], columns=new_entry.index)]\n",
-    "           ).reset_index(drop=True)\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "id": "b1e3bb5a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "df.to_csv(\"../data/grid_objectives.csv\" ,index=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "5de45389",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>log</th>\n",
-       "      <th>ratio_top_20_variants</th>\n",
-       "      <th>normalized_sequence_entropy_linear_forgetting</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>objective_1</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>objective_2</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>objective_3</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>objective_4</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>objective_5</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>objective_6</td>\n",
-       "      <td>0.2</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>objective_7</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>objective_8</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>objective_9</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>objective_10</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>objective_11</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>objective_12</td>\n",
-       "      <td>0.4</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>objective_13</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>objective_14</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>objective_15</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>objective_16</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>objective_17</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>objective_18</td>\n",
-       "      <td>0.6</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>objective_19</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>objective_20</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>objective_21</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>objective_22</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>objective_23</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>objective_24</td>\n",
-       "      <td>0.8</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>objective_25</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>25</th>\n",
-       "      <td>objective_26</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>26</th>\n",
-       "      <td>objective_27</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>27</th>\n",
-       "      <td>objective_28</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>28</th>\n",
-       "      <td>objective_29</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>29</th>\n",
-       "      <td>objective_30</td>\n",
-       "      <td>1.0</td>\n",
-       "      <td>0.5</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "             log  ratio_top_20_variants   \n",
-       "0    objective_1                    0.2  \\\n",
-       "1    objective_2                    0.2   \n",
-       "2    objective_3                    0.2   \n",
-       "3    objective_4                    0.2   \n",
-       "4    objective_5                    0.2   \n",
-       "5    objective_6                    0.2   \n",
-       "6    objective_7                    0.4   \n",
-       "7    objective_8                    0.4   \n",
-       "8    objective_9                    0.4   \n",
-       "9   objective_10                    0.4   \n",
-       "10  objective_11                    0.4   \n",
-       "11  objective_12                    0.4   \n",
-       "12  objective_13                    0.6   \n",
-       "13  objective_14                    0.6   \n",
-       "14  objective_15                    0.6   \n",
-       "15  objective_16                    0.6   \n",
-       "16  objective_17                    0.6   \n",
-       "17  objective_18                    0.6   \n",
-       "18  objective_19                    0.8   \n",
-       "19  objective_20                    0.8   \n",
-       "20  objective_21                    0.8   \n",
-       "21  objective_22                    0.8   \n",
-       "22  objective_23                    0.8   \n",
-       "23  objective_24                    0.8   \n",
-       "24  objective_25                    1.0   \n",
-       "25  objective_26                    1.0   \n",
-       "26  objective_27                    1.0   \n",
-       "27  objective_28                    1.0   \n",
-       "28  objective_29                    1.0   \n",
-       "29  objective_30                    1.0   \n",
-       "\n",
-       "    normalized_sequence_entropy_linear_forgetting  \n",
-       "0                                             0.0  \n",
-       "1                                             0.1  \n",
-       "2                                             0.2  \n",
-       "3                                             0.3  \n",
-       "4                                             0.4  \n",
-       "5                                             0.5  \n",
-       "6                                             0.0  \n",
-       "7                                             0.1  \n",
-       "8                                             0.2  \n",
-       "9                                             0.3  \n",
-       "10                                            0.4  \n",
-       "11                                            0.5  \n",
-       "12                                            0.0  \n",
-       "13                                            0.1  \n",
-       "14                                            0.2  \n",
-       "15                                            0.3  \n",
-       "16                                            0.4  \n",
-       "17                                            0.5  \n",
-       "18                                            0.0  \n",
-       "19                                            0.1  \n",
-       "20                                            0.2  \n",
-       "21                                            0.3  \n",
-       "22                                            0.4  \n",
-       "23                                            0.5  \n",
-       "24                                            0.0  \n",
-       "25                                            0.1  \n",
-       "26                                            0.2  \n",
-       "27                                            0.3  \n",
-       "28                                            0.4  \n",
-       "29                                            0.5  "
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d726a5ae",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.9.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/oversampling-checkpoint.ipynb DELETED Viewed

@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/performance_feature_correlation-checkpoint.ipynb DELETED Viewed

@@ -1,6 +0,0 @@
-{
- "cells": [],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/pt_gen-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/.ipynb_checkpoints/statistics_tasks_to_datasets-checkpoint.ipynb DELETED Viewed

@@ -1,818 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "4827785f",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Name</th>\n",
-       "      <th>Short description</th>\n",
-       "      <th>data link</th>\n",
-       "      <th>challenge link</th>\n",
-       "      <th>Citations (Stand Februar 2023)</th>\n",
-       "      <th>Publications</th>\n",
-       "      <th>Process Discovery/ Declarative</th>\n",
-       "      <th>Conformance Checking / Alignment / Replay</th>\n",
-       "      <th>Online / Streaming / Realtime</th>\n",
-       "      <th>Performance (Analysis) / Temporal / Time</th>\n",
-       "      <th>Predict(ive)/  Monitoring/ Prescriptive</th>\n",
-       "      <th>Trace clustering / Clustering</th>\n",
-       "      <th>Preprocessing / Event Abstraction / Event Data Correlation</th>\n",
-       "      <th>Further keywords:</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>Sepsis Cases - Event Log</td>\n",
-       "      <td>This real-life event log contains events of se...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Sepsis_Ca...</td>\n",
-       "      <td>61</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>17</td>\n",
-       "      <td>7</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>8</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, (online process) monitorin...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>BPI 2017 - Offer Log</td>\n",
-       "      <td>Contains data from a financial institute inclu...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, cloud computing</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Road Traffic Fine Management Process (not BPI)</td>\n",
-       "      <td>A real-life event log taken from an informatio...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Road_Traf...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>95</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>32</td>\n",
-       "      <td>9</td>\n",
-       "      <td>4</td>\n",
-       "      <td>8</td>\n",
-       "      <td>15</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>alarm-based prescriptive process monitoring, b...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>BPI 2011</td>\n",
-       "      <td>Contains data from from a Dutch Academic Hospi...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Real-life...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2011:ch...</td>\n",
-       "      <td>57</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>13</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>4</td>\n",
-       "      <td>12</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(compliance) monitoring, (machine) learning, d...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>BPI 2012</td>\n",
-       "      <td>Contains the event log of an application proce...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2012:ch...</td>\n",
-       "      <td>151</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>40</td>\n",
-       "      <td>15</td>\n",
-       "      <td>4</td>\n",
-       "      <td>13</td>\n",
-       "      <td>46</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(in)frequent patterns in process models, (mach...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>BPI 2013 - Open Problems</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
-       "      <td>6</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(in)frequent patterns in process models, (mach...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>BPI 2013 - Closed Problems</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2013:ch...</td>\n",
-       "      <td>12</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>2</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(in)frequent patterns in process models</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>BPI 2013 - Incidents</td>\n",
-       "      <td>The log contains events from an incident and p...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/2013/challenge.html</td>\n",
-       "      <td>36</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>14</td>\n",
-       "      <td>5</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>7</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, rule mining</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>BPI 2014 - Incident Records</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
-       "      <td>5</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>privacy preservation, security</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>BPI 2014 - Interaction Records</td>\n",
-       "      <td>Rabobank Group ICT implemented ITIL processes ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2014:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, hidden Markov models</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>BPI 2015 - Log 3</td>\n",
-       "      <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>specification-driven predictive business proce...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>BPI 2015 - Log 1</td>\n",
-       "      <td>Provided by 5 Dutch municipalities. The data c...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2015:ch...</td>\n",
-       "      <td>8</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(machine) learning</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>BPI 2016 - Clicks Logged In</td>\n",
-       "      <td>Contains clicks of users that are logged in fr...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2016:ch...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>automation</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>BPI 2017 - Application Log</td>\n",
-       "      <td>Contains data from a financial institute inclu...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2017:ch...</td>\n",
-       "      <td>73</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>11</td>\n",
-       "      <td>5</td>\n",
-       "      <td>2</td>\n",
-       "      <td>14</td>\n",
-       "      <td>23</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(machine) learning, alarm-based prescriptive p...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>BPI 2018</td>\n",
-       "      <td>The process covers the handling of application...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://www.win.tue.nl/bpi/doku.php?id=2018:ch...</td>\n",
-       "      <td>26</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>7</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(machine) learning, automation</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>BPI 2020 - Travel Permits</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>stage-based process performance analysis</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>16</th>\n",
-       "      <td>BPI 2019</td>\n",
-       "      <td>Contains the purchase order handling process o...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2019/icpm-2019/cont...</td>\n",
-       "      <td>35</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>6</td>\n",
-       "      <td>6</td>\n",
-       "      <td>9</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(online process) monitoring, remaining time pr...</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>17</th>\n",
-       "      <td>BPI 2020 - International Declarations</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>2</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, remaining time prediction</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>BPI 2020 - Domestic Declarations</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>7</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, remaining time prediction</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>19</th>\n",
-       "      <td>BPI 2020 - Prepaid Travel Cost</td>\n",
-       "      <td>Contains 2 years of data from the reimbursemen...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/BPI_Chall...</td>\n",
-       "      <td>https://icpmconference.org/2020/bpi-challenge/</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>multi-perspective</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>20</th>\n",
-       "      <td>Helpdesk</td>\n",
-       "      <td>Ticketing management process of the Help desk ...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Dataset_b...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>20</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>1</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "      <td>8</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(machine) learning, drift detection</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>21</th>\n",
-       "      <td>Receipt phase of an environmental permit appli...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>15</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Receipt_p...</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>-1</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>Environmental permit application process (‘WAB...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>predictions with a-priori knowledge</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>23</th>\n",
-       "      <td>Environmental permit application process (‘WAB...</td>\n",
-       "      <td>Data originates from the CoSeLoG project where...</td>\n",
-       "      <td>https://data.4tu.nl/articles/dataset/Environme...</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>2</td>\n",
-       "      <td>https://app.dimensions.ai/discover/publication...</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>multidimensional process mining, process cubes</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>24</th>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "      <td>NaN</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                                                 Name  \\\n",
-       "0                            Sepsis Cases - Event Log   \n",
-       "1                                BPI 2017 - Offer Log   \n",
-       "2      Road Traffic Fine Management Process (not BPI)   \n",
-       "3                                            BPI 2011   \n",
-       "4                                            BPI 2012   \n",
-       "5                            BPI 2013 - Open Problems   \n",
-       "6                          BPI 2013 - Closed Problems   \n",
-       "7                                BPI 2013 - Incidents   \n",
-       "8                         BPI 2014 - Incident Records   \n",
-       "9                      BPI 2014 - Interaction Records   \n",
-       "10                                   BPI 2015 - Log 3   \n",
-       "11                                   BPI 2015 - Log 1   \n",
-       "12                        BPI 2016 - Clicks Logged In   \n",
-       "13                         BPI 2017 - Application Log   \n",
-       "14                                           BPI 2018   \n",
-       "15                          BPI 2020 - Travel Permits   \n",
-       "16                                           BPI 2019   \n",
-       "17              BPI 2020 - International Declarations   \n",
-       "18                   BPI 2020 - Domestic Declarations   \n",
-       "19                     BPI 2020 - Prepaid Travel Cost   \n",
-       "20                                           Helpdesk   \n",
-       "21  Receipt phase of an environmental permit appli...   \n",
-       "22  Environmental permit application process (‘WAB...   \n",
-       "23  Environmental permit application process (‘WAB...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                    Short description  \\\n",
-       "0   This real-life event log contains events of se...   \n",
-       "1   Contains data from a financial institute inclu...   \n",
-       "2   A real-life event log taken from an informatio...   \n",
-       "3   Contains data from from a Dutch Academic Hospi...   \n",
-       "4   Contains the event log of an application proce...   \n",
-       "5   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "6   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "7   The log contains events from an incident and p...   \n",
-       "8   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "9   Rabobank Group ICT implemented ITIL processes ...   \n",
-       "10  Provided by 5 Dutch municipalities. The data c...   \n",
-       "11  Provided by 5 Dutch municipalities. The data c...   \n",
-       "12  Contains clicks of users that are logged in fr...   \n",
-       "13  Contains data from a financial institute inclu...   \n",
-       "14  The process covers the handling of application...   \n",
-       "15  Contains 2 years of data from the reimbursemen...   \n",
-       "16  Contains the purchase order handling process o...   \n",
-       "17  Contains 2 years of data from the reimbursemen...   \n",
-       "18  Contains 2 years of data from the reimbursemen...   \n",
-       "19  Contains 2 years of data from the reimbursemen...   \n",
-       "20  Ticketing management process of the Help desk ...   \n",
-       "21  Data originates from the CoSeLoG project where...   \n",
-       "22  Data originates from the CoSeLoG project where...   \n",
-       "23  Data originates from the CoSeLoG project where...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                            data link  \\\n",
-       "0   https://data.4tu.nl/articles/dataset/Sepsis_Ca...   \n",
-       "1   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "2   https://data.4tu.nl/articles/dataset/Road_Traf...   \n",
-       "3   https://data.4tu.nl/articles/dataset/Real-life...   \n",
-       "4   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "5   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "6   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "7   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "8   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "9   https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "10  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "11  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "12  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "13  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "14  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "15  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "16  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "17  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "18  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "19  https://data.4tu.nl/articles/dataset/BPI_Chall...   \n",
-       "20  https://data.4tu.nl/articles/dataset/Dataset_b...   \n",
-       "21  https://data.4tu.nl/articles/dataset/Receipt_p...   \n",
-       "22  https://data.4tu.nl/articles/dataset/Environme...   \n",
-       "23  https://data.4tu.nl/articles/dataset/Environme...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "                                       challenge link  \\\n",
-       "0   https://data.4tu.nl/articles/dataset/Sepsis_Ca...   \n",
-       "1   https://www.win.tue.nl/bpi/doku.php?id=2017:ch...   \n",
-       "2                                                 NaN   \n",
-       "3   https://www.win.tue.nl/bpi/doku.php?id=2011:ch...   \n",
-       "4   https://www.win.tue.nl/bpi/doku.php?id=2012:ch...   \n",
-       "5      https://www.win.tue.nl/bpi/2013/challenge.html   \n",
-       "6   https://www.win.tue.nl/bpi/doku.php?id=2013:ch...   \n",
-       "7      https://www.win.tue.nl/bpi/2013/challenge.html   \n",
-       "8   https://www.win.tue.nl/bpi/doku.php?id=2014:ch...   \n",
-       "9   https://www.win.tue.nl/bpi/doku.php?id=2014:ch...   \n",
-       "10  https://www.win.tue.nl/bpi/doku.php?id=2015:ch...   \n",
-       "11  https://www.win.tue.nl/bpi/doku.php?id=2015:ch...   \n",
-       "12  https://www.win.tue.nl/bpi/doku.php?id=2016:ch...   \n",
-       "13  https://www.win.tue.nl/bpi/doku.php?id=2017:ch...   \n",
-       "14  https://www.win.tue.nl/bpi/doku.php?id=2018:ch...   \n",
-       "15     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "16  https://icpmconference.org/2019/icpm-2019/cont...   \n",
-       "17     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "18     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "19     https://icpmconference.org/2020/bpi-challenge/   \n",
-       "20                                                NaN   \n",
-       "21                                                NaN   \n",
-       "22                                                NaN   \n",
-       "23                                                NaN   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "   Citations (Stand Februar 2023)  \\\n",
-       "0                              61   \n",
-       "1                               4   \n",
-       "2                              95   \n",
-       "3                              57   \n",
-       "4                             151   \n",
-       "5                               6   \n",
-       "6                              12   \n",
-       "7                              36   \n",
-       "8                               5   \n",
-       "9                               1   \n",
-       "10                              1   \n",
-       "11                              8   \n",
-       "12                              1   \n",
-       "13                             73   \n",
-       "14                             26   \n",
-       "15                              2   \n",
-       "16                             35   \n",
-       "17                              2   \n",
-       "18                              7   \n",
-       "19                              2   \n",
-       "20                             20   \n",
-       "21                             15   \n",
-       "22                              2   \n",
-       "23                              2   \n",
-       "24                            NaN   \n",
-       "\n",
-       "                                         Publications  \\\n",
-       "0   https://app.dimensions.ai/discover/publication...   \n",
-       "1   https://app.dimensions.ai/discover/publication...   \n",
-       "2   https://app.dimensions.ai/discover/publication...   \n",
-       "3   https://app.dimensions.ai/discover/publication...   \n",
-       "4   https://app.dimensions.ai/discover/publication...   \n",
-       "5   https://app.dimensions.ai/discover/publication...   \n",
-       "6   https://app.dimensions.ai/discover/publication...   \n",
-       "7   https://app.dimensions.ai/discover/publication...   \n",
-       "8   https://app.dimensions.ai/discover/publication...   \n",
-       "9   https://app.dimensions.ai/discover/publication...   \n",
-       "10  https://app.dimensions.ai/discover/publication...   \n",
-       "11  https://app.dimensions.ai/discover/publication...   \n",
-       "12  https://app.dimensions.ai/discover/publication...   \n",
-       "13  https://app.dimensions.ai/discover/publication...   \n",
-       "14  https://app.dimensions.ai/discover/publication...   \n",
-       "15  https://app.dimensions.ai/discover/publication...   \n",
-       "16  https://app.dimensions.ai/discover/publication...   \n",
-       "17  https://app.dimensions.ai/discover/publication...   \n",
-       "18  https://app.dimensions.ai/discover/publication...   \n",
-       "19  https://app.dimensions.ai/discover/publication...   \n",
-       "20  https://app.dimensions.ai/discover/publication...   \n",
-       "21  https://data.4tu.nl/articles/dataset/Receipt_p...   \n",
-       "22  https://app.dimensions.ai/discover/publication...   \n",
-       "23  https://app.dimensions.ai/discover/publication...   \n",
-       "24                                                NaN   \n",
-       "\n",
-       "   Process Discovery/ Declarative Conformance Checking / Alignment / Replay  \\\n",
-       "0                              17                                         7   \n",
-       "1                               1                                         0   \n",
-       "2                              32                                         9   \n",
-       "3                              13                                         1   \n",
-       "4                              40                                        15   \n",
-       "5                               1                                         0   \n",
-       "6                               3                                         2   \n",
-       "7                              14                                         5   \n",
-       "8                               1                                         0   \n",
-       "9                               0                                         0   \n",
-       "10                              0                                         0   \n",
-       "11                              1                                         1   \n",
-       "12                              1                                         0   \n",
-       "13                             11                                         5   \n",
-       "14                              7                                         1   \n",
-       "15                              0                                         0   \n",
-       "16                              3                                         1   \n",
-       "17                              0                                         0   \n",
-       "18                              0                                         2   \n",
-       "19                              0                                         0   \n",
-       "20                              4                                         1   \n",
-       "21                             -1                                        -1   \n",
-       "22                              0                                         0   \n",
-       "23                              1                                         0   \n",
-       "24                            NaN                                       NaN   \n",
-       "\n",
-       "   Online / Streaming / Realtime Performance (Analysis) / Temporal / Time  \\\n",
-       "0                              4                                        1   \n",
-       "1                              0                                        1   \n",
-       "2                              4                                        8   \n",
-       "3                              3                                        4   \n",
-       "4                              4                                       13   \n",
-       "5                              0                                        0   \n",
-       "6                              1                                        2   \n",
-       "7                              1                                        1   \n",
-       "8                              0                                        0   \n",
-       "9                              0                                        0   \n",
-       "10                             0                                        0   \n",
-       "11                             0                                        0   \n",
-       "12                             1                                        0   \n",
-       "13                             2                                       14   \n",
-       "14                             2                                        0   \n",
-       "15                             0                                        1   \n",
-       "16                             6                                        6   \n",
-       "17                             0                                        1   \n",
-       "18                             2                                        2   \n",
-       "19                             0                                        0   \n",
-       "20                             3                                        1   \n",
-       "21                            -1                                       -1   \n",
-       "22                             0                                        0   \n",
-       "23                             0                                        0   \n",
-       "24                           NaN                                      NaN   \n",
-       "\n",
-       "   Predict(ive)/  Monitoring/ Prescriptive Trace clustering / Clustering  \\\n",
-       "0                                        8                             2   \n",
-       "1                                        1                             0   \n",
-       "2                                       15                             1   \n",
-       "3                                       12                             4   \n",
-       "4                                       46                             0   \n",
-       "5                                        1                             0   \n",
-       "6                                        0                             0   \n",
-       "7                                        7                             0   \n",
-       "8                                        0                             0   \n",
-       "9                                        0                             0   \n",
-       "10                                       1                             0   \n",
-       "11                                       3                             0   \n",
-       "12                                       0                             0   \n",
-       "13                                      23                             1   \n",
-       "14                                       8                             0   \n",
-       "15                                       0                             0   \n",
-       "16                                       9                             4   \n",
-       "17                                       2                             0   \n",
-       "18                                       3                             0   \n",
-       "19                                       0                             0   \n",
-       "20                                       8                             0   \n",
-       "21                                      -1                            -1   \n",
-       "22                                       1                             0   \n",
-       "23                                       0                             0   \n",
-       "24                                     NaN                           NaN   \n",
-       "\n",
-       "   Preprocessing / Event Abstraction / Event Data Correlation  \\\n",
-       "0                                                   2           \n",
-       "1                                                   0           \n",
-       "2                                                   2           \n",
-       "3                                                   1           \n",
-       "4                                                   1           \n",
-       "5                                                   0           \n",
-       "6                                                   3           \n",
-       "7                                                   2           \n",
-       "8                                                   0           \n",
-       "9                                                   0           \n",
-       "10                                                  0           \n",
-       "11                                                  3           \n",
-       "12                                                  0           \n",
-       "13                                                  1           \n",
-       "14                                                  2           \n",
-       "15                                                  0           \n",
-       "16                                                  1           \n",
-       "17                                                  0           \n",
-       "18                                                  0           \n",
-       "19                                                  0           \n",
-       "20                                                  0           \n",
-       "21                                                 -1           \n",
-       "22                                                  0           \n",
-       "23                                                  0           \n",
-       "24                                                NaN           \n",
-       "\n",
-       "                                    Further keywords:  \n",
-       "0   (machine) learning, (online process) monitorin...  \n",
-       "1                 (machine) learning, cloud computing  \n",
-       "2   alarm-based prescriptive process monitoring, b...  \n",
-       "3   (compliance) monitoring, (machine) learning, d...  \n",
-       "4   (in)frequent patterns in process models, (mach...  \n",
-       "5   (in)frequent patterns in process models, (mach...  \n",
-       "6             (in)frequent patterns in process models  \n",
-       "7                     (machine) learning, rule mining  \n",
-       "8                      privacy preservation, security  \n",
-       "9            (machine) learning, hidden Markov models  \n",
-       "10  specification-driven predictive business proce...  \n",
-       "11                                 (machine) learning  \n",
-       "12                                         automation  \n",
-       "13  (machine) learning, alarm-based prescriptive p...  \n",
-       "14                     (machine) learning, automation  \n",
-       "15           stage-based process performance analysis  \n",
-       "16  (online process) monitoring, remaining time pr...  \n",
-       "17      (machine) learning, remaining time prediction  \n",
-       "18      (machine) learning, remaining time prediction  \n",
-       "19                                  multi-perspective  \n",
-       "20                (machine) learning, drift detection  \n",
-       "21                                                NaN  \n",
-       "22                predictions with a-priori knowledge  \n",
-       "23     multidimensional process mining, process cubes  \n",
-       "24                                                NaN  "
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#import pm4py\n",
-    "import pandas as pd\n",
-    "INPUT_PATH = \"../data/mappings.csv\"\n",
-    "df = pd.read_csv(INPUT_PATH, sep = \";\", dtype = \"unicode\")\n",
-    "df"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "04a97f37",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.7"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}

notebooks/.ipynb_checkpoints/test_feed-checkpoint.ipynb DELETED Viewed

The diff for this file is too large to render. See raw diff

notebooks/benchmarking_process_discovery.ipynb CHANGED Viewed

@@ -1277,7 +1277,7 @@
     "\n",
     "import sys\n",
     "import os\n",
-    "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
@@ -1422,7 +1422,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
   }
  },
  "nbformat": 4,

     "\n",
     "import sys\n",
     "import os\n",
+    "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "print(benchmarked_ft.shape, benchmarked_pd.shape)\n",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

notebooks/bpic_generability_pdm.ipynb CHANGED Viewed

@@ -1223,7 +1223,7 @@
     "from scipy.stats import pearsonr\n",
     "import sys\n",
     "import os\n",
-    "sys.path.append(os.path.dirname(\"../tag/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "\n",

     "from scipy.stats import pearsonr\n",
     "import sys\n",
     "import os\n",
+    "sys.path.append(os.path.dirname(\"../gedi/utils/io_helpers.py\"))\n",
     "from io_helpers import get_keys_abbreviation\n",
     "\n",
     "\n",

notebooks/experiment_generator.ipynb CHANGED Viewed

@@ -2225,7 +2225,7 @@
    ],
    "source": [
     "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
-    "#bpic_features = pd.read_csv(\"../tag/output/features/real_event_logs.csv\", index_col=None)\n",
     "\n",
     "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
     "print(bpic_features.shape)\n",
@@ -3102,7 +3102,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.7"
   }
  },
  "nbformat": 4,

    ],
    "source": [
     "bpic_features = pd.read_csv(\"../data/34_bpic_features.csv\", index_col=None)\n",
+    "#bpic_features = pd.read_csv(\"../gedi/output/features/real_event_logs.csv\", index_col=None)\n",
     "\n",
     "#bpic_features = bpic_features.drop(['Unnamed: 0'], axis=1)\n",
     "print(bpic_features.shape)\n",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

notebooks/feature_distributions.ipynb CHANGED Viewed

@@ -1847,7 +1847,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.12"
   }
  },
  "nbformat": 4,

    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,