Andrea MH committed on
Commit
1d56290
·
unverified ·
2 Parent(s): 3be42cd bf97d18

Merge pull request #24 from lmu-dbs/demo-icpm24

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .github/workflows/huggingface.yml +28 -0
  2. .github/workflows/pypi_release.yml +101 -0
  3. .github/workflows/test_gedi.yml +50 -10
  4. README.md +288 -23
  5. config.py +6 -69
  6. config_files/config_layout.json +48 -0
  7. config_files/{algorithm/experiment_real_targets.json → experiment_real_targets.json} +0 -0
  8. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enseef.json +0 -0
  9. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enself.json +0 -0
  10. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enve.json +0 -0
  11. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rmcv.json +0 -0
  12. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rt10v.json +0 -0
  13. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rvpnot.json +0 -0
  14. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enself.json +0 -0
  15. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enve.json +0 -0
  16. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rmcv.json +0 -0
  17. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rt10v.json +0 -0
  18. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rvpnot.json +0 -0
  19. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_enve.json +0 -0
  20. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rmcv.json +0 -0
  21. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rt10v.json +0 -0
  22. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rvpnot.json +0 -0
  23. config_files/grid_2obj/generator_grid_2objectives_enve_mvo.json +1 -0
  24. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rmcv.json +0 -0
  25. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rt10v.json +0 -0
  26. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rvpnot.json +0 -0
  27. config_files/grid_2obj/generator_grid_2objectives_enve_sam.json +1 -0
  28. config_files/grid_2obj/generator_grid_2objectives_mvo_sam.json +1 -0
  29. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rt10v.json +0 -0
  30. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rmcv_rvpnot.json +0 -0
  31. config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_rt10v_rvpnot.json +0 -0
  32. config_files/options/baseline.json +0 -9
  33. config_files/options/run_params.json +0 -9
  34. config_files/{algorithm/pipeline_steps → pipeline_steps}/augmentation.json +0 -0
  35. config_files/{algorithm/pipeline_steps → pipeline_steps}/benchmark.json +1 -1
  36. config_files/{algorithm/pipeline_steps → pipeline_steps}/evaluation_plotter.json +0 -0
  37. config_files/{algorithm/pipeline_steps → pipeline_steps}/feature_extraction.json +0 -0
  38. config_files/{algorithm/pipeline_steps → pipeline_steps}/generation.json +0 -0
  39. config_files/{algorithm → test}/experiment_test.json +0 -0
  40. config_files/{algorithm/test → test}/generator_2bpic_2objectives_ense_enseef.json +0 -0
  41. config_files/{algorithm/test → test}/generator_grid_1objectives_rt10v.json +0 -0
  42. config_files/{algorithm/test → test}/generator_grid_2objectives_ense_enself.json +0 -0
  43. config_files/test/test_abbrv_generation.json +16 -0
  44. data/test/grid_experiments/rt10v.csv +0 -12
  45. data/test/grid_feat.csv +2 -0
  46. data/test/igedi_table_1.csv +4 -0
  47. data/validation/2_ense_rmcv_feat.csv +4 -0
  48. data/validation/genELexperiment1_04_02.json +1 -1
  49. data/validation/genELexperiment3_04_nan.json +1 -0
  50. data/validation/genELexperiment4_nan_02.json +1 -0
.github/workflows/huggingface.yml ADDED
@@ -0,0 +1,28 @@
+ name: Sync to Hugging Face hub
+ on:
+   push:
+     branches:
+       - main
+       - demo-icpm24
+   # to run this workflow manually from the Actions tab
+   workflow_dispatch:
+
+ jobs:
+   sync-to-hub:
+     runs-on: ubuntu-latest
+     steps:
+     - name: Check large files
+       uses: ActionsDesk/[email protected]
+       with:
+         filesizelimit: 10485760 # this is 10MB so we can sync to HF Spaces
+     - uses: actions/checkout@v3
+       with:
+         fetch-depth: 0
+         lfs: true
+     - name: Set current branch as a variable
+       id: set_branch
+       run: echo "CURRENT_BRANCH=${GITHUB_REF##*/}" >> $GITHUB_ENV
+     - name: Push to hub
+       env:
+         HF_TOKEN: ${{ secrets.HF_TOKEN }}
+       run: git push --force https://andreamalhera:[email protected]/spaces/andreamalhera/igedi $CURRENT_BRANCH:main
.github/workflows/pypi_release.yml ADDED
@@ -0,0 +1,101 @@
+ name: Publish Python 🐍 distribution 📦 to PyPI
+
+ on:
+   push:
+     tags:
+       - 'v*.*.*' # Triggers the workflow when a new version tag is pushed
+
+ jobs:
+   build:
+     runs-on: ubuntu-latest
+
+     steps:
+     - name: Check out the code
+       uses: actions/checkout@v4
+
+     - name: Set up Python
+       uses: actions/setup-python@v5
+       with:
+         python-version: '3.x' # Specify your Python version
+
+     - name: Install pypa/build
+       run: >-
+         python3 -m
+         pip install
+         build
+         --user
+     - name: Build a binary wheel and a source tarball
+       run: python3 -m build
+     - name: Store the distribution packages
+       uses: actions/upload-artifact@v3
+       with:
+         name: python-package-distributions
+         path: dist/
+
+   publish-to-pypi:
+     name: >-
+       Publish Python 🐍 distribution 📦 to PyPI
+     if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes
+     needs:
+       - build
+     runs-on: ubuntu-latest
+     environment:
+       name: pypi
+       url: https://pypi.org/p/GEDI
+     permissions:
+       id-token: write # IMPORTANT: mandatory for trusted publishing
+
+     steps:
+     - name: Download all the dists
+       uses: actions/download-artifact@v3
+       with:
+         name: python-package-distributions
+         path: dist/
+     - name: Publish distribution 📦 to PyPI
+       uses: pypa/gh-action-pypi-publish@release/v1
+
+   github-release:
+     name: >-
+       Sign the Python 🐍 distribution 📦 with Sigstore
+       and upload them to GitHub Release
+     needs:
+       - publish-to-pypi
+     runs-on: ubuntu-latest
+
+     permissions:
+       contents: write # IMPORTANT: mandatory for making GitHub Releases
+       id-token: write # IMPORTANT: mandatory for sigstore
+
+     steps:
+     - name: Download all the dists
+       uses: actions/download-artifact@v3
+       with:
+         name: python-package-distributions
+         path: dist/
+     - name: Sign the dists with Sigstore
+       uses: sigstore/[email protected]
+       with:
+         inputs: >-
+           ./dist/*.tar.gz
+           ./dist/*.whl
+     - name: Create GitHub Release
+       env:
+         GITHUB_TOKEN: ${{ github.token }}
+       run: >-
+         gh release create
+         '${{ github.ref_name }}'
+         --repo '${{ github.repository }}'
+         --notes ""
+     - name: Upload artifact signatures to GitHub Release
+       env:
+         GITHUB_TOKEN: ${{ github.token }}
+       # Upload to GitHub Release using the `gh` CLI.
+       # `dist/` contains the built packages, and the
+       # sigstore-produced signatures and certificates.
+       run: >-
+         gh release upload
+         '${{ github.ref_name }}' dist/**
+         --repo '${{ github.repository }}'
+
+     - name: Cleanup
+       run: rm -rf dist
.github/workflows/test_gedi.yml CHANGED
@@ -31,7 +31,7 @@ jobs:
 
     - name: Run test
       run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/feature_extraction.json
+       python main.py -a config_files/pipeline_steps/feature_extraction.json
 
     - name: Compare output
      run: diff data/validation/test_feat.csv data/test_feat.csv
@@ -60,15 +60,23 @@ jobs:
 
     - name: Run test
      run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/generation.json
+       python main.py -a config_files/pipeline_steps/generation.json
 
     - name: Compare output 1
      run:
-       diff data/validation/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
+       diff data/validation/genELexperiment1_04_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment1_04_02.json
 
     - name: Compare output 2
      run:
-       diff data/validation/genELexperiment1_04_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment1_04_02.json
+       diff data/validation/genELexperiment2_07_04.json output/features/grid_feat/2_enself_rt20v/genELexperiment2_07_04.json
+
+    - name: Compare output 3
+      run:
+       diff data/validation/genELexperiment3_04_nan.json output/features/grid_feat/2_enself_rt20v/genELexperiment3_04_nan.json
+
+    - name: Compare output 4
+      run:
+       diff data/validation/genELexperiment4_nan_02.json output/features/grid_feat/2_enself_rt20v/genELexperiment4_nan_02.json
 
   test_benchmark:
     runs-on: ubuntu-latest
@@ -90,10 +98,12 @@ jobs:
 
     - name: Run test
      run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/benchmark.json
+       python main.py -a config_files/pipeline_steps/benchmark.json
 
     - name: Convert output and validation to same encoding
-     run: iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > data/validation/test_benchmark.csv
+     run: |
+       iconv -f UTF-8 -t ASCII data/validation/test_benchmark.csv > data/validation/test_benchmark.csv
+       iconv -f UTF-8 -t ASCII output/benchmark/test_benchmark.csv > output/benchmark/test_benchmark.csv
 
     - name: Compare output
      run: diff data/validation/test_benchmark.csv output/benchmark/test_benchmark.csv
@@ -118,7 +128,7 @@ jobs:
 
     - name: Run test
      run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/augmentation.json
+       python main.py -a config_files/pipeline_steps/augmentation.json
 
   test_evaluation-plotter:
     runs-on: ubuntu-latest
@@ -144,7 +154,7 @@ jobs:
 
     - name: Run test
      run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
+       python main.py -a config_files/pipeline_steps/evaluation_plotter.json
 
   test_integration:
     runs-on: ubuntu-latest
@@ -170,7 +180,7 @@ jobs:
 
     - name: Run test
      run:
-       python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
+       python main.py -a config_files/test/experiment_test.json
 
   test_grid_experiments_script:
     runs-on: ubuntu-latest
@@ -196,10 +206,40 @@ jobs:
 
     - name: Run test
      run:
-       python execute_grid_experiments.py config_files/algorithm/test
+       python gedi/utils/execute_grid_experiments.py config_files/test
 
     - name: Convert output and validation to same encoding
      run: iconv -f UTF-8 -t ASCII output/features/generated/2_bpic_features/2_ense_enseef_feat.csv > data/validation/2_ense_enseef_feat.csv
 
     - name: Compare output
      run: diff data/validation/2_ense_enseef_feat.csv output/features/generated/2_bpic_features/2_ense_enseef_feat.csv
+
+  test_abbrv:
+    runs-on: ubuntu-latest
+
+    # Setting up a python envronment for the test script to run
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v4
+
+    - name: Set up Python
+      uses: actions/setup-python@v5
+      with:
+        python-version: 3.9
+
+    - name: Install dependencies
+      run: |
+        sudo apt-get install build-essential python3 python3-dev
+
+    - name: Install feeed
+      run: |
+        python -m pip install --upgrade pip
+        pip install .
+
+    - name: Run test
+      run:
+        python main.py -a config_files/test/test_abbrv_generation.json
+
+    - name: Compare output
+      run:
+        diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
README.md CHANGED
@@ -1,15 +1,36 @@
- # GEDI
- **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining<br />
- Codebase for the [GEDI paper](https://mcml.ai/publications/gedi.pdf) published at the [BPM'24 conference proceedings](https://link.springer.com/book/10.1007/978-3-031-70396-6).
+ ---
+ title: iGedi
+ emoji: 🌖
+ colorFrom: indigo
+ colorTo: pink
+ sdk: streamlit
+ sdk_version: 1.38.0
+ app_file: utils/config_fabric.py
+ pinned: false
+ license: mit
+ ---
+
+ <p>
+   <img src="gedi/utils/logo.png" alt="Logo" width="100" align="left" />
+   <h1 style="display: inline;">(i)GEDI</h1>
+ </p>
+
+ (**i**nteractive) **G**enerating **E**vent **D**ata with **I**ntentional Features for Benchmarking Process Mining<br />
+ This repository contains the codebase for the interactive web application tool (iGEDI) as well as for the [GEDI paper](https://mcml.ai/publications/gedi.pdf) accepted at the BPM'24 conference.
 
 ## Table of Contents
 
- - [Requirements](#requirements)
+ - [Interactive Web Application (iGEDI)](#interactive-web-application)
+ - [Requirements](#requirements)
 - [Installation](#installation)
 - [General Usage](#general-usage)
 - [Experiments](#experiments)
 - [Citation](#citation)
 
+ ## Interactive Web Application
+ Our [interactive web application](https://huggingface.co/spaces/andreamalhera/gedi) (iGEDI) guides you through the specification process, runs GEDI for you. You can directly download the resulting generated logs or the configuration file to run GEDI locally.
+ ![Interface Screenshot](gedi/utils/iGEDI_interface.png)
+
 ## Requirements
 - [Miniconda](https://docs.conda.io/en/latest/miniconda.html)
 - Graphviz on your OS e.g.
@@ -28,7 +49,7 @@ conda install pyrfr swig
 ### Startup
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_test.json
+ python main.py -a config_files/test/experiment_test.json
 ```
 The last step should take only a few minutes to run.
 
@@ -42,18 +63,18 @@ Our pipeline offers several pipeline steps, which can be run sequentially or par
 To run different steps of the GEDI pipeline, please adapt the `.json` accordingly.
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/<pipeline-step>.json
+ python main.py -a config_files/pipeline_steps/<pipeline-step>.json
 ```
- For reference of possible keys and values for each step, please see `config_files/algorithm/experiment_test.json`.
+ For reference of possible keys and values for each step, please see `config_files/test/experiment_test.json`.
 To run the whole pipeline please create a new `.json` file, specifying all steps you want to run and specify desired keys and values for each step.
- To reproduce results from out paper, please refer to [Experiments](#experiments).
+ To reproduce results from our paper, please refer to [Experiments](#experiments).
 
 ### Feature Extraction
 ---
 To extract the features on the event-log level and use them for hyperparameter optimization, we employ the following script:
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/feature_extraction.json
+ python main.py -a config_files/pipeline_steps/feature_extraction.json
 ```
 The JSON file consists of the following key-value pairs:
 
@@ -64,8 +85,7 @@ The JSON file consists of the following key-value pairs:
 - real_eventlog_path: defines the file with the features extracted from the real event logs
 - plot_type: defines the style of the output plotting (possible values: violinplot, boxplot)
 - font_size: label font size of the output plot
- - boxplot_widht: width of the violinplot/boxplot
-
+ - boxplot_width: width of the violinplot/boxplot
 
 ### Generation
 ---
@@ -75,7 +95,7 @@ The command to execute the generation step is given by a exemplarily generation.
 
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/generation.json
+ python main.py -a config_files/pipeline_steps/generation.json
 ```
 
 In the `generation.json`, we have the following key-value pairs:
@@ -102,12 +122,228 @@ In the `generation.json`, we have the following key-value pairs:
 
 - plot_reference_feature: defines the feature, which is used on the x-axis on the output plots, i.e., each feature defined in the 'objectives' of the 'experiment' is plotted against the reference feature being defined in this value
 
+ In case of manually defining the targets for the features in config space, the following table shows the range of the features in the real-world event log data (BPIC's) for reference:
+ <div style="overflow-x:auto;">
+ <table border="1" class="dataframe">
+   <thead>
+     <tr style="text-align: right;">
+       <th></th>
+       <th>n_traces</th>
+       <th>n_unique_traces</th>
+       <th>ratio_variants_per_number_of_traces</th>
+       <th>trace_len_min</th>
+       <th>trace_len_max</th>
+       <th>trace_len_mean</th>
+       <th>trace_len_median</th>
+       <th>trace_len_mode</th>
+       <th>trace_len_std</th>
+       <th>trace_len_variance</th>
+       <th>trace_len_q1</th>
+       <th>trace_len_q3</th>
+       <th>trace_len_iqr</th>
+       <th>trace_len_geometric_mean</th>
+       <th>trace_len_geometric_std</th>
+       <th>trace_len_harmonic_mean</th>
+       <th>trace_len_skewness</th>
+       <th>trace_len_kurtosis</th>
+       <th>trace_len_coefficient_variation</th>
+       <th>trace_len_entropy</th>
+       <th>trace_len_hist1</th>
+       <th>trace_len_hist2</th>
+       <th>trace_len_hist3</th>
+       <th>trace_len_hist4</th>
+       <th>trace_len_hist5</th>
+       <th>trace_len_hist6</th>
+       <th>trace_len_hist7</th>
+       <th>trace_len_hist8</th>
+       <th>trace_len_hist9</th>
+       <th>trace_len_hist10</th>
+       <th>trace_len_skewness_hist</th>
+       <th>trace_len_kurtosis_hist</th>
+       <th>ratio_most_common_variant</th>
+       <th>ratio_top_1_variants</th>
+       <th>ratio_top_5_variants</th>
+       <th>ratio_top_10_variants</th>
+       <th>ratio_top_20_variants</th>
+       <th>ratio_top_50_variants</th>
+       <th>ratio_top_75_variants</th>
+       <th>mean_variant_occurrence</th>
+       <th>std_variant_occurrence</th>
+       <th>skewness_variant_occurrence</th>
+       <th>kurtosis_variant_occurrence</th>
+       <th>n_unique_activities</th>
+       <th>activities_min</th>
+       <th>activities_max</th>
+       <th>activities_mean</th>
+       <th>activities_median</th>
+       <th>activities_std</th>
+       <th>activities_variance</th>
+       <th>activities_q1</th>
+       <th>activities_q3</th>
+       <th>activities_iqr</th>
+       <th>activities_skewness</th>
+       <th>activities_kurtosis</th>
+       <th>n_unique_start_activities</th>
+       <th>start_activities_min</th>
+       <th>start_activities_max</th>
+       <th>start_activities_mean</th>
+       <th>start_activities_median</th>
+       <th>start_activities_std</th>
+       <th>start_activities_variance</th>
+       <th>start_activities_q1</th>
+       <th>start_activities_q3</th>
+       <th>start_activities_iqr</th>
+       <th>start_activities_skewness</th>
+       <th>start_activities_kurtosis</th>
+       <th>n_unique_end_activities</th>
+       <th>end_activities_min</th>
+       <th>end_activities_max</th>
+       <th>end_activities_mean</th>
+       <th>end_activities_median</th>
+       <th>end_activities_std</th>
+       <th>end_activities_variance</th>
+       <th>end_activities_q1</th>
+       <th>end_activities_q3</th>
+       <th>end_activities_iqr</th>
+       <th>end_activities_skewness</th>
+       <th>end_activities_kurtosis</th>
+       <th>eventropy_trace</th>
+       <th>eventropy_prefix</th>
+       <th>eventropy_global_block</th>
+       <th>eventropy_lempel_ziv</th>
+       <th>eventropy_k_block_diff_1</th>
+       <th>eventropy_k_block_diff_3</th>
+       <th>eventropy_k_block_diff_5</th>
+       <th>eventropy_k_block_ratio_1</th>
+       <th>eventropy_k_block_ratio_3</th>
+       <th>eventropy_k_block_ratio_5</th>
+       <th>eventropy_knn_3</th>
+       <th>eventropy_knn_5</th>
+       <th>eventropy_knn_7</th>
+       <th>epa_variant_entropy</th>
+       <th>epa_normalized_variant_entropy</th>
+       <th>epa_sequence_entropy</th>
+       <th>epa_normalized_sequence_entropy</th>
+       <th>epa_sequence_entropy_linear_forgetting</th>
+       <th>epa_normalized_sequence_entropy_linear_forgetting</th>
+       <th>epa_sequence_entropy_exponential_forgetting</th>
+       <th>epa_normalized_sequence_entropy_exponential_forgetting</th>
+     </tr>
+   </thead>
+   <tbody>
+     <tr>
+       <td>[ min, max ]</td>
+       <td>[ 226.0, 251734.0 ]</td>
+       <td>[ 6.0, 28457.0 ]</td>
+       <td>[ 0.0, 1.0 ]</td>
+       <td>[ 1.0, 24.0 ]</td>
+       <td>[ 1.0, 2973.0 ]</td>
+       <td>[ 1.0, 131.49 ]</td>
+       <td>[ 1.0, 55.0 ]</td>
+       <td>[ 1.0, 61.0 ]</td>
+       <td>[ 0.0, 202.53 ]</td>
+       <td>[ 0.0, 41017.89 ]</td>
+       <td>[ 1.0, 44.0 ]</td>
+       <td>[ 1.0, 169.0 ]</td>
+       <td>[ 0.0, 161.0 ]</td>
+       <td>[ 1.0, 53.78 ]</td>
+       <td>[ 1.0, 5.65 ]</td>
+       <td>[ 1.0, 51.65 ]</td>
+       <td>[ -0.58, 111.97 ]</td>
+       <td>[ -0.97, 14006.75 ]</td>
+       <td>[ 0.0, 4.74 ]</td>
+       <td>[ 5.33, 12.04 ]</td>
+       <td>[ 0.0, 1.99 ]</td>
+       <td>[ 0.0, 0.42 ]</td>
+       <td>[ 0.0, 0.4 ]</td>
+       <td>[ 0.0, 0.19 ]</td>
+       <td>[ 0.0, 0.14 ]</td>
+       <td>[ 0.0, 10.0 ]</td>
+       <td>[ 0.0, 0.02 ]</td>
+       <td>[ 0.0, 0.04 ]</td>
+       <td>[ 0.0, 0.0 ]</td>
+       <td>[ 0.0, 2.7 ]</td>
+       <td>[ -0.58, 111.97 ]</td>
+       <td>[ -0.97, 14006.75 ]</td>
+       <td>[ 0.0, 0.79 ]</td>
+       <td>[ 0.0, 0.87 ]</td>
+       <td>[ 0.0, 0.98 ]</td>
+       <td>[ 0.0, 0.99 ]</td>
+       <td>[ 0.2, 1.0 ]</td>
+       <td>[ 0.5, 1.0 ]</td>
+       <td>[ 0.75, 1.0 ]</td>
+       <td>[ 1.0, 24500.67 ]</td>
+       <td>[ 0.04, 42344.04 ]</td>
+       <td>[ 1.54, 64.77 ]</td>
+       <td>[ 0.66, 5083.46 ]</td>
+       <td>[ 1.0, 1152.0 ]</td>
+       <td>[ 1.0, 66058.0 ]</td>
+       <td>[ 34.0, 466141.0 ]</td>
+       <td>[ 4.13, 66058.0 ]</td>
+       <td>[ 2.0, 66058.0 ]</td>
+       <td>[ 0.0, 120522.25 ]</td>
+       <td>[ 0.0, 14525612122.34 ]</td>
+       <td>[ 1.0, 66058.0 ]</td>
+       <td>[ 4.0, 79860.0 ]</td>
+       <td>[ 0.0, 77290.0 ]</td>
+       <td>[ -0.06, 15.21 ]</td>
+       <td>[ -1.5, 315.84 ]</td>
+       <td>[ 1.0, 809.0 ]</td>
+       <td>[ 1.0, 150370.0 ]</td>
+       <td>[ 27.0, 199867.0 ]</td>
+       <td>[ 3.7, 150370.0 ]</td>
+       <td>[ 1.0, 150370.0 ]</td>
+       <td>[ 0.0, 65387.49 ]</td>
+       <td>[ 0.0, 4275524278.19 ]</td>
+       <td>[ 1.0, 150370.0 ]</td>
+       <td>[ 4.0, 150370.0 ]</td>
+       <td>[ 0.0, 23387.25 ]</td>
+       <td>[ 0.0, 9.3 ]</td>
+       <td>[ -2.0, 101.82 ]</td>
+       <td>[ 1.0, 757.0 ]</td>
+       <td>[ 1.0, 16653.0 ]</td>
+       <td>[ 28.0, 181328.0 ]</td>
+       <td>[ 3.53, 24500.67 ]</td>
+       <td>[ 1.0, 16653.0 ]</td>
+       <td>[ 0.0, 42344.04 ]</td>
+       <td>[ 0.0, 1793017566.89 ]</td>
+       <td>[ 1.0, 16653.0 ]</td>
+       <td>[ 3.0, 39876.0 ]</td>
+       <td>[ 0.0, 39766.0 ]</td>
+       <td>[ -0.7, 13.82 ]</td>
+       <td>[ -2.0, 255.39 ]</td>
+       <td>[ 0.0, 13.36 ]</td>
+       <td>[ 0.0, 16.77 ]</td>
+       <td>[ 0.0, 24.71 ]</td>
+       <td>[ 0.0, 685.0 ]</td>
+       <td>[ -328.0, 962.0 ]</td>
+       <td>[ 0.0, 871.0 ]</td>
+       <td>[ 0.0, 881.0 ]</td>
+       <td>[ 0.0, 935.0 ]</td>
+       <td>[ 0.0, 7.11 ]</td>
+       <td>[ 0.0, 7.11 ]</td>
+       <td>[ 0.0, 8.93 ]</td>
+       <td>[ 0.0, 648.0 ]</td>
+       <td>[ 0.0, 618.0 ]</td>
+       <td>[ 0.0, 11563842.15 ]</td>
+       <td>[ 0.0, 0.9 ]</td>
+       <td>[ 0.0, 21146257.12 ]</td>
+       <td>[ 0.0, 0.76 ]</td>
+       <td>[ 0.0, 14140225.9 ]</td>
+       <td>[ 0.0, 0.42 ]</td>
+       <td>[ 0.0, 15576076.83 ]</td>
+       <td>[ 0.0, 0.51 ]</td>
+     </tr>
+   </tbody>
+ </table>
+ </div>
+
 ### Benchmark
 The benchmarking defines the downstream task which is used for evaluating the goodness of the synthesized event log datasets with the metrics of real-world datasets. The command to execute a benchmarking is shown in the following script:
 
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/benchmark.json
+ python main.py -a config_files/pipeline_steps/benchmark.json
 ```
 
 In the `benchmark.json`, we have the following key-value pairs:
@@ -125,7 +361,7 @@ The purpose of the evaluation plotting step is used just for visualization. Some
 
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/pipeline_steps/evaluation_plotter.json
+ python main.py -a config_files/pipeline_steps/evaluation_plotter.json
 ```
 
 Generally, in the `evaluation_plotter.json`, we have the following key-value pairs:
@@ -141,26 +377,35 @@ In this repository, experiments can be run selectively or from scratch, as prefe
 We present two settings for generating intentional event logs, using [real targets](#generating-data-with-real-targets) or using [grid targets](#generating-data-with-grid-targets). Both settings output `.xes` event logs, `.json` and `.csv` files containing feature values, as well as evaluation results, from running a [process discovery benchmark](#benchmark), for the generated event logs.
 
 ### Generating data with real targets
- To execute the experiments with real targets, we employ the [experiment_real_targets.json](config_files/algorithm/experiment_real_targets.json). The script's pipeline will output the [generated event logs (GenBaselineED)](data/event_logs/GenBaselineED), which optimize their feature values towards [real-world event data features](data/BaselineED_feat.csv), alongside their respectively measured [feature values](data/GenBaselineED_feat.csv) and [benchmark metrics values](data/GenBaselineED_bench.csv).
+ To execute the experiments with real targets, we employ the [experiment_real_targets.json](config_files/experiment_real_targets.json). The script's pipeline will output the [generated event logs (GenBaselineED)](data/event_logs/GenBaselineED), which optimize their feature values towards [real-world event data features](data/BaselineED_feat.csv), alongside their respectively measured [feature values](data/GenBaselineED_feat.csv) and [benchmark metrics values](data/GenBaselineED_bench.csv).
 
 ```console
 conda activate gedi
- python main.py -o config_files/options/baseline.json -a config_files/algorithm/experiment_real_targets.json
+ python main.py -a config_files/experiment_real_targets.json
 ```
 
 ### Generating data with grid targets
- To execute the experiments with grid targets, a single [configuration](config_files/algorithm/grid_2obj) can be selected or all [grid objectives](data/grid_2obj) can be run with one command using the following script. This script will output the [generated event logs (GenED)](data/event_logs/GenED), alongside their respectively measured [feature values](data/GenED_feat.csv) and [benchmark metrics values](data/GenED_bench.csv).
+ To execute the experiments with grid targets, a single [configuration](config_files/grid_2obj) can be selected or all [grid objectives](data/grid_2obj) can be run with one command using the following script. This script will output the [generated event logs (GenED)](data/event_logs/GenED), alongside their respectively measured [feature values](data/GenED_feat.csv) and [benchmark metrics values](data/GenED_bench.csv).
 ```
 conda activate gedi
- python execute_grid_experiments.py config_files/algorithm/grid_2obj
+ python gedi/utils/execute_grid_experiments.py config_files/test
+ ```
+ We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/grid_2obj) and [objective](data/grid_2obj) files for this experiment.
+ For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
+ To create configuration files for grid objectives interactively, you can use the start the following dashboard:
+ ```
+ streamlit run utils/config_fabric.py # To tunnel to local machine add: --server.port 8501 --server.headless true
+
+ # In local machine (only in case you are tunneling):
+ ssh -N -f -L 9000:localhost:8501 <user@remote_machine.com>
+ open "http://localhost:9000/"
 ```
- We employ the [experiment_grid_2obj_configfiles_fabric.ipynb](notebooks/experiment_grid_2obj_configfiles_fabric.ipynb) to create all necessary [configuration](config_files/algorithm/grid_2obj) and [objective](data/grid_2obj) files for this experiment. For more details about these config_files, please refer to [Feature Extraction](#feature-extraction), [Generation](#generation), and [Benchmark](#benchmark).
 
 ### Visualizations
 To run the visualizations, we employ [jupyter notebooks](https://jupyter.org/install) and [add the installed environment to the jupyter notebook](https://medium.com/@nrk25693/how-to-add-your-conda-environment-to-your-jupyter-notebook-in-just-4-steps-abeab8b8d084). We then start all visualizations by running e.g.: `jupyter noteboook`. In the following, we describe the `.ipynb`-files in the folder `\notebooks` to reproduce the figures from our paper.
 
 #### [Fig. 4 and fig. 5 Representativeness](notebooks/gedi_figs4and5_representativeness.ipynb)
- To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping. Additionally, we visualize the distribution of each meta feature we used in the paper as a boxplot. Additional features can be extracted with FEEED. Therefore, the notebook contains the figures 4 and 5 in the paper.
+ To visualize the coverage of the feasible feature space of generated event logs compared to existing real-world benchmark datasets, in this notebook, we conduct a principal component analysis on the features of both settings. The first two principal components are utilized to visualize the coverage which is further highlighted by computing a convex hull of the 2D mapping.Additionally, we visualize the distribution of each meta feature we used in the paper as a boxplot. Additional features can be extracted with FEEED. Therefore, the notebook contains the figures 4 and 5 in the paper.
 
 #### [Fig. 6 Benchmark Boxplots](notebooks/gedi_fig6_benchmark_boxplots.ipynb)
 This notebook is used to visualize the metric distribution of real event logs compared to the generated ones. It shows 5 different metrics on 3 various process discovery techniques. We use 'fitness,', 'precision', 'fscore', 'size', 'cfc' (control-flow complexity) as metrics and as 'heuristic miner', 'ilp' (integer linear programming), and 'imf' (inductive miner infrequent) as miners. The notebook outputs the visualization shown in Fig.6 in the paper.
@@ -169,11 +414,14 @@ This notebook is used to visualize the metric distribution of real event logs co
 
 This notebook is used to answer the question if there is a statistically significant relation between feature similarity and performance metrics for the downstream tasks of process discovery. For that, we compute the pearson coefficient, as well as the kendall's tau coefficient. This elucidates the correlation between the features with metric scores being used for process discovery. Each coefficient is calculated for three different settings: i) real-world datasets; ii) synthesized event log data with real-world targets; iii) synthesized event log data with grid objectives. Figures 7 and 8 shown in the paper refer to this notebook.
 
+ #### [Fig. 9 Consistency and fig. 10 Limitations](notebooks/gedi_figs9and10_consistency.ipynb)
+ Likewise to the evaluation on the statistical tests in notebook `gedi_figs7and8_benchmarking_statisticalTests.ipynb`, this notebook is used to compute the differences between two correlation matrices $\Delta C = C_1 - C_2$. This logic is employed to evaluate and visualize the distance of two correlation matrices. Furthermore, we show how significant scores are retained from the correlations being evaluated on real-world datasets coompared to synthesized event log datasets with real-world targets. In Fig. 9 and 10 in the paper, the results of the notebook are shown.
+
 ## Citation
- The `GEDI` framework is taken directly from the original paper by [Maldonado](mailto:[email protected]), Frey, Tavares, Rehwald and Seidl and is *to appear on BPM'24*.
+ The `GEDI` framework is taken directly from the original paper by [Maldonado](mailto:[email protected]), Frey, Tavares, Rehwald and Seidl on BPM'24.
 
- ```bibtex
- @InProceedings{10.1007/978-3-031-70396-6_13,
+ ```
+ @InProceedings{maldonado2024gedi,
 author="Maldonado, Andrea
 and Frey, Christian M. M.
 and Tavares, Gabriel Marques
@@ -193,3 +441,20 @@ abstract="Process mining solutions include enhancing performance, conserving res
 isbn="978-3-031-70396-6"
 }
 ```
+
+ Furthermore, the `iGEDI` web application is taken directly from the original paper by [Maldonado](mailto:[email protected]), Aryasomayajula, Frey, and Seidl and is *to appear on Demos@ICPM'24*.
+ ```
+ @inproceedings{maldonado2024igedi,
+   author = {Andrea Maldonado and
+             Sai Anirudh Aryasomayajula and
+             Christian M. M. Frey and
+             Thomas Seidl},
+   editor = {Jochen De Weerdt, Giovanni Meroni, Han van der Aa, and Karolin Winter},
+   title = {iGEDI: interactive Generating Event Data with Intentional Features},
+   booktitle = {ICPM 2024 Tool Demonstration Track, October 14-18, 2024, Kongens Lyngby, Denmark},
+   series = {{CEUR} Workshop Proceedings},
+   publisher = {CEUR-WS.org},
+   year = {2024},
+   bibsource = {dblp computer science bibliography, https://dblp.org}
+ }
+ ```
config.py CHANGED
@@ -1,10 +1,8 @@
 import json
- import os
 import warnings
 
- from gedi.utils.io_helpers import sort_files
- from tqdm import tqdm
- from utils.param_keys import INPUT_NAME, FILENAME, FOLDER_PATH, PARAMS
+ from utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
+ from utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
 
 def get_model_params_list(alg_json_file: str) :#-> list[dict]:
     """
@@ -20,69 +18,8 @@ def get_model_params_list(alg_json_file: str) :#-> list[dict]:
     warnings.warn('The default model parameter list is used instead of a .json-file.\n'
                   ' Use a configuration from the `config_files`-folder together with the args `-a`.')
     return [
-     {ALGORITHM_NAME: 'pca', NDIM: TENSOR_NDIM},
+     {PIPELINE_STEP: 'feature_extraction', INPUT_PATH: 'data/test',
+      FEATURE_PARAMS: {FEATURE_SET: ['ratio_unique_traces_per_trace',
+                                     'ratio_most_common_variant']},
+      OUTPUT_PATH: 'output/plots'}
     ]
- def get_run_params(alg_params_json: str) -> dict:
-     """
-     Loads the running configuration given from a json file or the default dictionary from the code.
-     @param alg_params_json: str
-         Path to the json data with the running configuration
-     @return: dict
-         Running Configuration
-     """
-     if alg_params_json is not None:
-         return json.load(open(alg_params_json))
-     else:
-         warnings.warn('The default run option is used instead of a .json-file.\n'
-                       ' Use a configuration from the `config_files`-folder together with the args `-o`.')
-         return {
-             RUN_OPTION: COMPARE,
-             PLOT_TYPE: COLOR_MAP, # 'heat_map', 'color_map', '3d_map', 'explained_var_plot'
-             PLOT_TICS: True,
-             N_COMPONENTS: 2,
-             INPUT_NAME: 'runningExample',
-             SAVE_RESULTS: True,
-             LOAD_RESULTS: True
-         }
-
- def get_files_and_kwargs(params: dict):
-     """
-     This method returns the filename list of the trajectory and generates the kwargs for the DataTrajectory.
-     The method is individually created for the available data set.
-     Add new trajectory options, if different data set are used.
-     @param params: dict
-         running configuration
-     @return: tuple
-         list of filenames of the trajectories AND
-         kwargs with the important arguments for the classes
-     """
-     try:
-         input_name = params[INPUT_NAME]
-     except KeyError as e:
-         raise KeyError(f'Run option parameter is missing the key: `{e}`. This parameter is mandatory.')
-
-     #TODO: generate parent directories if they don't exist
-     if input_name == 'test':
-         filename_list = list(tqdm(sort_files(os.listdir('data/test'))))
-         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/test'}
-     elif input_name == 'realLogs':
-         filename_list = list(tqdm(sort_files(os.listdir('data/real_event_logs'))))
-         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/real_event_logs'}
-     elif input_name == 'gen5':
-         filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:5]
-         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
-     elif input_name == 'gen20':
-         filename_list = list(tqdm(sort_files(os.listdir('data/event_log'))))[:20]
-         kwargs = {FILENAME: filename_list, FOLDER_PATH: 'data/event_log'}
-     elif input_name == 'runningExample':
-         filename_list = ['running-example.xes']
-         kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'data/'}
-     elif input_name == 'metaFeatures':
-         filename_list = ['log_features.csv']
-         kwargs = {FILENAME: filename_list[0], FOLDER_PATH: 'results/'}
-     else:
-         raise ValueError(f'No data trajectory was found with the name `{input_name}`.')
-
-     #filename_list.pop(file_element)
-     kwargs[PARAMS] = params
-     return filename_list, kwargs
config_files/config_layout.json ADDED
@@ -0,0 +1,48 @@
+ [
+   {
+     "pipeline_step": "instance_augmentation",
+     "augmentation_params":{"method":"SMOTE", "no_samples":2,
+       "feature_selection": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
+     "input_path": "data/test/bpic_features.csv",
+     "output_path": "output"
+   },
+   {
+     "pipeline_step": "event_logs_generation",
+     "output_path": "output/features/2_bpic_features/2_ense_rmcv_feat.csv",
+     "output_path": "data/frontend/test",
+     "generator_params": {
+       "experiment": "data/grid_objectives.csv",
+       "experiment": {"input_path": "data/2_bpic_features.csv",
+         "objectives": ["ratio_top_20_variants", "epa_normalized_sequence_entropy_linear_forgetting"]},
+ "experiment": {"n_traces":832, "n_unique_traces":828, "ratio_variants_per_number_of_traces":0.99, "trace_len_min":1, "trace_len_max":132, "trace_len_mean":53.31, "trace_len_median":54, "trace_len_mode":61, "trace_len_std":19.89, "trace_len_variance":395.81, "trace_len_q1":44, "trace_len_q3":62, "trace_len_iqr":18, "trace_len_geometric_mean":48.15, "trace_len_geometric_std":1.69, "trace_len_harmonic_mean":37.58, "trace_len_skewness":0.0541, "trace_len_kurtosis":0.81, "trace_len_coefficient_variation":0.37, "trace_len_entropy":6.65, "trace_len_hist1":0.004, "trace_len_hist2":0.005, "trace_len_hist3":0.005, "trace_len_hist4":0.024, "trace_len_hist5":0.024, "trace_len_hist6":0.008, "trace_len_hist7":0.005, "trace_len_hist8":0.001, "trace_len_hist9":0.0, "trace_len_hist10":0.00, "trace_len_skewness_hist":0.05, "trace_len_kurtosis_hist":0.8, "ratio_most_common_variant":0.0, "ratio_top_1_variants":0.01, "ratio_top_5_variants":0.05, "ratio_top_10_variants":0.10, "ratio_top_20_variants":0.2, "ratio_top_50_variants":0.5, "ratio_top_75_variants":0.75, "mean_variant_occurrence":1.0, "std_variant_occurrence":0.07, "skewness_variant_occurrence":14.28, "kurtosis_variant_occurrence":202.00, "n_unique_activities":410, "activities_min":1, "activities_max":830, "activities_mean":108.18, "activities_median":12, "activities_std":187.59, "activities_variance":35189, "activities_q1":3, "activities_q3":125, "activities_iqr":122, "activities_skewness":2.13, "activities_kurtosis":3.81, "n_unique_start_activities":14, "start_activities_min":1, "start_activities_max":731, "start_activities_mean":59.43, "start_activities_median":1, "start_activities_std":186.72, "start_activities_variance":34863, "start_activities_q1":1, "start_activities_q3":8, "start_activities_iqr":7, "start_activities_skewness":3, "start_activities_kurtosis":9.0, "n_unique_end_activities":82, "end_activities_min":1, "end_activities_max":216, "end_activities_mean":10, "end_activities_median":1, "end_activities_std":35, "end_activities_variance":1247, "end_activities_q1":1, "end_activities_q3":3, "end_activities_iqr":2, "end_activities_skewness":5, "end_activities_kurtosis":26, "eventropy_trace":10, "eventropy_prefix":15, "eventropy_global_block":19, "eventropy_lempel_ziv":4, "eventropy_k_block_diff_1":7.1, "eventropy_k_block_diff_3":7.1, "eventropy_k_block_diff_5":7.1, "eventropy_k_block_ratio_1":7.1, "eventropy_k_block_ratio_3":7.1, "eventropy_k_block_ratio_5":7.1, "eventropy_knn_3":5.54, "eventropy_knn_5":5.04, "eventropy_knn_7":4.72, "epa_variant_entropy":240512, "epa_normalized_variant_entropy":0.68, "epa_sequence_entropy":285876, "epa_normalized_sequence_entropy":0.60, "epa_sequence_entropy_linear_forgetting":150546, "epa_normalized_sequence_entropy_linear_forgetting":0.32, "epa_sequence_entropy_exponential_forgetting":185312, "epa_normalized_sequence_entropy_exponential_forgetting":0.39},
+       "config_space": {
+         "mode": [5, 20],
+         "sequence": [0.01, 1],
+         "choice": [0.01, 1],
+         "parallel": [0.01, 1],
+         "loop": [0.01, 1],
+         "silent": [0.01, 1],
+         "lt_dependency": [0.01, 1],
+         "num_traces": [10, 100],
+         "duplicate": [0],
+         "or": [0]
+       },
+       "n_trials": 50
+     }
+   },
+   {
+     "pipeline_step": "feature_extraction",
+     "input_path": "data/test",
+ "feature_params": {"feature_set": ["n_traces", "n_unique_traces", "ratio_unique_traces_per_trace", "trace_len_min", "trace_len_max", "trace_len_mean", "trace_len_median", "trace_len_mode", "trace_len_std", "trace_len_variance", "trace_len_q1", "trace_len_q3", "trace_len_iqr", "trace_len_geometric_mean", "trace_len_geometric_std", "trace_len_harmonic_mean", "trace_len_skewness", "trace_len_kurtosis", "trace_len_coefficient_variation", "trace_len_entropy", "trace_len_hist1", "trace_len_hist2", "trace_len_hist3", "trace_len_hist4", "trace_len_hist5", "trace_len_hist6", "trace_len_hist7", "trace_len_hist8", "trace_len_hist9", "trace_len_hist10", "trace_len_skewness_hist", "trace_len_kurtosis_hist", "ratio_most_common_variant", "ratio_top_1_variants", "ratio_top_5_variants", "ratio_top_10_variants", "ratio_top_20_variants", "ratio_top_50_variants", "ratio_top_75_variants", "mean_variant_occurrence", "std_variant_occurrence", "skewness_variant_occurrence", "kurtosis_variant_occurrence", "n_unique_activities", "activities_min", "activities_max", "activities_mean", "activities_median", "activities_std", "activities_variance", "activities_q1", "activities_q3", "activities_iqr", "activities_skewness", "activities_kurtosis", "n_unique_start_activities", "start_activities_min", "start_activities_max", "start_activities_mean", "start_activities_median", "start_activities_std", "start_activities_variance", "start_activities_q1", "start_activities_q3", "start_activities_iqr", "start_activities_skewness", "start_activities_kurtosis", "n_unique_end_activities", "end_activities_min", "end_activities_max", "end_activities_mean", "end_activities_median", "end_activities_std", "end_activities_variance", "end_activities_q1", "end_activities_q3", "end_activities_iqr", "end_activities_skewness", "end_activities_kurtosis", "eventropy_trace", "eventropy_prefix", "eventropy_prefix_flattened", "eventropy_global_block", "eventropy_global_block_flattened", "eventropy_lempel_ziv", "eventropy_lempel_ziv_flattened", "eventropy_k_block_diff_1", "eventropy_k_block_diff_3", "eventropy_k_block_diff_5", "eventropy_k_block_ratio_1", "eventropy_k_block_ratio_3", "eventropy_k_block_ratio_5", "eventropy_knn_3", "eventropy_knn_5", "eventropy_knn_7", "epa_variant_entropy", "epa_normalized_variant_entropy", "epa_sequence_entropy", "epa_normalized_sequence_entropy", "epa_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_linear_forgetting", "epa_sequence_entropy_exponential_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]},
+     "output_path": "output/plots",
+     "real_eventlog_path": "data/BaselineED_feat.csv",
+     "plot_type": "boxplot"
+   },
+   {
+     "pipeline_step": "benchmark_test",
+     "benchmark_task": "discovery",
+     "input_path":"data/test",
+     "output_path":"output",
+     "miners" : ["inductive", "heu", "imf", "ilp"]
+   }
+ ]
config_files/{algorithm/experiment_real_targets.json → experiment_real_targets.json} RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enseef.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enself.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_enve.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rmcv.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rt10v.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_ense_rvpnot.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enself.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_enve.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rmcv.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rt10v.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enseef_rvpnot.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_enve.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rmcv.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rt10v.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enself_rvpnot.json RENAMED
File without changes
config_files/grid_2obj/generator_grid_2objectives_enve_mvo.json ADDED
@@ -0,0 +1 @@
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_mvo.csv", "objectives": ["epa_normalized_variant_entropy", "mean_variant_occurrence"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_enve_mvo/2_enve_mvo", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_enve_mvo/2_enve_mvo", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rmcv.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rt10v.json RENAMED
File without changes
config_files/{algorithm/grid_2obj → grid_2obj}/generator_grid_2objectives_enve_rvpnot.json RENAMED
File without changes
config_files/grid_2obj/generator_grid_2objectives_enve_sam.json ADDED
@@ -0,0 +1 @@
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_enve_sam.csv", "objectives": ["epa_normalized_variant_entropy", "start_activities_median"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_enve_sam/2_enve_sam", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_enve_sam/2_enve_sam", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/grid_2obj/generator_grid_2objectives_mvo_sam.json ADDED
@@ -0,0 +1 @@
+ [{"pipeline_step": "event_logs_generation", "output_path": "output/shaining/grid_2obj", "generator_params": {"experiment": {"input_path": "data/grid_2obj/grid_2objectives_mvo_sam.csv", "objectives": ["mean_variant_occurrence", "start_activities_median"]}, "config_space": {"mode": [5, 20], "sequence": [0.01, 1], "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1], "silent": [0.01, 1], "lt_dependency": [0.01, 1], "num_traces": [10, 10001], "duplicate": [0], "or": [0]}, "n_trials": 200}}, {"pipeline_step": "feature_extraction", "input_path": "output/features/shaining/grid_2obj/grid_2objectives_mvo_sam/2_mvo_sam", "feature_params": {"feature_set": ["ratio_variants_per_number_of_traces", "ratio_most_common_variant", "ratio_top_10_variants", "epa_normalized_variant_entropy", "epa_normalized_sequence_entropy", "epa_normalized_sequence_entropy_linear_forgetting", "epa_normalized_sequence_entropy_exponential_forgetting"]}, "output_path": "output/plots", "real_eventlog_path": "data/BaselineED_feat.csv", "plot_type": "boxplot"}, {"pipeline_step": "benchmark_test", "benchmark_test": "discovery", "input_path": "output/shaining/grid_2obj/grid_2objectives_mvo_sam/2_mvo_sam", "output_path": "output", "miners": ["heu", "imf", "ilp"]}]
config_files/{algorithm/grid_2obj β†’ grid_2obj}/generator_grid_2objectives_rmcv_rt10v.json RENAMED
File without changes
config_files/{algorithm/grid_2obj β†’ grid_2obj}/generator_grid_2objectives_rmcv_rvpnot.json RENAMED
File without changes
config_files/{algorithm/grid_2obj β†’ grid_2obj}/generator_grid_2objectives_rt10v_rvpnot.json RENAMED
File without changes
config_files/options/baseline.json DELETED
@@ -1,9 +0,0 @@
- {
- "run_option": "baseline",
- "plot_type": "color_map",
- "plot_tics": true,
- "n_components": 2,
- "input_name": "test",
- "save_results": false,
- "load_results": false
- }
config_files/options/run_params.json DELETED
@@ -1,9 +0,0 @@
- {
- "run_option": "compare",
- "plot_type": "color_map",
- "plot_tics": true,
- "n_components": 2,
- "input_name": "gen20",
- "save_results": false,
- "load_results": true
- }
config_files/{algorithm/pipeline_steps β†’ pipeline_steps}/augmentation.json RENAMED
File without changes
config_files/{algorithm/pipeline_steps β†’ pipeline_steps}/benchmark.json RENAMED
@@ -4,6 +4,6 @@
  "benchmark_test": "discovery",
  "input_path":"data/test",
  "output_path":"output",
- "miners" : ["inductive", "heu", "imf", "ilp"]
+ "miners" : ["ind", "heu", "imf", "ilp"]
  }
  ]
config_files/{algorithm/pipeline_steps β†’ pipeline_steps}/evaluation_plotter.json RENAMED
File without changes
config_files/{algorithm/pipeline_steps β†’ pipeline_steps}/feature_extraction.json RENAMED
File without changes
config_files/{algorithm/pipeline_steps β†’ pipeline_steps}/generation.json RENAMED
File without changes
config_files/{algorithm β†’ test}/experiment_test.json RENAMED
File without changes
config_files/{algorithm/test β†’ test}/generator_2bpic_2objectives_ense_enseef.json RENAMED
File without changes
config_files/{algorithm/test β†’ test}/generator_grid_1objectives_rt10v.json RENAMED
File without changes
config_files/{algorithm/test β†’ test}/generator_grid_2objectives_ense_enself.json RENAMED
File without changes
config_files/test/test_abbrv_generation.json ADDED
@@ -0,0 +1,16 @@
+ [{"pipeline_step": "event_logs_generation",
+ "output_path": "output/test",
+ "generator_params": {"experiment":
+ {"input_path": "data/test/igedi_table_1.csv",
+ "objectives": ["rmcv","ense"]},
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
+ "num_traces": [10, 10001], "duplicate": [0],
+ "or": [0]}, "n_trials": 2}},
+ {"pipeline_step": "feature_extraction",
+ "input_path": "output/test/igedi_table_1/2_ense_rmcv",
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
+ "output_path": "output/plots", "real_eventlog_path": "data/test/2_bpic_features.csv",
+ "plot_type": "boxplot"}]
data/test/grid_experiments/rt10v.csv DELETED
@@ -1,12 +0,0 @@
- task,ratio_top_10_variants
- task_1,0.0
- task_2,0.1
- task_3,0.2
- task_4,0.3
- task_5,0.4
- task_6,0.5
- task_7,0.6
- task_8,0.7
- task_9,0.8
- task_10,0.9
- task_11,1.0
data/test/grid_feat.csv CHANGED
@@ -1,3 +1,5 @@
  log,ratio_top_20_variants,epa_normalized_sequence_entropy_linear_forgetting
  experiment1,0.2,0.4
  experiment2,0.4,0.7
+ experiment3,NaN,0.4
+ experiment4,0.2,NaN
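A minimal sketch, assuming pandas is available, of how the two new rows can be read; the NaN cells appear to mark objectives that are intentionally left unspecified for an experiment (compare the genELexperiment3/4 validation files further below).

# Sketch only: list the defined objective targets per experiment row.
import pandas as pd

feat = pd.read_csv("data/test/grid_feat.csv")
for _, row in feat.iterrows():
    # Keep only the objective columns that are actually defined for this row.
    defined = row.drop(labels="log").dropna()
    print(row["log"], defined.to_dict())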
data/test/igedi_table_1.csv ADDED
@@ -0,0 +1,4 @@
+ log,rmcv,ense
+ BPIC15f4,0.003,0.604
+ RTFMP,0.376,0.112
+ HD,0.517,0.254
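A minimal sketch, assuming pandas, of how the small target table added above can be consumed; the column names are exactly those in the CSV.

# Sketch only: one row per real-world log, with the abbreviated objective targets as columns.
import pandas as pd

targets = pd.read_csv("data/test/igedi_table_1.csv")
for _, row in targets.iterrows():
    print(f'{row["log"]}: rmcv={row["rmcv"]}, ense={row["ense"]}')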
data/validation/2_ense_rmcv_feat.csv ADDED
@@ -0,0 +1,4 @@
+ log,n_traces,n_unique_traces,trace_len_coefficient_variation,trace_len_entropy,trace_len_geometric_mean,trace_len_geometric_std,trace_len_harmonic_mean,trace_len_hist1,trace_len_hist10,trace_len_hist2,trace_len_hist3,trace_len_hist4,trace_len_hist5,trace_len_hist6,trace_len_hist7,trace_len_hist8,trace_len_hist9,trace_len_iqr,trace_len_kurtosis,trace_len_kurtosis_hist,trace_len_max,trace_len_mean,trace_len_median,trace_len_min,trace_len_mode,trace_len_q1,trace_len_q3,trace_len_skewness,trace_len_skewness_hist,trace_len_std,trace_len_variance,kurtosis_variant_occurrence,mean_variant_occurrence,ratio_most_common_variant,ratio_top_10_variants,ratio_top_1_variants,ratio_top_20_variants,ratio_top_50_variants,ratio_top_5_variants,ratio_top_75_variants,skewness_variant_occurrence,std_variant_occurrence,activities_iqr,activities_kurtosis,activities_max,activities_mean,activities_median,activities_min,activities_q1,activities_q3,activities_skewness,activities_std,activities_variance,n_unique_activities,n_unique_start_activities,start_activities_iqr,start_activities_kurtosis,start_activities_max,start_activities_mean,start_activities_median,start_activities_min,start_activities_q1,start_activities_q3,start_activities_skewness,start_activities_std,start_activities_variance,end_activities_iqr,end_activities_kurtosis,end_activities_max,end_activities_mean,end_activities_median,end_activities_min,end_activities_q1,end_activities_q3,end_activities_skewness,end_activities_std,end_activities_variance,n_unique_end_activities,eventropy_global_block,eventropy_global_block_flattened,eventropy_k_block_diff_1,eventropy_k_block_diff_3,eventropy_k_block_diff_5,eventropy_k_block_ratio_1,eventropy_k_block_ratio_3,eventropy_k_block_ratio_5,eventropy_knn_3,eventropy_knn_5,eventropy_knn_7,eventropy_lempel_ziv,eventropy_lempel_ziv_flattened,eventropy_prefix,eventropy_prefix_flattened,eventropy_trace,epa_variant_entropy,epa_normalized_variant_entropy,epa_sequence_entropy,epa_normalized_sequence_entropy,epa_sequence_entropy_linear_forgetting,epa_normalized_sequence_entropy_linear_forgetting,epa_sequence_entropy_exponential_forgetting,epa_normalized_sequence_entropy_exponential_forgetting,ratio_variants_per_number_of_traces
+ genELBPIC15f4_0604_0003,8616,4031,1.0086445672512825,8.700230419287818,8.516920996327995,2.1832133718212567,6.58111248846037,0.05713165933282198,1.682074468800883e-05,0.009932649738269211,0.0033136867035377378,0.0012279143622246447,0.0005214430853282738,0.00017661781922409254,0.0001093348404720574,3.364148937601766e-05,0.0,9.0,11.77613857723645,4.64306597180025,141,11.964136490250697,7.0,3,3,5.0,14.0,2.836323931248485,2.5294876299887217,12.067561272744191,145.6260350714354,1651.5545366193303,2.137434879682461,0.09099350046425256,0.5789229340761374,0.40401578458681525,0.6256963788300836,0.766016713091922,0.5258820798514392,0.883008356545961,36.276105773051086,15.574023282690577,2184.5,1.9085746306932307,34121,12885.375,8627.0,8584,8616.0,10800.5,1.8663249384138656,8507.416043333898,72376127.734375,8,2,2111.0,-2.0,6419,4308.0,4308.0,2197,3252.5,5363.5,0.0,2111.0,4456321.0,768.0,0.0021026107788850723,4895,1723.2,832.0,495,813.0,1581.0,1.331337855426617,1625.5283940922102,2642342.56,5,15.897,16.276,2.756,1.525,1.375,2.756,2.016,1.775,6.564,6.07,5.761,1.405,1.786,12.139,13.493,9.703,365917.06171394786,0.7166786736830569,651595.1462643282,0.5475971681938718,62016.045914910814,0.05211796208164211,266396.7627350506,0.22387845232743814,0.46785051067780875
+ genELHD_0254_0517,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
+ genELRTFMP_0112_0376,6822,565,1.1300022933733087,8.390788875278787,1.9006921917027269,2.263915758458681,1.4763543408149593,0.28822871537617945,0.00010858116985352402,0.04077222927999826,0.02383356678284851,0.006080545511797346,0.005591930247456488,0.002823110416191621,0.0017915893025831464,0.0006514870191211442,0.0004886152643408582,2.0,9.718268017319556,4.770965470001153,28,2.8346525945470535,1.0,1,1,1.0,3.0,2.765986310146101,2.5637920433464965,3.2031639327547703,10.260259180101007,226.4931382842208,12.07433628318584,0.24860744649662855,0.9079448841981823,0.6807387862796834,0.9321313397830548,0.9585165640574611,0.8717384931105248,0.9791849897390794,14.639488482439702,105.6342402074512,1283.0,8.118508585327676,6848,1137.5294117647059,472.0,208,413.0,1696.0,2.9234849385484285,1541.823981624173,2377221.1903114184,17,10,294.25,2.299363631971671,3383,682.2,217.0,101,121.75,416.0,1.9301655015244086,1008.2924972447232,1016653.7600000001,334.5,2.8813625853874614,3383,620.1818181818181,157.0,79,104.5,439.0,2.0614116860983223,981.5564465945092,963453.0578512397,11,9.069,10.932,3.265,0.908,0.67,3.265,1.808,1.456,4.81,4.359,4.05,0.696,2.01,6.995,10.12,4.469,16958.33766640406,0.7450438396474315,70379.87102533762,0.36874603139171797,9719.481922433943,0.050923940806750986,30545.050254490514,0.16003675334882345,0.08282028730577544
data/validation/genELexperiment1_04_02.json CHANGED
@@ -1 +1 @@
- {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "experiment1"}
+ {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02", "target_similarity": 0.7418932364693804}
data/validation/genELexperiment3_04_nan.json ADDED
@@ -0,0 +1 @@
+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan", "target_similarity": 0.7418932612931086}
data/validation/genELexperiment4_nan_02.json ADDED
@@ -0,0 +1 @@
+ {"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02", "target_similarity": 1.0}