Spaces:

andreamalhera
/

igedi

Sleeping

App Files Files Community

Andrea Maldonado committed on Oct 14, 2024

Commit

ae8f2b3

1 Parent(s): 0c619d9

Merge from main

Browse files

Files changed (9) hide show

data/validation/test_benchmark.csv +3 -0
gedi/__init__.py +2 -6
gedi/generator.py +1 -0
gedi/run.py +53 -0
main.py +3 -45
notebooks/gedi_fig6_benchmark_boxplots.ipynb +0 -0
notebooks/gedi_figs4and5_representativeness.ipynb +0 -0
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb +36 -28
setup.py +48 -3

data/validation/test_benchmark.csv CHANGED Viewed

	@@ -0,0 +1,3 @@

+log,fitness_inductive,precision_inductive,fscore_inductive,size_inductive,pnsize_inductive,cfc_inductive,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp
+gen_el_169,0.9998052420892378,0.6662312989788649,0.7996241723917423,34,24,22,0.9383563249832565,0.5979149389882715,0.7304143193451293,22,14,13,0.9358843752091403,0.6513022517490741,0.7680805654451066,28,18,16,0.9999637006454563,0.432690150325331,0.6040181215566763,27,7,9
+gen_el_168,0.9997678338833808,0.6033523537803138,0.7525477883058467,61,34,20,0.48155419290534085,0.9449078138718174,0.6379760800037585,60,35,32,0.9479094601490539,0.5169524053224155,0.669037930473001,67,38,24,0.9999513902099882,0.4283471743974073,0.5997714527549697,93,30,28

gedi/__init__.py CHANGED Viewed

@@ -1,7 +1,3 @@
-from .generator import GenerateEventLogs
-from .features import EventLogFeatures
-from .augmentation import InstanceAugmentator
-from .benchmark import BenchmarkTest
-from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
-__all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']


1	+ from .run import gedi




2
3	+ __all__=['gedi']

gedi/generator.py CHANGED Viewed

@@ -152,6 +152,7 @@ class GenerateEventLogs():
         self.params = params.get(GENERATOR_PARAMS)
         experiment = self.params.get(EXPERIMENT)
         if experiment is not None:
             tasks, output_path = get_tasks(experiment, self.output_path)
             columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}

         self.params = params.get(GENERATOR_PARAMS)
         experiment = self.params.get(EXPERIMENT)
         if experiment is not None:
             tasks, output_path = get_tasks(experiment, self.output_path)
             columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}

gedi/run.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import config
+import pandas as pd
+from datetime import datetime as dt
+from gedi.generator import GenerateEventLogs
+from gedi.features import EventLogFeatures
+from gedi.augmentation import InstanceAugmentator
+from gedi.benchmark import BenchmarkTest
+from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
+from utils.default_argparse import ArgParser
+from utils.param_keys import *
+def run(kwargs:dict, model_params_list: list, filename_list:list):
+    """
+    Execute the configured pipeline steps one after another.
+
+    Every entry of model_params_list is dispatched on its PIPELINE_STEP value:
+    'instance_augmentation', 'event_logs_generation', 'benchmark_test',
+    'feature_extraction' or 'evaluation_plotter'. Entries with any other value
+    are skipped without a message, because the chain has no else branch.
+    @param kwargs: dict
+        must contain the PARAMS key; the whole dict is also forwarded as keyword
+        arguments to EventLogFeatures in the 'feature_extraction' step
+    @param model_params_list: list
+        list of per-step parameter mappings; each one is queried with .get(PIPELINE_STEP)
+        and, for 'evaluation_plotter', indexed with 'output_path' and 'input_path'
+    @param filename_list: list
+        not read anywhere in this function
+    @return: None
+    """
+    # Raises KeyError when PARAMS is missing; the value itself is not read again below.
+    params = kwargs[PARAMS]
+    # Starting values for the state shared between steps; a step replaces them when it runs.
+    ft = EventLogFeatures(None)
+    augmented_ft = InstanceAugmentator()
+    # Empty frame with a 'log' column so gen['log'] can be read even if no generation step ran earlier.
+    gen = pd.DataFrame(columns=['log'])
+    for model_params in model_params_list:
+        if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
+            # Samples come from ft.feat, i.e. the latest 'feature_extraction' result or the initial EventLogFeatures(None).
+            augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
+            AugmentationPlotter(augmented_ft, model_params)
+        elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
+            # Replaces gen with a frame built from the generator's log_config attribute.
+            gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
+            #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
+            #GenerationPlotter(gen, model_params, output_path="output/plots")
+        elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
+            # The BenchmarkTest instance is not used after construction in this function.
+            benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
+            # BenchmarkPlotter(benchmark.features, output_path="output/plots")
+        elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
+            ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
+            FeaturesPlotter(ft.feat, model_params)
+        elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
+            # Direct indexing: a missing 'output_path' or 'input_path' key raises KeyError here.
+            GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
+def gedi(config_path):
+    """
+    Run the GEDI pipeline described by a config file.
+
+    Loads the list of step parameters with config.get_model_params_list(config_path)
+    and hands it to run().
+    @param config_path: str
+        path to the config file, passed unchanged to config.get_model_params_list
+    @return: None
+    """
+    model_params_list = config.get_model_params_list(config_path)
+    # run() is always called with an empty-string 'params' entry and an empty filename list.
+    # NOTE(review): presumably PARAMS == 'params' in utils.param_keys - confirm, otherwise run() raises KeyError.
+    run({'params':""}, model_params_list, [])

main.py CHANGED Viewed

@@ -1,54 +1,12 @@
 import config
-import pandas as pd
 from datetime import datetime as dt
-from gedi.generator import GenerateEventLogs
-from gedi.features import EventLogFeatures
-from gedi.augmentation import InstanceAugmentator
-from gedi.benchmark import BenchmarkTest
-from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
-def run(kwargs:dict, model_paramas_list: list, filename_list:list):
-    """
-    This function chooses the running option for the program.
-    @param kwargs: dict
-        contains the running parameters and the event-log file information
-    @param model_params_list: list
-        contains a list of model parameters, which are used to analyse this different models.
-    @param filename_list: list
-        contains the list of the filenames to load multiple event-logs
-    @return:
-    """
-    params = kwargs[PARAMS]
-    ft = EventLogFeatures(None)
-    augmented_ft = InstanceAugmentator()
-    gen = pd.DataFrame(columns=['log'])
-    for model_params in model_params_list:
-        if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
-            augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
-            AugmentationPlotter(augmented_ft, model_params)
-        elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
-            gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
-            #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
-            #GenerationPlotter(gen, model_params, output_path="output/plots")
-        elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
-            benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
-            # BenchmarkPlotter(benchmark.features, output_path="output/plots")
-        elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
-            ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
-            FeaturesPlotter(ft.feat, model_params)
-        elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
-            GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
 if __name__=='__main__':
     start_gedi = dt.now()
     print(f'INFO: GEDI starting {start_gedi}')
     args = ArgParser().parse('GEDI main')
-    model_params_list = config.get_model_params_list(args.alg_params_json)
-    run({'params':""}, model_params_list, [])
-    print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')

 import config
 from datetime import datetime as dt
+from gedi.run import gedi, run
 from utils.default_argparse import ArgParser
 from utils.param_keys import *
 if __name__=='__main__':
     start_gedi = dt.now()
     print(f'INFO: GEDI starting {start_gedi}')
     args = ArgParser().parse('GEDI main')
+    gedi(args.alg_params_json)
+    print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')

notebooks/gedi_fig6_benchmark_boxplots.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

notebooks/gedi_figs4and5_representativeness.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb CHANGED Viewed

@@ -1,5 +1,21 @@
 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": 8,
@@ -64,6 +80,14 @@
     "    return data"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 11,
@@ -110,7 +134,7 @@
    "id": "07370d54",
    "metadata": {},
    "source": [
-    "## Statistical test: Is there a statistical significant relation between feature similarity and performance metrics?"
    ]
   },
   {
@@ -192,6 +216,14 @@
     "#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 62,
@@ -466,37 +498,13 @@
     "        plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
     "        plt.clf()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "52c58c64",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3717a694",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "c6afe4d9",
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "tag",
    "language": "python",
-   "name": "tag"
   },
   "language_info": {
    "codemirror_mode": {
@@ -508,7 +516,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.16"
   }
  },
  "nbformat": 4,

 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "32241302-7f73-4756-b8a5-27f752de0dea",
+   "metadata": {},
+   "source": [
+    "# Plot - Statistical Tests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "51cee5d6-2d4c-4bdd-bdbf-4b3a3b76e6d6",
+   "metadata": {},
+   "source": [
+    "#### Load Data"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
     "    return data"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "f0d6e731-5f46-4747-82f8-a2f308d150ee",
+   "metadata": {},
+   "source": [
+    "#### Data Preprocessing"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 11,
    "id": "07370d54",
    "metadata": {},
    "source": [
+    "#### Statistical test: Is there a statistical significant relation between feature similarity and performance metrics?"
    ]
   },
   {
     "#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "5e6ecc81-c14d-4859-ab04-49bbf458f7eb",
+   "metadata": {},
+   "source": [
+    "#### Plot - statistical Test of features vs metrics"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 62,
     "        plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
     "        plt.clf()"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

setup.py CHANGED Viewed

@@ -4,7 +4,7 @@ import os
 with open("README.md", "r") as fh:
     long_description = fh.read()
-version_string = os.environ.get("VERSION_PLACEHOLDER", "1.0.0")
 print(version_string)
 version = version_string
@@ -32,7 +32,52 @@ setup(
             'seaborn==0.13.2',
             'smac==2.0.2',
             'tqdm==4.65.0',
-            'streamlit-toggle-switch>=1.0.2'
             ],
         packages = ['gedi'],
         classifiers=[
@@ -42,4 +87,4 @@ setup(
             'License :: OSI Approved :: MIT License',   # Again, pick a license
             'Programming Language :: Python :: 3.9',
     ],
-)

 with open("README.md", "r") as fh:
     long_description = fh.read()
+version_string = os.environ.get("VERSION_PLACEHOLDER", "0.0.6")
 print(version_string)
 version = version_string
             'seaborn==0.13.2',
             'smac==2.0.2',
             'tqdm==4.65.0',
+            'streamlit-toggle-switch>=1.0.2',
+            'click==8.1.7',
+            'cloudpickle==3.0.0',
+            'configspace==0.7.1',
+            'cvxopt==1.3.2',
+            'dask==2024.2.1',
+            'dask-jobqueue==0.8.5',
+            'deprecation==2.1.0',
+            'distributed==2024.2.1',
+            'emcee==3.1.4',
+            'feeed == 1.2.0',
+            'fsspec==2024.2.0',
+            'imbalanced-learn==0.12.0',
+            'imblearn==0.0',
+            'importlib-metadata==7.0.1',
+            'intervaltree==3.1.0',
+            'jinja2==3.1.3',
+            'levenshtein==0.23.0',
+            'locket==1.0.0',
+            'lxml==5.1.0',
+            'markupsafe==2.1.5',
+            'more-itertools==10.2.0',
+            'msgpack==1.0.8',
+            'networkx==3.2.1',
+            'numpy==1.26.4',
+            'pandas>=2.0.0',
+            'partd==1.4.1',
+            'pm4py==2.7.2',
+            'psutil==5.9.8',
+            'pydotplus==2.0.2',
+            'pynisher==1.0.10',
+            'pyrfr==0.9.0',
+            'pyyaml==6.0.1',
+            'rapidfuzz==3.6.1',
+            'regex==2023.12.25',
+            'scikit-learn==1.2.2',
+            'seaborn==0.13.2',
+            'smac==2.0.2',
+            'sortedcontainers==2.4.0',
+            'stringdist==1.0.9',
+            'tblib==3.0.0',
+            'toolz==0.12.1',
+            'tqdm==4.65.0',
+            'typing-extensions==4.10.0',
+            'urllib3==2.2.1',
+            'zict==3.0.0'
             ],
         packages = ['gedi'],
         classifiers=[
             'License :: OSI Approved :: MIT License',   # Again, pick a license
             'Programming Language :: Python :: 3.9',
     ],
+)