Andrea Maldonado committed on
Commit
ae8f2b3
·
1 Parent(s): 0c619d9

Merge from main

Browse files
data/validation/test_benchmark.csv CHANGED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ log,fitness_inductive,precision_inductive,fscore_inductive,size_inductive,pnsize_inductive,cfc_inductive,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp
2
+ gen_el_169,0.9998052420892378,0.6662312989788649,0.7996241723917423,34,24,22,0.9383563249832565,0.5979149389882715,0.7304143193451293,22,14,13,0.9358843752091403,0.6513022517490741,0.7680805654451066,28,18,16,0.9999637006454563,0.432690150325331,0.6040181215566763,27,7,9
3
+ gen_el_168,0.9997678338833808,0.6033523537803138,0.7525477883058467,61,34,20,0.48155419290534085,0.9449078138718174,0.6379760800037585,60,35,32,0.9479094601490539,0.5169524053224155,0.669037930473001,67,38,24,0.9999513902099882,0.4283471743974073,0.5997714527549697,93,30,28
gedi/__init__.py CHANGED
@@ -1,7 +1,3 @@
1
- from .generator import GenerateEventLogs
2
- from .features import EventLogFeatures
3
- from .augmentation import InstanceAugmentator
4
- from .benchmark import BenchmarkTest
5
- from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
6
 
7
- __all__=[ 'GenerateEventLogs', 'EventLogFeatures', 'FeatureAnalyser', 'InstanceAugmentator', 'BenchmarkTest', 'BenchmarkPlotter', 'FeaturesPlotter', 'AugmentationPlotter', 'GenerationPlotter']
 
1
+ from .run import gedi
 
 
 
 
2
 
3
+ __all__=['gedi']
gedi/generator.py CHANGED
@@ -152,6 +152,7 @@ class GenerateEventLogs():
152
 
153
  self.params = params.get(GENERATOR_PARAMS)
154
  experiment = self.params.get(EXPERIMENT)
 
155
  if experiment is not None:
156
  tasks, output_path = get_tasks(experiment, self.output_path)
157
  columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
 
152
 
153
  self.params = params.get(GENERATOR_PARAMS)
154
  experiment = self.params.get(EXPERIMENT)
155
+
156
  if experiment is not None:
157
  tasks, output_path = get_tasks(experiment, self.output_path)
158
  columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
gedi/run.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import config
2
+ import pandas as pd
3
+ from datetime import datetime as dt
4
+ from gedi.generator import GenerateEventLogs
5
+ from gedi.features import EventLogFeatures
6
+ from gedi.augmentation import InstanceAugmentator
7
+ from gedi.benchmark import BenchmarkTest
8
+ from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
9
+ from utils.default_argparse import ArgParser
10
+ from utils.param_keys import *
11
+
12
+ def run(kwargs:dict, model_params_list: list, filename_list:list):
13
+ """
14
+ This function chooses the running option for the program.
15
+ @param kwargs: dict
16
+ contains the running parameters and the event-log file information
17
+ @param model_params_list: list
18
+ contains a list of model parameters, which are used to analyse this different models.
19
+ @param filename_list: list
20
+ contains the list of the filenames to load multiple event-logs
21
+ @return:
22
+ """
23
+ params = kwargs[PARAMS]
24
+ ft = EventLogFeatures(None)
25
+ augmented_ft = InstanceAugmentator()
26
+ gen = pd.DataFrame(columns=['log'])
27
+
28
+ for model_params in model_params_list:
29
+ if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
30
+ augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
31
+ AugmentationPlotter(augmented_ft, model_params)
32
+ elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
33
+ gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
34
+ #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
35
+ #GenerationPlotter(gen, model_params, output_path="output/plots")
36
+ elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
37
+ benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
38
+ # BenchmarkPlotter(benchmark.features, output_path="output/plots")
39
+ elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
40
+ ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
41
+ FeaturesPlotter(ft.feat, model_params)
42
+ elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
43
+ GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
44
+
45
+ def gedi(config_path):
46
+ """
47
+ This function runs the GEDI pipeline.
48
+ @param config_path: str
49
+ contains the path to the config file
50
+ @return:
51
+ """
52
+ model_params_list = config.get_model_params_list(config_path)
53
+ run({'params':""}, model_params_list, [])
main.py CHANGED
@@ -1,54 +1,12 @@
1
  import config
2
- import pandas as pd
3
  from datetime import datetime as dt
4
- from gedi.generator import GenerateEventLogs
5
- from gedi.features import EventLogFeatures
6
- from gedi.augmentation import InstanceAugmentator
7
- from gedi.benchmark import BenchmarkTest
8
- from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
9
  from utils.default_argparse import ArgParser
10
  from utils.param_keys import *
11
 
12
- def run(kwargs:dict, model_paramas_list: list, filename_list:list):
13
- """
14
- This function chooses the running option for the program.
15
- @param kwargs: dict
16
- contains the running parameters and the event-log file information
17
- @param model_params_list: list
18
- contains a list of model parameters, which are used to analyse this different models.
19
- @param filename_list: list
20
- contains the list of the filenames to load multiple event-logs
21
- @return:
22
- """
23
- params = kwargs[PARAMS]
24
- ft = EventLogFeatures(None)
25
- augmented_ft = InstanceAugmentator()
26
- gen = pd.DataFrame(columns=['log'])
27
-
28
- for model_params in model_params_list:
29
- if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
30
- augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
31
- AugmentationPlotter(augmented_ft, model_params)
32
- elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
33
- gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
34
- #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
35
- #GenerationPlotter(gen, model_params, output_path="output/plots")
36
- elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
37
- benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
38
- # BenchmarkPlotter(benchmark.features, output_path="output/plots")
39
- elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
40
- ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
41
- FeaturesPlotter(ft.feat, model_params)
42
- elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
43
- GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
44
-
45
-
46
  if __name__=='__main__':
47
  start_gedi = dt.now()
48
  print(f'INFO: GEDI starting {start_gedi}')
49
-
50
  args = ArgParser().parse('GEDI main')
51
- model_params_list = config.get_model_params_list(args.alg_params_json)
52
- run({'params':""}, model_params_list, [])
53
-
54
- print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
 
1
  import config
 
2
  from datetime import datetime as dt
3
+ from gedi.run import gedi, run
 
 
 
 
4
  from utils.default_argparse import ArgParser
5
  from utils.param_keys import *
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  if __name__=='__main__':
8
  start_gedi = dt.now()
9
  print(f'INFO: GEDI starting {start_gedi}')
 
10
  args = ArgParser().parse('GEDI main')
11
+ gedi(args.alg_params_json)
12
+ print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
 
 
notebooks/gedi_fig6_benchmark_boxplots.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs4and5_representativeness.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb CHANGED
@@ -1,5 +1,21 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": 8,
@@ -64,6 +80,14 @@
64
  " return data"
65
  ]
66
  },
 
 
 
 
 
 
 
 
67
  {
68
  "cell_type": "code",
69
  "execution_count": 11,
@@ -110,7 +134,7 @@
110
  "id": "07370d54",
111
  "metadata": {},
112
  "source": [
113
- "## Statistical test: Is there a statistical significant relation between feature similarity and performance metrics?"
114
  ]
115
  },
116
  {
@@ -192,6 +216,14 @@
192
  "#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
193
  ]
194
  },
 
 
 
 
 
 
 
 
195
  {
196
  "cell_type": "code",
197
  "execution_count": 62,
@@ -466,37 +498,13 @@
466
  " plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
467
  " plt.clf()"
468
  ]
469
- },
470
- {
471
- "cell_type": "code",
472
- "execution_count": null,
473
- "id": "52c58c64",
474
- "metadata": {},
475
- "outputs": [],
476
- "source": []
477
- },
478
- {
479
- "cell_type": "code",
480
- "execution_count": null,
481
- "id": "3717a694",
482
- "metadata": {},
483
- "outputs": [],
484
- "source": []
485
- },
486
- {
487
- "cell_type": "code",
488
- "execution_count": null,
489
- "id": "c6afe4d9",
490
- "metadata": {},
491
- "outputs": [],
492
- "source": []
493
  }
494
  ],
495
  "metadata": {
496
  "kernelspec": {
497
- "display_name": "tag",
498
  "language": "python",
499
- "name": "tag"
500
  },
501
  "language_info": {
502
  "codemirror_mode": {
@@ -508,7 +516,7 @@
508
  "name": "python",
509
  "nbconvert_exporter": "python",
510
  "pygments_lexer": "ipython3",
511
- "version": "3.9.16"
512
  }
513
  },
514
  "nbformat": 4,
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "32241302-7f73-4756-b8a5-27f752de0dea",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Plot - Statistical Tests"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "markdown",
13
+ "id": "51cee5d6-2d4c-4bdd-bdbf-4b3a3b76e6d6",
14
+ "metadata": {},
15
+ "source": [
16
+ "#### Load Data"
17
+ ]
18
+ },
19
  {
20
  "cell_type": "code",
21
  "execution_count": 8,
 
80
  " return data"
81
  ]
82
  },
83
+ {
84
+ "cell_type": "markdown",
85
+ "id": "f0d6e731-5f46-4747-82f8-a2f308d150ee",
86
+ "metadata": {},
87
+ "source": [
88
+ "#### Data Preprocessing"
89
+ ]
90
+ },
91
  {
92
  "cell_type": "code",
93
  "execution_count": 11,
 
134
  "id": "07370d54",
135
  "metadata": {},
136
  "source": [
137
+ "#### Statistical test: Is there a statistical significant relation between feature similarity and performance metrics?"
138
  ]
139
  },
140
  {
 
216
  "#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
217
  ]
218
  },
219
+ {
220
+ "cell_type": "markdown",
221
+ "id": "5e6ecc81-c14d-4859-ab04-49bbf458f7eb",
222
+ "metadata": {},
223
+ "source": [
224
+ "#### Plot - statistical Test of features vs metrics"
225
+ ]
226
+ },
227
  {
228
  "cell_type": "code",
229
  "execution_count": 62,
 
498
  " plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
499
  " plt.clf()"
500
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  }
502
  ],
503
  "metadata": {
504
  "kernelspec": {
505
+ "display_name": "Python 3 (ipykernel)",
506
  "language": "python",
507
+ "name": "python3"
508
  },
509
  "language_info": {
510
  "codemirror_mode": {
 
516
  "name": "python",
517
  "nbconvert_exporter": "python",
518
  "pygments_lexer": "ipython3",
519
+ "version": "3.9.19"
520
  }
521
  },
522
  "nbformat": 4,
setup.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  with open("README.md", "r") as fh:
5
  long_description = fh.read()
6
 
7
- version_string = os.environ.get("VERSION_PLACEHOLDER", "1.0.0")
8
  print(version_string)
9
  version = version_string
10
 
@@ -32,7 +32,52 @@ setup(
32
  'seaborn==0.13.2',
33
  'smac==2.0.2',
34
  'tqdm==4.65.0',
35
- 'streamlit-toggle-switch>=1.0.2'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ],
37
  packages = ['gedi'],
38
  classifiers=[
@@ -42,4 +87,4 @@ setup(
42
  'License :: OSI Approved :: MIT License', # Again, pick a license
43
  'Programming Language :: Python :: 3.9',
44
  ],
45
- )
 
4
  with open("README.md", "r") as fh:
5
  long_description = fh.read()
6
 
7
+ version_string = os.environ.get("VERSION_PLACEHOLDER", "0.0.6")
8
  print(version_string)
9
  version = version_string
10
 
 
32
  'seaborn==0.13.2',
33
  'smac==2.0.2',
34
  'tqdm==4.65.0',
35
+ 'streamlit-toggle-switch>=1.0.2',
36
+ 'click==8.1.7',
37
+ 'cloudpickle==3.0.0',
38
+ 'configspace==0.7.1',
39
+ 'cvxopt==1.3.2',
40
+ 'dask==2024.2.1',
41
+ 'dask-jobqueue==0.8.5',
42
+ 'deprecation==2.1.0',
43
+ 'distributed==2024.2.1',
44
+ 'emcee==3.1.4',
45
+ 'feeed == 1.2.0',
46
+ 'fsspec==2024.2.0',
47
+ 'imbalanced-learn==0.12.0',
48
+ 'imblearn==0.0',
49
+ 'importlib-metadata==7.0.1',
50
+ 'intervaltree==3.1.0',
51
+ 'jinja2==3.1.3',
52
+ 'levenshtein==0.23.0',
53
+ 'locket==1.0.0',
54
+ 'lxml==5.1.0',
55
+ 'markupsafe==2.1.5',
56
+ 'more-itertools==10.2.0',
57
+ 'msgpack==1.0.8',
58
+ 'networkx==3.2.1',
59
+ 'numpy==1.26.4',
60
+ 'pandas>=2.0.0',
61
+ 'partd==1.4.1',
62
+ 'pm4py==2.7.2',
63
+ 'psutil==5.9.8',
64
+ 'pydotplus==2.0.2',
65
+ 'pynisher==1.0.10',
66
+ 'pyrfr==0.9.0',
67
+ 'pyyaml==6.0.1',
68
+ 'rapidfuzz==3.6.1',
69
+ 'regex==2023.12.25',
70
+ 'scikit-learn==1.2.2',
71
+ 'seaborn==0.13.2',
72
+ 'smac==2.0.2',
73
+ 'sortedcontainers==2.4.0',
74
+ 'stringdist==1.0.9',
75
+ 'tblib==3.0.0',
76
+ 'toolz==0.12.1',
77
+ 'tqdm==4.65.0',
78
+ 'typing-extensions==4.10.0',
79
+ 'urllib3==2.2.1',
80
+ 'zict==3.0.0'
81
  ],
82
  packages = ['gedi'],
83
  classifiers=[
 
87
  'License :: OSI Approved :: MIT License', # Again, pick a license
88
  'Programming Language :: Python :: 3.9',
89
  ],
90
+ )