Spaces:
Running
Running
Andrea Maldonado
commited on
Commit
·
ae8f2b3
1
Parent(s):
0c619d9
Merge from main
Browse files- data/validation/test_benchmark.csv +3 -0
- gedi/__init__.py +2 -6
- gedi/generator.py +1 -0
- gedi/run.py +53 -0
- main.py +3 -45
- notebooks/gedi_fig6_benchmark_boxplots.ipynb +0 -0
- notebooks/gedi_figs4and5_representativeness.ipynb +0 -0
- notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb +36 -28
- setup.py +48 -3
data/validation/test_benchmark.csv
CHANGED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
log,fitness_inductive,precision_inductive,fscore_inductive,size_inductive,pnsize_inductive,cfc_inductive,fitness_heu,precision_heu,fscore_heu,size_heu,pnsize_heu,cfc_heu,fitness_imf,precision_imf,fscore_imf,size_imf,pnsize_imf,cfc_imf,fitness_ilp,precision_ilp,fscore_ilp,size_ilp,pnsize_ilp,cfc_ilp
|
2 |
+
gen_el_169,0.9998052420892378,0.6662312989788649,0.7996241723917423,34,24,22,0.9383563249832565,0.5979149389882715,0.7304143193451293,22,14,13,0.9358843752091403,0.6513022517490741,0.7680805654451066,28,18,16,0.9999637006454563,0.432690150325331,0.6040181215566763,27,7,9
|
3 |
+
gen_el_168,0.9997678338833808,0.6033523537803138,0.7525477883058467,61,34,20,0.48155419290534085,0.9449078138718174,0.6379760800037585,60,35,32,0.9479094601490539,0.5169524053224155,0.669037930473001,67,38,24,0.9999513902099882,0.4283471743974073,0.5997714527549697,93,30,28
|
gedi/__init__.py
CHANGED
@@ -1,7 +1,3 @@
|
|
1 |
-
from .generator import GenerateEventLogs
|
2 |
-
from .features import EventLogFeatures
|
3 |
-
from .augmentation import InstanceAugmentator
|
4 |
-
from .benchmark import BenchmarkTest
|
5 |
-
from .plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
|
6 |
|
7 |
-
__all__=[
|
|
|
1 |
+
from .run import gedi
|
|
|
|
|
|
|
|
|
2 |
|
3 |
+
__all__=['gedi']
|
gedi/generator.py
CHANGED
@@ -152,6 +152,7 @@ class GenerateEventLogs():
|
|
152 |
|
153 |
self.params = params.get(GENERATOR_PARAMS)
|
154 |
experiment = self.params.get(EXPERIMENT)
|
|
|
155 |
if experiment is not None:
|
156 |
tasks, output_path = get_tasks(experiment, self.output_path)
|
157 |
columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
|
|
|
152 |
|
153 |
self.params = params.get(GENERATOR_PARAMS)
|
154 |
experiment = self.params.get(EXPERIMENT)
|
155 |
+
|
156 |
if experiment is not None:
|
157 |
tasks, output_path = get_tasks(experiment, self.output_path)
|
158 |
columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
|
gedi/run.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import config
|
2 |
+
import pandas as pd
|
3 |
+
from datetime import datetime as dt
|
4 |
+
from gedi.generator import GenerateEventLogs
|
5 |
+
from gedi.features import EventLogFeatures
|
6 |
+
from gedi.augmentation import InstanceAugmentator
|
7 |
+
from gedi.benchmark import BenchmarkTest
|
8 |
+
from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
|
9 |
+
from utils.default_argparse import ArgParser
|
10 |
+
from utils.param_keys import *
|
11 |
+
|
12 |
+
def run(kwargs:dict, model_params_list: list, filename_list:list):
    """
    Execute the configured pipeline steps one after another.

    Every entry of model_params_list is dispatched on its PIPELINE_STEP value;
    entries whose step matches none of the branches below are skipped.
    Event logs produced by an 'event_logs_generation' step and features
    produced by a 'feature_extraction' step are handed on to later steps.

    @param kwargs: dict
        contains the running parameters; must hold the PARAMS key and is
        unpacked into EventLogFeatures during feature extraction
    @param model_params_list: list
        contains one parameter mapping per pipeline step, processed in order
    @param filename_list: list
        contains the list of the filenames to load multiple event-logs
        (not read inside this function)
    @return: None
    """
    # Not used below, but the lookup fails fast with KeyError when PARAMS is absent.
    params = kwargs[PARAMS]

    # Placeholders so that later steps have something to read even when the
    # generation / feature-extraction steps are not part of the configuration.
    features = EventLogFeatures(None)
    augmented = InstanceAugmentator()
    generated = pd.DataFrame(columns=['log'])

    for step_params in model_params_list:
        step = step_params.get(PIPELINE_STEP)

        if step == 'instance_augmentation':
            augmented = InstanceAugmentator(aug_params=step_params, samples=features.feat)
            AugmentationPlotter(augmented, step_params)
        elif step == 'event_logs_generation':
            generated = pd.DataFrame(GenerateEventLogs(step_params).log_config)
            # Disabled alternatives kept from the original implementation:
            #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
            #GenerationPlotter(gen, model_params, output_path="output/plots")
        elif step == 'benchmark_test':
            benchmark = BenchmarkTest(step_params, event_logs=generated['log'])
            # BenchmarkPlotter(benchmark.features, output_path="output/plots")
        elif step == 'feature_extraction':
            features = EventLogFeatures(**kwargs, logs=generated['log'], ft_params=step_params)
            FeaturesPlotter(features.feat, step_params)
        elif step == "evaluation_plotter":
            GenerationPlotter(
                generated,
                step_params,
                output_path=step_params['output_path'],
                input_path=step_params['input_path'],
            )
|
44 |
+
|
45 |
+
def gedi(config_path):
    """
    Entry point that runs the GEDI pipeline for one configuration file.

    @param config_path: str
        contains the path to the config file; it is handed to
        config.get_model_params_list to obtain the list of pipeline steps
    @return: None
    """
    pipeline_steps = config.get_model_params_list(config_path)
    run_kwargs = {'params':""}
    run(run_kwargs, pipeline_steps, [])
|
main.py
CHANGED
@@ -1,54 +1,12 @@
|
|
1 |
import config
|
2 |
-
import pandas as pd
|
3 |
from datetime import datetime as dt
|
4 |
-
from gedi.generator import GenerateEventLogs
|
5 |
-
from gedi.features import EventLogFeatures
|
6 |
-
from gedi.augmentation import InstanceAugmentator
|
7 |
-
from gedi.benchmark import BenchmarkTest
|
8 |
-
from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
|
9 |
from utils.default_argparse import ArgParser
|
10 |
from utils.param_keys import *
|
11 |
|
12 |
-
def run(kwargs:dict, model_paramas_list: list, filename_list:list):
|
13 |
-
"""
|
14 |
-
This function chooses the running option for the program.
|
15 |
-
@param kwargs: dict
|
16 |
-
contains the running parameters and the event-log file information
|
17 |
-
@param model_params_list: list
|
18 |
-
contains a list of model parameters, which are used to analyse this different models.
|
19 |
-
@param filename_list: list
|
20 |
-
contains the list of the filenames to load multiple event-logs
|
21 |
-
@return:
|
22 |
-
"""
|
23 |
-
params = kwargs[PARAMS]
|
24 |
-
ft = EventLogFeatures(None)
|
25 |
-
augmented_ft = InstanceAugmentator()
|
26 |
-
gen = pd.DataFrame(columns=['log'])
|
27 |
-
|
28 |
-
for model_params in model_params_list:
|
29 |
-
if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
|
30 |
-
augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
|
31 |
-
AugmentationPlotter(augmented_ft, model_params)
|
32 |
-
elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
|
33 |
-
gen = pd.DataFrame(GenerateEventLogs(model_params).log_config)
|
34 |
-
#gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
|
35 |
-
#GenerationPlotter(gen, model_params, output_path="output/plots")
|
36 |
-
elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
|
37 |
-
benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
|
38 |
-
# BenchmarkPlotter(benchmark.features, output_path="output/plots")
|
39 |
-
elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
|
40 |
-
ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
|
41 |
-
FeaturesPlotter(ft.feat, model_params)
|
42 |
-
elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
|
43 |
-
GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
|
44 |
-
|
45 |
-
|
46 |
if __name__=='__main__':
|
47 |
start_gedi = dt.now()
|
48 |
print(f'INFO: GEDI starting {start_gedi}')
|
49 |
-
|
50 |
args = ArgParser().parse('GEDI main')
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
|
|
|
1 |
import config
|
|
|
2 |
from datetime import datetime as dt
|
3 |
+
from gedi.run import gedi, run
|
|
|
|
|
|
|
|
|
4 |
from utils.default_argparse import ArgParser
|
5 |
from utils.param_keys import *
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
if __name__=='__main__':
    # Take the timestamp before argument parsing so the reported duration
    # covers the whole invocation.
    start_gedi = dt.now()
    print(f'INFO: GEDI starting {start_gedi}')

    # The parsed arguments carry the path of the pipeline configuration.
    args = ArgParser().parse('GEDI main')
    config_path = args.alg_params_json
    gedi(config_path)

    # The printed value is a datetime.timedelta, not a plain number of seconds.
    print(f'SUCCESS: GEDI took {dt.now()-start_gedi} sec.')
|
|
|
|
notebooks/gedi_fig6_benchmark_boxplots.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_figs4and5_representativeness.ipynb
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
notebooks/gedi_figs7and8_benchmarking_statisticalTests.ipynb
CHANGED
@@ -1,5 +1,21 @@
|
|
1 |
{
|
2 |
"cells": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
"execution_count": 8,
|
@@ -64,6 +80,14 @@
|
|
64 |
" return data"
|
65 |
]
|
66 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
{
|
68 |
"cell_type": "code",
|
69 |
"execution_count": 11,
|
@@ -110,7 +134,7 @@
|
|
110 |
"id": "07370d54",
|
111 |
"metadata": {},
|
112 |
"source": [
|
113 |
-
"
|
114 |
]
|
115 |
},
|
116 |
{
|
@@ -192,6 +216,14 @@
|
|
192 |
"#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
|
193 |
]
|
194 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
{
|
196 |
"cell_type": "code",
|
197 |
"execution_count": 62,
|
@@ -466,37 +498,13 @@
|
|
466 |
" plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
|
467 |
" plt.clf()"
|
468 |
]
|
469 |
-
},
|
470 |
-
{
|
471 |
-
"cell_type": "code",
|
472 |
-
"execution_count": null,
|
473 |
-
"id": "52c58c64",
|
474 |
-
"metadata": {},
|
475 |
-
"outputs": [],
|
476 |
-
"source": []
|
477 |
-
},
|
478 |
-
{
|
479 |
-
"cell_type": "code",
|
480 |
-
"execution_count": null,
|
481 |
-
"id": "3717a694",
|
482 |
-
"metadata": {},
|
483 |
-
"outputs": [],
|
484 |
-
"source": []
|
485 |
-
},
|
486 |
-
{
|
487 |
-
"cell_type": "code",
|
488 |
-
"execution_count": null,
|
489 |
-
"id": "c6afe4d9",
|
490 |
-
"metadata": {},
|
491 |
-
"outputs": [],
|
492 |
-
"source": []
|
493 |
}
|
494 |
],
|
495 |
"metadata": {
|
496 |
"kernelspec": {
|
497 |
-
"display_name": "
|
498 |
"language": "python",
|
499 |
-
"name": "
|
500 |
},
|
501 |
"language_info": {
|
502 |
"codemirror_mode": {
|
@@ -508,7 +516,7 @@
|
|
508 |
"name": "python",
|
509 |
"nbconvert_exporter": "python",
|
510 |
"pygments_lexer": "ipython3",
|
511 |
-
"version": "3.9.
|
512 |
}
|
513 |
},
|
514 |
"nbformat": 4,
|
|
|
1 |
{
|
2 |
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "32241302-7f73-4756-b8a5-27f752de0dea",
|
6 |
+
"metadata": {},
|
7 |
+
"source": [
|
8 |
+
"# Plot - Statistical Tests"
|
9 |
+
]
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"cell_type": "markdown",
|
13 |
+
"id": "51cee5d6-2d4c-4bdd-bdbf-4b3a3b76e6d6",
|
14 |
+
"metadata": {},
|
15 |
+
"source": [
|
16 |
+
"#### Load Data"
|
17 |
+
]
|
18 |
+
},
|
19 |
{
|
20 |
"cell_type": "code",
|
21 |
"execution_count": 8,
|
|
|
80 |
" return data"
|
81 |
]
|
82 |
},
|
83 |
+
{
|
84 |
+
"cell_type": "markdown",
|
85 |
+
"id": "f0d6e731-5f46-4747-82f8-a2f308d150ee",
|
86 |
+
"metadata": {},
|
87 |
+
"source": [
|
88 |
+
"#### Data Preprocessing"
|
89 |
+
]
|
90 |
+
},
|
91 |
{
|
92 |
"cell_type": "code",
|
93 |
"execution_count": 11,
|
|
|
134 |
"id": "07370d54",
|
135 |
"metadata": {},
|
136 |
"source": [
|
137 |
+
"#### Statistical test: Is there a statistically significant relation between feature similarity and performance metrics?"
|
138 |
]
|
139 |
},
|
140 |
{
|
|
|
216 |
"#df_tmp = statistical_test(DATA_SOURCE+\"_feat\", \"Gen\"+DATA_SOURCE+\"_bench\", TEST, IMPUTE)"
|
217 |
]
|
218 |
},
|
219 |
+
{
|
220 |
+
"cell_type": "markdown",
|
221 |
+
"id": "5e6ecc81-c14d-4859-ab04-49bbf458f7eb",
|
222 |
+
"metadata": {},
|
223 |
+
"source": [
|
224 |
+
"#### Plot - statistical Test of features vs metrics"
|
225 |
+
]
|
226 |
+
},
|
227 |
{
|
228 |
"cell_type": "code",
|
229 |
"execution_count": 62,
|
|
|
498 |
" plot_stat_test(masked_results, data_source+\"_feat\", data_source+\"_bench\", test, IMPUTE, cbar=cbar, ylabels=ylabels)\n",
|
499 |
" plt.clf()"
|
500 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
501 |
}
|
502 |
],
|
503 |
"metadata": {
|
504 |
"kernelspec": {
|
505 |
+
"display_name": "Python 3 (ipykernel)",
|
506 |
"language": "python",
|
507 |
+
"name": "python3"
|
508 |
},
|
509 |
"language_info": {
|
510 |
"codemirror_mode": {
|
|
|
516 |
"name": "python",
|
517 |
"nbconvert_exporter": "python",
|
518 |
"pygments_lexer": "ipython3",
|
519 |
+
"version": "3.9.19"
|
520 |
}
|
521 |
},
|
522 |
"nbformat": 4,
|
setup.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
with open("README.md", "r") as fh:
|
5 |
long_description = fh.read()
|
6 |
|
7 |
-
version_string = os.environ.get("VERSION_PLACEHOLDER", "
|
8 |
print(version_string)
|
9 |
version = version_string
|
10 |
|
@@ -32,7 +32,52 @@ setup(
|
|
32 |
'seaborn==0.13.2',
|
33 |
'smac==2.0.2',
|
34 |
'tqdm==4.65.0',
|
35 |
-
'streamlit-toggle-switch>=1.0.2'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
],
|
37 |
packages = ['gedi'],
|
38 |
classifiers=[
|
@@ -42,4 +87,4 @@ setup(
|
|
42 |
'License :: OSI Approved :: MIT License', # Again, pick a license
|
43 |
'Programming Language :: Python :: 3.9',
|
44 |
],
|
45 |
-
)
|
|
|
4 |
with open("README.md", "r") as fh:
|
5 |
long_description = fh.read()
|
6 |
|
7 |
+
version_string = os.environ.get("VERSION_PLACEHOLDER", "0.0.6")
|
8 |
print(version_string)
|
9 |
version = version_string
|
10 |
|
|
|
32 |
'seaborn==0.13.2',
|
33 |
'smac==2.0.2',
|
34 |
'tqdm==4.65.0',
|
35 |
+
'streamlit-toggle-switch>=1.0.2',
|
36 |
+
'click==8.1.7',
|
37 |
+
'cloudpickle==3.0.0',
|
38 |
+
'configspace==0.7.1',
|
39 |
+
'cvxopt==1.3.2',
|
40 |
+
'dask==2024.2.1',
|
41 |
+
'dask-jobqueue==0.8.5',
|
42 |
+
'deprecation==2.1.0',
|
43 |
+
'distributed==2024.2.1',
|
44 |
+
'emcee==3.1.4',
|
45 |
+
'feeed == 1.2.0',
|
46 |
+
'fsspec==2024.2.0',
|
47 |
+
'imbalanced-learn==0.12.0',
|
48 |
+
'imblearn==0.0',
|
49 |
+
'importlib-metadata==7.0.1',
|
50 |
+
'intervaltree==3.1.0',
|
51 |
+
'jinja2==3.1.3',
|
52 |
+
'levenshtein==0.23.0',
|
53 |
+
'locket==1.0.0',
|
54 |
+
'lxml==5.1.0',
|
55 |
+
'markupsafe==2.1.5',
|
56 |
+
'more-itertools==10.2.0',
|
57 |
+
'msgpack==1.0.8',
|
58 |
+
'networkx==3.2.1',
|
59 |
+
'numpy==1.26.4',
|
60 |
+
'pandas>=2.0.0',
|
61 |
+
'partd==1.4.1',
|
62 |
+
'pm4py==2.7.2',
|
63 |
+
'psutil==5.9.8',
|
64 |
+
'pydotplus==2.0.2',
|
65 |
+
'pynisher==1.0.10',
|
66 |
+
'pyrfr==0.9.0',
|
67 |
+
'pyyaml==6.0.1',
|
68 |
+
'rapidfuzz==3.6.1',
|
69 |
+
'regex==2023.12.25',
|
70 |
+
'scikit-learn==1.2.2',
|
71 |
+
'seaborn==0.13.2',
|
72 |
+
'smac==2.0.2',
|
73 |
+
'sortedcontainers==2.4.0',
|
74 |
+
'stringdist==1.0.9',
|
75 |
+
'tblib==3.0.0',
|
76 |
+
'toolz==0.12.1',
|
77 |
+
'tqdm==4.65.0',
|
78 |
+
'typing-extensions==4.10.0',
|
79 |
+
'urllib3==2.2.1',
|
80 |
+
'zict==3.0.0'
|
81 |
],
|
82 |
packages = ['gedi'],
|
83 |
classifiers=[
|
|
|
87 |
'License :: OSI Approved :: MIT License', # Again, pick a license
|
88 |
'Programming Language :: Python :: 3.9',
|
89 |
],
|
90 |
+
)
|