Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
f137caa
1
Parent(s):
cc4dd04
Moves internal utils to package
Browse files
- gedi/augmentation.py +2 -2
- gedi/benchmark.py +2 -2
- gedi/config.py +2 -2
- gedi/features.py +4 -3
- gedi/generator.py +3 -3
- gedi/plotter.py +9 -9
- gedi/run.py +7 -6
- gedi/utils/__init__.py +8 -0
- {utils → gedi/utils}/column_mappings.py +2 -2
- {utils → gedi/utils}/default_argparse.py +0 -0
- {utils → gedi/utils}/param_keys/__init__.py +2 -0
- {utils → gedi/utils}/param_keys/augmentation.py +0 -0
- {utils → gedi/utils}/param_keys/benchmark.py +0 -0
- {utils → gedi/utils}/param_keys/features.py +0 -0
- {utils → gedi/utils}/param_keys/generator.py +0 -0
- {utils → gedi/utils}/param_keys/plotter.py +0 -0
- setup.py +2 -2
- utils/param_keys/analyser.py +0 -4
gedi/augmentation.py
CHANGED
@@ -3,8 +3,8 @@ from collections import Counter
|
|
3 |
from datetime import datetime as dt
|
4 |
from imblearn.over_sampling import SMOTE
|
5 |
from gedi.utils.matrix_tools import insert_missing_data
|
6 |
-
from utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
7 |
-
from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
|
8 |
|
9 |
class InstanceAugmentator:
|
10 |
def __init__(self, aug_params=None, samples=None):
|
|
|
3 |
from datetime import datetime as dt
|
4 |
from imblearn.over_sampling import SMOTE
|
5 |
from gedi.utils.matrix_tools import insert_missing_data
|
6 |
+
from gedi.utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
7 |
+
from gedi.utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
|
8 |
|
9 |
class InstanceAugmentator:
|
10 |
def __init__(self, aug_params=None, samples=None):
|
gedi/benchmark.py
CHANGED
@@ -14,9 +14,9 @@ from pm4py import precision_alignments
|
|
14 |
from pm4py.objects.bpmn.obj import BPMN
|
15 |
from pm4py.objects.log.importer.xes import importer as xes_importer
|
16 |
from gedi.utils.io_helpers import dump_features_json
|
|
|
|
|
17 |
from tqdm import tqdm
|
18 |
-
from utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
19 |
-
from utils.param_keys.benchmark import MINERS
|
20 |
|
21 |
class BenchmarkTest:
|
22 |
def __init__(self, params=None, event_logs=None):
|
|
|
14 |
from pm4py.objects.bpmn.obj import BPMN
|
15 |
from pm4py.objects.log.importer.xes import importer as xes_importer
|
16 |
from gedi.utils.io_helpers import dump_features_json
|
17 |
+
from gedi.utils.param_keys import INPUT_PATH, OUTPUT_PATH
|
18 |
+
from gedi.utils.param_keys.benchmark import MINERS
|
19 |
from tqdm import tqdm
|
|
|
|
|
20 |
|
21 |
class BenchmarkTest:
|
22 |
def __init__(self, params=None, event_logs=None):
|
gedi/config.py
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
import json
|
2 |
import warnings
|
3 |
|
4 |
-
from utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
|
5 |
-
from utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
|
6 |
|
7 |
def get_model_params_list(alg_json_file: str) :#-> list[dict]:
|
8 |
"""
|
|
|
1 |
import json
|
2 |
import warnings
|
3 |
|
4 |
+
from gedi.utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
|
5 |
+
from gedi.utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
|
6 |
|
7 |
def get_model_params_list(alg_json_file: str) :#-> list[dict]:
|
8 |
"""
|
gedi/features.py
CHANGED
@@ -7,10 +7,11 @@ from datetime import datetime as dt
|
|
7 |
from functools import partial
|
8 |
from feeed.feature_extractor import extract_features
|
9 |
from pathlib import Path
|
10 |
-
from utils.
|
11 |
-
from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
|
12 |
from gedi.utils.io_helpers import dump_features_json
|
13 |
-
from utils.
|
|
|
|
|
14 |
def get_sortby_parameter(elem):
|
15 |
number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
|
16 |
return number
|
|
|
7 |
from functools import partial
|
8 |
from feeed.feature_extractor import extract_features
|
9 |
from pathlib import Path
|
10 |
+
from gedi.utils.column_mappings import column_mappings
|
|
|
11 |
from gedi.utils.io_helpers import dump_features_json
|
12 |
+
from gedi.utils.param_keys import INPUT_PATH
|
13 |
+
from gedi.utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
|
14 |
+
|
15 |
def get_sortby_parameter(elem):
|
16 |
number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
|
17 |
return number
|
gedi/generator.py
CHANGED
@@ -17,11 +17,11 @@ from pm4py import generate_process_tree
|
|
17 |
from pm4py import write_xes
|
18 |
from pm4py.sim import play_out
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
-
from utils.
|
21 |
-
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
|
23 |
from gedi.utils.io_helpers import read_csvs
|
24 |
-
from utils.
|
|
|
25 |
import xml.etree.ElementTree as ET
|
26 |
import re
|
27 |
from xml.dom import minidom
|
|
|
17 |
from pm4py import write_xes
|
18 |
from pm4py.sim import play_out
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
+
from gedi.utils.column_mappings import column_mappings
|
|
|
21 |
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
|
22 |
from gedi.utils.io_helpers import read_csvs
|
23 |
+
from gedi.utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
24 |
+
from gedi.utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
25 |
import xml.etree.ElementTree as ET
|
26 |
import re
|
27 |
from xml.dom import minidom
|
gedi/plotter.py
CHANGED
@@ -8,20 +8,20 @@ import seaborn as sns
|
|
8 |
import os
|
9 |
import glob
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
from matplotlib.axes import Axes
|
12 |
from matplotlib.figure import Figure
|
13 |
from matplotlib.lines import Line2D
|
14 |
-
from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
|
15 |
-
from utils.param_keys import OUTPUT_PATH, PIPELINE_STEP
|
16 |
-
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
|
17 |
-
from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
|
18 |
-
from collections import defaultdict
|
19 |
-
|
20 |
from sklearn.preprocessing import Normalizer, StandardScaler
|
21 |
from sklearn.decomposition import PCA
|
22 |
-
|
23 |
-
from gedi.utils.io_helpers import get_keys_abbreviation
|
24 |
-
from gedi.utils.io_helpers import read_csvs, select_instance
|
25 |
|
26 |
def insert_newlines(string, every=140):
|
27 |
return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
|
|
|
8 |
import os
|
9 |
import glob
|
10 |
|
11 |
+
from collections import defaultdict
|
12 |
+
from gedi.generator import get_tasks
|
13 |
+
from gedi.utils.io_helpers import get_keys_abbreviation
|
14 |
+
from gedi.utils.io_helpers import read_csvs, select_instance
|
15 |
+
from gedi.utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
|
16 |
+
from gedi.utils.param_keys import OUTPUT_PATH, PIPELINE_STEP
|
17 |
+
from gedi.utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
|
18 |
+
from gedi.utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
|
19 |
from matplotlib.axes import Axes
|
20 |
from matplotlib.figure import Figure
|
21 |
from matplotlib.lines import Line2D
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
from sklearn.preprocessing import Normalizer, StandardScaler
|
23 |
from sklearn.decomposition import PCA
|
24 |
+
|
|
|
|
|
25 |
|
26 |
def insert_newlines(string, every=140):
|
27 |
return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
|
gedi/run.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1 |
-
import gedi.config as config
|
2 |
import pandas as pd
|
|
|
3 |
from datetime import datetime as dt
|
4 |
-
from gedi.generator import GenerateEventLogs
|
5 |
-
from gedi.features import EventLogFeatures
|
6 |
from gedi.augmentation import InstanceAugmentator
|
7 |
from gedi.benchmark import BenchmarkTest
|
|
|
|
|
|
|
8 |
from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
|
9 |
-
from utils.default_argparse import ArgParser
|
10 |
-
from utils.param_keys import PARAMS, PIPELINE_STEP
|
11 |
|
12 |
def run(kwargs:dict, model_params_list: list, filename_list:list):
|
13 |
"""
|
@@ -49,5 +50,5 @@ def gedi(config_path):
|
|
49 |
contains the path to the config file
|
50 |
@return:
|
51 |
"""
|
52 |
-
model_params_list = config.get_model_params_list(config_path)
|
53 |
run({'params':""}, model_params_list, [])
|
|
|
|
|
1 |
import pandas as pd
|
2 |
+
|
3 |
from datetime import datetime as dt
|
|
|
|
|
4 |
from gedi.augmentation import InstanceAugmentator
|
5 |
from gedi.benchmark import BenchmarkTest
|
6 |
+
from gedi.config import get_model_params_list
|
7 |
+
from gedi.features import EventLogFeatures
|
8 |
+
from gedi.generator import GenerateEventLogs
|
9 |
from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
|
10 |
+
from gedi.utils.default_argparse import ArgParser
|
11 |
+
from gedi.utils.param_keys import PARAMS, PIPELINE_STEP
|
12 |
|
13 |
def run(kwargs:dict, model_params_list: list, filename_list:list):
|
14 |
"""
|
|
|
50 |
contains the path to the config file
|
51 |
@return:
|
52 |
"""
|
53 |
+
model_params_list = get_model_params_list(config_path)
|
54 |
run({'params':""}, model_params_list, [])
|
gedi/utils/__init__.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from .param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
|
2 |
+
from .io_helpers import sort_files
|
3 |
+
from .column_mappings import column_mappings
|
4 |
+
|
5 |
+
__all__ = [
|
6 |
+
"column_mappings","sort_files",
|
7 |
+
"PIPELINE_STEP", "INPUT_PATH", "OUTPUT_PATH"
|
8 |
+
]
|
{utils → gedi/utils}/column_mappings.py
RENAMED
@@ -1,5 +1,5 @@
|
|
1 |
def column_mappings():
|
2 |
-
|
3 |
column_names_short = {
|
4 |
'rutpt': 'ratio_unique_traces_per_trace',
|
5 |
'rmcv': 'ratio_most_common_variant',
|
@@ -12,5 +12,5 @@ def column_mappings():
|
|
12 |
'eseef': 'epa_sequence_entropy_exponential_forgetting',
|
13 |
'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
|
14 |
}
|
15 |
-
|
16 |
return column_names_short
|
|
|
1 |
def column_mappings():
|
2 |
+
|
3 |
column_names_short = {
|
4 |
'rutpt': 'ratio_unique_traces_per_trace',
|
5 |
'rmcv': 'ratio_most_common_variant',
|
|
|
12 |
'eseef': 'epa_sequence_entropy_exponential_forgetting',
|
13 |
'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
|
14 |
}
|
15 |
+
|
16 |
return column_names_short
|
{utils → gedi/utils}/default_argparse.py
RENAMED
File without changes
|
{utils → gedi/utils}/param_keys/__init__.py
RENAMED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
# Model params
|
2 |
ALGORITHM_NAME = 'algorithm_name'
|
3 |
PIPELINE_STEP = 'pipeline_step'
|
|
|
1 |
+
from .features import FEATURE_PARAMS, FEATURE_SET
|
2 |
+
|
3 |
# Model params
|
4 |
ALGORITHM_NAME = 'algorithm_name'
|
5 |
PIPELINE_STEP = 'pipeline_step'
|
{utils → gedi/utils}/param_keys/augmentation.py
RENAMED
File without changes
|
{utils → gedi/utils}/param_keys/benchmark.py
RENAMED
File without changes
|
{utils → gedi/utils}/param_keys/features.py
RENAMED
File without changes
|
{utils → gedi/utils}/param_keys/generator.py
RENAMED
File without changes
|
{utils → gedi/utils}/param_keys/plotter.py
RENAMED
File without changes
|
setup.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from setuptools import setup
|
2 |
import os
|
3 |
|
4 |
with open("README.md", "r") as fh:
|
@@ -11,6 +11,7 @@ version = version_string
|
|
11 |
setup(
|
12 |
name = 'gedi',
|
13 |
version = str(version),
|
|
|
14 |
description = 'Generating Event Data with Intentional Features for Benchmarking Process Mining',
|
15 |
author = 'Andrea Maldonado',
|
16 |
author_email = '[email protected]',
|
@@ -80,7 +81,6 @@ setup(
|
|
80 |
'urllib3==2.2.1',
|
81 |
'zict==3.0.0'
|
82 |
],
|
83 |
-
packages = ['gedi'],
|
84 |
classifiers=[
|
85 |
'Development Status :: 3 - Alpha', # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package
|
86 |
'Intended Audience :: Science/Research', # Define that your audience are developers
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
import os
|
3 |
|
4 |
with open("README.md", "r") as fh:
|
|
|
11 |
setup(
|
12 |
name = 'gedi',
|
13 |
version = str(version),
|
14 |
+
packages=find_packages(),
|
15 |
description = 'Generating Event Data with Intentional Features for Benchmarking Process Mining',
|
16 |
author = 'Andrea Maldonado',
|
17 |
author_email = '[email protected]',
|
|
|
81 |
'urllib3==2.2.1',
|
82 |
'zict==3.0.0'
|
83 |
],
|
|
|
84 |
classifiers=[
|
85 |
'Development Status :: 3 - Alpha', # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package
|
86 |
'Intended Audience :: Science/Research', # Define that your audience are developers
|
utils/param_keys/analyser.py
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
# Analyser params
|
2 |
-
MODEL = 'model'
|
3 |
-
INPUT_PARAMS = 'input_params'
|
4 |
-
PERPLEXITY = 'perplexity'
|
|
|
|
|
|
|
|
|
|