Andrea Maldonado committed on
Commit
f137caa
·
1 Parent(s): cc4dd04

Moves internal utils to package

Browse files
gedi/augmentation.py CHANGED
@@ -3,8 +3,8 @@ from collections import Counter
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE
5
  from gedi.utils.matrix_tools import insert_missing_data
6
- from utils.param_keys import INPUT_PATH, OUTPUT_PATH
7
- from utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
8
 
9
  class InstanceAugmentator:
10
  def __init__(self, aug_params=None, samples=None):
 
3
  from datetime import datetime as dt
4
  from imblearn.over_sampling import SMOTE
5
  from gedi.utils.matrix_tools import insert_missing_data
6
+ from gedi.utils.param_keys import INPUT_PATH, OUTPUT_PATH
7
+ from gedi.utils.param_keys.augmentation import AUGMENTATION_PARAMS, NO_SAMPLES, FEATURE_SELECTION, METHOD
8
 
9
  class InstanceAugmentator:
10
  def __init__(self, aug_params=None, samples=None):
gedi/benchmark.py CHANGED
@@ -14,9 +14,9 @@ from pm4py import precision_alignments
14
  from pm4py.objects.bpmn.obj import BPMN
15
  from pm4py.objects.log.importer.xes import importer as xes_importer
16
  from gedi.utils.io_helpers import dump_features_json
 
 
17
  from tqdm import tqdm
18
- from utils.param_keys import INPUT_PATH, OUTPUT_PATH
19
- from utils.param_keys.benchmark import MINERS
20
 
21
  class BenchmarkTest:
22
  def __init__(self, params=None, event_logs=None):
 
14
  from pm4py.objects.bpmn.obj import BPMN
15
  from pm4py.objects.log.importer.xes import importer as xes_importer
16
  from gedi.utils.io_helpers import dump_features_json
17
+ from gedi.utils.param_keys import INPUT_PATH, OUTPUT_PATH
18
+ from gedi.utils.param_keys.benchmark import MINERS
19
  from tqdm import tqdm
 
 
20
 
21
  class BenchmarkTest:
22
  def __init__(self, params=None, event_logs=None):
gedi/config.py CHANGED
@@ -1,8 +1,8 @@
1
  import json
2
  import warnings
3
 
4
- from utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
5
- from utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
6
 
7
  def get_model_params_list(alg_json_file: str) :#-> list[dict]:
8
  """
 
1
  import json
2
  import warnings
3
 
4
+ from gedi.utils.param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
5
+ from gedi.utils.param_keys.features import FEATURE_SET, FEATURE_PARAMS
6
 
7
  def get_model_params_list(alg_json_file: str) :#-> list[dict]:
8
  """
gedi/features.py CHANGED
@@ -7,10 +7,11 @@ from datetime import datetime as dt
7
  from functools import partial
8
  from feeed.feature_extractor import extract_features
9
  from pathlib import Path
10
- from utils.param_keys import INPUT_PATH
11
- from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
12
  from gedi.utils.io_helpers import dump_features_json
13
- from utils.column_mappings import column_mappings
 
 
14
  def get_sortby_parameter(elem):
15
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
16
  return number
 
7
  from functools import partial
8
  from feeed.feature_extractor import extract_features
9
  from pathlib import Path
10
+ from gedi.utils.column_mappings import column_mappings
 
11
  from gedi.utils.io_helpers import dump_features_json
12
+ from gedi.utils.param_keys import INPUT_PATH
13
+ from gedi.utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
14
+
15
  def get_sortby_parameter(elem):
16
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
17
  return number
gedi/generator.py CHANGED
@@ -17,11 +17,11 @@ from pm4py import generate_process_tree
17
  from pm4py import write_xes
18
  from pm4py.sim import play_out
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
- from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
- from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
23
  from gedi.utils.io_helpers import read_csvs
24
- from utils.column_mappings import column_mappings
 
25
  import xml.etree.ElementTree as ET
26
  import re
27
  from xml.dom import minidom
 
17
  from pm4py import write_xes
18
  from pm4py.sim import play_out
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
+ from gedi.utils.column_mappings import column_mappings
 
21
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
22
  from gedi.utils.io_helpers import read_csvs
23
+ from gedi.utils.param_keys import OUTPUT_PATH, INPUT_PATH
24
+ from gedi.utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
25
  import xml.etree.ElementTree as ET
26
  import re
27
  from xml.dom import minidom
gedi/plotter.py CHANGED
@@ -8,20 +8,20 @@ import seaborn as sns
8
  import os
9
  import glob
10
 
 
 
 
 
 
 
 
 
11
  from matplotlib.axes import Axes
12
  from matplotlib.figure import Figure
13
  from matplotlib.lines import Line2D
14
- from utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
15
- from utils.param_keys import OUTPUT_PATH, PIPELINE_STEP
16
- from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
17
- from utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
18
- from collections import defaultdict
19
-
20
  from sklearn.preprocessing import Normalizer, StandardScaler
21
  from sklearn.decomposition import PCA
22
- from gedi.generator import get_tasks
23
- from gedi.utils.io_helpers import get_keys_abbreviation
24
- from gedi.utils.io_helpers import read_csvs, select_instance
25
 
26
  def insert_newlines(string, every=140):
27
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
 
8
  import os
9
  import glob
10
 
11
+ from collections import defaultdict
12
+ from gedi.generator import get_tasks
13
+ from gedi.utils.io_helpers import get_keys_abbreviation
14
+ from gedi.utils.io_helpers import read_csvs, select_instance
15
+ from gedi.utils.param_keys import PLOT_TYPE, PROJECTION, EXPLAINED_VAR, PLOT_3D_MAP
16
+ from gedi.utils.param_keys import OUTPUT_PATH, PIPELINE_STEP
17
+ from gedi.utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, PLOT_REFERENCE_FEATURE
18
+ from gedi.utils.param_keys.plotter import REAL_EVENTLOG_PATH, FONT_SIZE, BOXPLOT_WIDTH
19
  from matplotlib.axes import Axes
20
  from matplotlib.figure import Figure
21
  from matplotlib.lines import Line2D
 
 
 
 
 
 
22
  from sklearn.preprocessing import Normalizer, StandardScaler
23
  from sklearn.decomposition import PCA
24
+
 
 
25
 
26
  def insert_newlines(string, every=140):
27
  return '\n'.join(string[i:i+every] for i in range(0, len(string), every))
gedi/run.py CHANGED
@@ -1,13 +1,14 @@
1
- import gedi.config as config
2
  import pandas as pd
 
3
  from datetime import datetime as dt
4
- from gedi.generator import GenerateEventLogs
5
- from gedi.features import EventLogFeatures
6
  from gedi.augmentation import InstanceAugmentator
7
  from gedi.benchmark import BenchmarkTest
 
 
 
8
  from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
9
- from utils.default_argparse import ArgParser
10
- from utils.param_keys import PARAMS, PIPELINE_STEP
11
 
12
  def run(kwargs:dict, model_params_list: list, filename_list:list):
13
  """
@@ -49,5 +50,5 @@ def gedi(config_path):
49
  contains the path to the config file
50
  @return:
51
  """
52
- model_params_list = config.get_model_params_list(config_path)
53
  run({'params':""}, model_params_list, [])
 
 
1
  import pandas as pd
2
+
3
  from datetime import datetime as dt
 
 
4
  from gedi.augmentation import InstanceAugmentator
5
  from gedi.benchmark import BenchmarkTest
6
+ from gedi.config import get_model_params_list
7
+ from gedi.features import EventLogFeatures
8
+ from gedi.generator import GenerateEventLogs
9
  from gedi.plotter import BenchmarkPlotter, FeaturesPlotter, AugmentationPlotter, GenerationPlotter
10
+ from gedi.utils.default_argparse import ArgParser
11
+ from gedi.utils.param_keys import PARAMS, PIPELINE_STEP
12
 
13
  def run(kwargs:dict, model_params_list: list, filename_list:list):
14
  """
 
50
  contains the path to the config file
51
  @return:
52
  """
53
+ model_params_list = get_model_params_list(config_path)
54
  run({'params':""}, model_params_list, [])
gedi/utils/__init__.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from .param_keys import PIPELINE_STEP, INPUT_PATH, OUTPUT_PATH
2
+ from .io_helpers import sort_files
3
+ from .column_mappings import column_mappings
4
+
5
+ __all__ = [
6
+ "column_mappings","sort_files",
7
+ "PIPELINE_STEP", "INPUT_PATH", "OUTPUT_PATH"
8
+ ]
{utils → gedi/utils}/column_mappings.py RENAMED
@@ -1,5 +1,5 @@
1
  def column_mappings():
2
-
3
  column_names_short = {
4
  'rutpt': 'ratio_unique_traces_per_trace',
5
  'rmcv': 'ratio_most_common_variant',
@@ -12,5 +12,5 @@ def column_mappings():
12
  'eseef': 'epa_sequence_entropy_exponential_forgetting',
13
  'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
14
  }
15
-
16
  return column_names_short
 
1
  def column_mappings():
2
+
3
  column_names_short = {
4
  'rutpt': 'ratio_unique_traces_per_trace',
5
  'rmcv': 'ratio_most_common_variant',
 
12
  'eseef': 'epa_sequence_entropy_exponential_forgetting',
13
  'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
14
  }
15
+
16
  return column_names_short
{utils → gedi/utils}/default_argparse.py RENAMED
File without changes
{utils → gedi/utils}/param_keys/__init__.py RENAMED
@@ -1,3 +1,5 @@
 
 
1
  # Model params
2
  ALGORITHM_NAME = 'algorithm_name'
3
  PIPELINE_STEP = 'pipeline_step'
 
1
+ from .features import FEATURE_PARAMS, FEATURE_SET
2
+
3
  # Model params
4
  ALGORITHM_NAME = 'algorithm_name'
5
  PIPELINE_STEP = 'pipeline_step'
{utils → gedi/utils}/param_keys/augmentation.py RENAMED
File without changes
{utils → gedi/utils}/param_keys/benchmark.py RENAMED
File without changes
{utils → gedi/utils}/param_keys/features.py RENAMED
File without changes
{utils → gedi/utils}/param_keys/generator.py RENAMED
File without changes
{utils → gedi/utils}/param_keys/plotter.py RENAMED
File without changes
setup.py CHANGED
@@ -1,4 +1,4 @@
1
- from setuptools import setup
2
  import os
3
 
4
  with open("README.md", "r") as fh:
@@ -11,6 +11,7 @@ version = version_string
11
  setup(
12
  name = 'gedi',
13
  version = str(version),
 
14
  description = 'Generating Event Data with Intentional Features for Benchmarking Process Mining',
15
  author = 'Andrea Maldonado',
16
  author_email = '[email protected]',
@@ -80,7 +81,6 @@ setup(
80
  'urllib3==2.2.1',
81
  'zict==3.0.0'
82
  ],
83
- packages = ['gedi'],
84
  classifiers=[
85
  'Development Status :: 3 - Alpha', # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package
86
  'Intended Audience :: Science/Research', # Define that your audience are developers
 
1
+ from setuptools import setup, find_packages
2
  import os
3
 
4
  with open("README.md", "r") as fh:
 
11
  setup(
12
  name = 'gedi',
13
  version = str(version),
14
+ packages=find_packages(),
15
  description = 'Generating Event Data with Intentional Features for Benchmarking Process Mining',
16
  author = 'Andrea Maldonado',
17
  author_email = '[email protected]',
 
81
  'urllib3==2.2.1',
82
  'zict==3.0.0'
83
  ],
 
84
  classifiers=[
85
  'Development Status :: 3 - Alpha', # Chose either "3 - Alpha", "4 - Beta" or "5 - Production/Stable" as the current state of your package
86
  'Intended Audience :: Science/Research', # Define that your audience are developers
utils/param_keys/analyser.py DELETED
@@ -1,4 +0,0 @@
1
- # Analyser params
2
- MODEL = 'model'
3
- INPUT_PARAMS = 'input_params'
4
- PERPLEXITY = 'perplexity'