Andrea Maldonado committed on
Commit
a451e02
·
1 Parent(s): fd6f629

directory structure

Browse files
.conda.yml CHANGED
@@ -55,7 +55,6 @@ dependencies:
55
  - pydotplus==2.0.2
56
  - pynisher==1.0.10
57
  - pyrfr==0.9.0
58
- - pywin32==306
59
  - pyyaml==6.0.1
60
  - rapidfuzz==3.8.1
61
  - referencing==0.35.1
 
55
  - pydotplus==2.0.2
56
  - pynisher==1.0.10
57
  - pyrfr==0.9.0
 
58
  - pyyaml==6.0.1
59
  - rapidfuzz==3.8.1
60
  - referencing==0.35.1
gedi/generator.py CHANGED
@@ -19,7 +19,8 @@ from pm4py.sim import play_out
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
- from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, read_csvs
 
23
  import xml.etree.ElementTree as ET
24
  import re
25
  from xml.dom import minidom
@@ -80,7 +81,7 @@ def removeextralines(elem):
80
  element.tail=""
81
  if not re.search(hasWords,str(element.text)):
82
  element.text = ""
83
-
84
  def add_extension_before_traces(xes_file):
85
  # Register the namespace
86
  ET.register_namespace('', "http://www.xes-standard.org/")
@@ -158,6 +159,7 @@ class GenerateEventLogs():
158
  tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
159
 
160
  if tasks is not None:
 
161
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
162
  #self.generator_wrapper([*tasks.iterrows()][0])# For testing
163
  with multiprocessing.Pool(num_cores) as p:
@@ -208,7 +210,7 @@ class GenerateEventLogs():
208
  self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
209
 
210
  save_path = get_output_key_value_location(self.objectives,
211
- self.output_path, identifier)+".xes"
212
 
213
  write_xes(log_config['log'], save_path)
214
  add_extension_before_traces(save_path)
@@ -219,7 +221,7 @@ class GenerateEventLogs():
219
  if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
220
  features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
221
  features_to_dump['log'] = identifier.replace('genEL', '')
222
- dump_features_json(features_to_dump, self.output_path, identifier, objectives=self.objectives)
223
  return log_config
224
 
225
  def generate_optimized_log(self, config):
 
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
+ from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
23
+ from gedi.utils.io_helpers import read_csvs
24
  import xml.etree.ElementTree as ET
25
  import re
26
  from xml.dom import minidom
 
81
  element.tail=""
82
  if not re.search(hasWords,str(element.text)):
83
  element.text = ""
84
+
85
  def add_extension_before_traces(xes_file):
86
  # Register the namespace
87
  ET.register_namespace('', "http://www.xes-standard.org/")
 
159
  tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
160
 
161
  if tasks is not None:
162
+ self.feature_keys = tasks.columns.tolist()
163
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
164
  #self.generator_wrapper([*tasks.iterrows()][0])# For testing
165
  with multiprocessing.Pool(num_cores) as p:
 
210
  self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
211
 
212
  save_path = get_output_key_value_location(self.objectives,
213
+ self.output_path, identifier, self.feature_keys)+".xes"
214
 
215
  write_xes(log_config['log'], save_path)
216
  add_extension_before_traces(save_path)
 
221
  if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
222
  features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
223
  features_to_dump['log'] = identifier.replace('genEL', '')
224
+ dump_features_json(features_to_dump, self.output_path, identifier, objectives=self.objectives, obj_keys=self.feature_keys)
225
  return log_config
226
 
227
  def generate_optimized_log(self, config):
gedi/utils/io_helpers.py CHANGED
@@ -52,9 +52,10 @@ def get_keys_abbreviation(obj_keys):
52
  abbreviated_keys.append(abbreviated_key)
53
  return '_'.join(abbreviated_keys)
54
 
55
- def get_output_key_value_location(obj, output_path, identifier):
56
  obj_sorted = dict(sorted(obj.items()))
57
- obj_keys = [*obj_sorted.keys()]
 
58
 
59
  obj_values = [round(x, 4) for x in [*obj_sorted.values()]]
60
 
@@ -71,13 +72,13 @@ def get_output_key_value_location(obj, output_path, identifier):
71
  save_path = os.path.join(folder_path, generated_file_name)
72
  return save_path
73
 
74
- def dump_features_json(features: dict, output_path, identifier, objectives=None, content_type="features"):
75
  output_parts = PurePath(output_path).parts
76
  feature_dir = os.path.join(output_parts[0], content_type,
77
  *output_parts[1:])
78
  if objectives is not None:
79
  json_path = get_output_key_value_location(objectives,
80
- feature_dir, identifier)+".json"
81
  else:
82
  json_path = os.path.join(feature_dir, identifier)+".json"
83
 
 
52
  abbreviated_keys.append(abbreviated_key)
53
  return '_'.join(abbreviated_keys)
54
 
55
+ def get_output_key_value_location(obj, output_path, identifier, obj_keys=None):
56
  obj_sorted = dict(sorted(obj.items()))
57
+ if obj_keys is None:
58
+ obj_keys = [*obj_sorted.keys()]
59
 
60
  obj_values = [round(x, 4) for x in [*obj_sorted.values()]]
61
 
 
72
  save_path = os.path.join(folder_path, generated_file_name)
73
  return save_path
74
 
75
+ def dump_features_json(features: dict, output_path, identifier, objectives=None, content_type="features", obj_keys=None):
76
  output_parts = PurePath(output_path).parts
77
  feature_dir = os.path.join(output_parts[0], content_type,
78
  *output_parts[1:])
79
  if objectives is not None:
80
  json_path = get_output_key_value_location(objectives,
81
+ feature_dir, identifier, obj_keys)+".json"
82
  else:
83
  json_path = os.path.join(feature_dir, identifier)+".json"
84
 
utils/merge_jsons.py CHANGED
@@ -4,12 +4,12 @@ import csv
4
  import os
5
 
6
  """
7
- Run using:
8
  python merge_jsons.py path_to_your_json_directory output.csv
9
 
10
  """
11
  def json_to_csv(json_dir, output_csv):
12
-
13
  json_files = [os.path.join(json_dir, file) for file in os.listdir(json_dir) if file.endswith('.json')]
14
 
15
  # Collect data from all JSON files
@@ -18,13 +18,13 @@ def json_to_csv(json_dir, output_csv):
18
  with open(json_file, 'r') as f:
19
  data = json.load(f)
20
  all_data.append(data)
21
-
22
  # Extract the headers from the first JSON object
23
  if all_data:
24
- headers = all_data[0].keys()
25
  else:
26
  raise ValueError("No data found in JSON files")
27
-
28
  # Write data to CSV
29
  with open(output_csv, 'w', newline='') as f:
30
  writer = csv.DictWriter(f, fieldnames=headers)
 
4
  import os
5
 
6
  """
7
+ Run using:
8
  python merge_jsons.py path_to_your_json_directory output.csv
9
 
10
  """
11
  def json_to_csv(json_dir, output_csv):
12
+
13
  json_files = [os.path.join(json_dir, file) for file in os.listdir(json_dir) if file.endswith('.json')]
14
 
15
  # Collect data from all JSON files
 
18
  with open(json_file, 'r') as f:
19
  data = json.load(f)
20
  all_data.append(data)
21
+
22
  # Extract the headers from the first JSON object
23
  if all_data:
24
+ headers = {elem for s in [set(i) for i in [d.keys() for d in all_data]] for elem in s}
25
  else:
26
  raise ValueError("No data found in JSON files")
27
+
28
  # Write data to CSV
29
  with open(output_csv, 'w', newline='') as f:
30
  writer = csv.DictWriter(f, fieldnames=headers)