Andrea Maldonado committed
Commit · a451e02
1 Parent(s): fd6f629

directory structure
Browse files
- .conda.yml +0 -1
- gedi/generator.py +6 -4
- gedi/utils/io_helpers.py +5 -4
- utils/merge_jsons.py +5 -5
.conda.yml
CHANGED
@@ -55,7 +55,6 @@ dependencies:
   - pydotplus==2.0.2
   - pynisher==1.0.10
   - pyrfr==0.9.0
-  - pywin32==306
   - pyyaml==6.0.1
   - rapidfuzz==3.8.1
   - referencing==0.35.1
gedi/generator.py
CHANGED
@@ -19,7 +19,8 @@ from pm4py.sim import play_out
 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
-from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
+from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
+from gedi.utils.io_helpers import read_csvs
 import xml.etree.ElementTree as ET
 import re
 from xml.dom import minidom
@@ -80,7 +81,7 @@ def removeextralines(elem):
             element.tail=""
         if not re.search(hasWords,str(element.text)):
             element.text = ""
-
+
 def add_extension_before_traces(xes_file):
     # Register the namespace
     ET.register_namespace('', "http://www.xes-standard.org/")
@@ -158,6 +159,7 @@ class GenerateEventLogs():
             tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
 
         if tasks is not None:
+            self.feature_keys = tasks.columns.tolist()
             num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
             #self.generator_wrapper([*tasks.iterrows()][0])# For testing
             with multiprocessing.Pool(num_cores) as p:
@@ -208,7 +210,7 @@ class GenerateEventLogs():
             self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
 
         save_path = get_output_key_value_location(self.objectives,
-                                                  self.output_path, identifier)+".xes"
+                                                  self.output_path, identifier, self.feature_keys)+".xes"
 
         write_xes(log_config['log'], save_path)
         add_extension_before_traces(save_path)
@@ -219,7 +221,7 @@ class GenerateEventLogs():
         if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
             features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
         features_to_dump['log'] = identifier.replace('genEL', '')
-        dump_features_json(features_to_dump, self.output_path, identifier, objectives=self.objectives)
+        dump_features_json(features_to_dump, self.output_path, identifier, objectives=self.objectives, obj_keys=self.feature_keys)
         return log_config
 
     def generate_optimized_log(self, config):
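The net effect in gedi/generator.py is that GenerateEventLogs now remembers the task table's column names in self.feature_keys and threads them into both output helpers, so file naming follows the configured feature keys rather than whatever keys one task's objectives dict happens to contain. Below is a minimal sketch of that flow; it assumes the gedi package from this Space is importable and that the helpers create their output directories, and the feature names and output paths are made-up example values, not part of the commit.

# Sketch of the new feature_keys plumbing (example feature names/paths are assumptions).
import pandas as pd
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json

# A task table such as GenerateEventLogs receives; its column names become the feature keys.
tasks = pd.DataFrame([{"ratio_top_20_variants": 0.4, "epa_normalized_sequence_entropy": 0.6}])
feature_keys = tasks.columns.tolist()     # mirrors the new self.feature_keys assignment
objectives = tasks.iloc[0].to_dict()      # objectives for a single generation task

# Both changed call sites now pass the shared key list through.
save_path = get_output_key_value_location(objectives, "output/example", "genEL1", feature_keys) + ".xes"
dump_features_json(objectives, "output/example", "genEL1",
                   objectives=objectives, obj_keys=feature_keys)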
gedi/utils/io_helpers.py
CHANGED
@@ -52,9 +52,10 @@ def get_keys_abbreviation(obj_keys):
         abbreviated_keys.append(abbreviated_key)
     return '_'.join(abbreviated_keys)
 
-def get_output_key_value_location(obj, output_path, identifier):
+def get_output_key_value_location(obj, output_path, identifier, obj_keys=None):
     obj_sorted = dict(sorted(obj.items()))
-    obj_keys = [*obj_sorted.keys()]
+    if obj_keys is None:
+        obj_keys = [*obj_sorted.keys()]
 
     obj_values = [round(x, 4) for x in [*obj_sorted.values()]]
 
@@ -71,13 +72,13 @@ def get_output_key_value_location(obj, output_path, identifier):
     save_path = os.path.join(folder_path, generated_file_name)
     return save_path
 
-def dump_features_json(features: dict, output_path, identifier, objectives=None, content_type="features"):
+def dump_features_json(features: dict, output_path, identifier, objectives=None, content_type="features", obj_keys=None):
     output_parts = PurePath(output_path).parts
     feature_dir = os.path.join(output_parts[0], content_type,
                                *output_parts[1:])
     if objectives is not None:
         json_path = get_output_key_value_location(objectives,
-                                                  feature_dir, identifier)+".json"
+                                                  feature_dir, identifier, obj_keys)+".json"
     else:
         json_path = os.path.join(feature_dir, identifier)+".json"
 
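The signature changes in io_helpers.py are backwards compatible: obj_keys is optional and, when omitted, the helper falls back to the keys of the sorted objectives dict, exactly as before. A self-contained sketch of just that defaulting behaviour (not the library code itself, which goes on to abbreviate the keys and build the save path):

# Standalone illustration of the obj_keys defaulting introduced in this commit.
def get_output_key_value_location_sketch(obj, output_path, identifier, obj_keys=None):
    obj_sorted = dict(sorted(obj.items()))
    if obj_keys is None:                  # old call sites pass no obj_keys ...
        obj_keys = [*obj_sorted.keys()]   # ... and keep deriving keys from the objectives
    return obj_keys                       # the real helper continues with abbreviation + path building

# Old-style call: keys come from the objectives themselves.
assert get_output_key_value_location_sketch({"b": 2, "a": 1}, "out", "genEL1") == ["a", "b"]
# New-style call: an explicit key list (e.g. GenerateEventLogs.feature_keys) takes precedence.
assert get_output_key_value_location_sketch({"b": 2}, "out", "genEL1", ["a", "b"]) == ["a", "b"]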
utils/merge_jsons.py
CHANGED
@@ -4,12 +4,12 @@ import csv
 import os
 
 """
-Run using:
+Run using:
 python merge_jsons.py path_to_your_json_directory output.csv
 
 """
 def json_to_csv(json_dir, output_csv):
-
+
     json_files = [os.path.join(json_dir, file) for file in os.listdir(json_dir) if file.endswith('.json')]
 
     # Collect data from all JSON files
@@ -18,13 +18,13 @@ def json_to_csv(json_dir, output_csv):
         with open(json_file, 'r') as f:
             data = json.load(f)
             all_data.append(data)
-
+
     # Extract the headers from the first JSON object
     if all_data:
-        headers = all_data[0].keys()
+        headers = {elem for s in [set(i) for i in [d.keys() for d in all_data]] for elem in s}
     else:
         raise ValueError("No data found in JSON files")
-
+
     # Write data to CSV
     with open(output_csv, 'w', newline='') as f:
         writer = csv.DictWriter(f, fieldnames=headers)
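The header change in json_to_csv swaps the first record's keys for the union of keys across all records. That matters because csv.DictWriter raises a ValueError when a row contains a field that is not in fieldnames, while fields that are merely missing are written as empty cells. A small self-contained illustration of the difference; the sample records are made up:

import csv
import io

# Two records with different key sets, as merged JSON feature files may have.
all_data = [{"log": "genEL1", "ratio_top_20_variants": 0.4},
            {"log": "genEL2", "ratio_top_20_variants": 0.5, "epa_normalized_sequence_entropy": 0.7}]

# Union of keys over all records, as in the new code (sorted here only for a stable column order).
headers = {elem for s in [set(i) for i in [d.keys() for d in all_data]] for elem in s}

buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=sorted(headers))
writer.writeheader()
writer.writerows(all_data)   # missing fields become empty cells; no ValueError
print(buf.getvalue())

The nested comprehension used in the commit is equivalent to the more direct set().union(*(d.keys() for d in all_data)); the behaviour is the same either way.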