Spaces:
Running
Running
added distance metric to the output jsons
Browse files- gedi/generator.py +3 -1
- gedi/utils/io_helpers.py +25 -1
gedi/generator.py
CHANGED
@@ -19,7 +19,7 @@ from pm4py.sim import play_out
|
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
21 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
-
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
|
23 |
from gedi.utils.io_helpers import read_csvs
|
24 |
import xml.etree.ElementTree as ET
|
25 |
import re
|
@@ -221,6 +221,8 @@ class GenerateEventLogs():
|
|
221 |
if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
|
222 |
features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
|
223 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
|
|
|
|
224 |
dump_features_json(features_to_dump, save_path)
|
225 |
|
226 |
return log_config
|
|
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
21 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
+
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, calculate_manhattan_distance
|
23 |
from gedi.utils.io_helpers import read_csvs
|
24 |
import xml.etree.ElementTree as ET
|
25 |
import re
|
|
|
221 |
if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
|
222 |
features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
|
223 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
224 |
+
# calculating the manhattan distance of the generated log to the target features
|
225 |
+
features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
|
226 |
dump_features_json(features_to_dump, save_path)
|
227 |
|
228 |
return log_config
|
gedi/utils/io_helpers.py
CHANGED
@@ -4,7 +4,7 @@ import os
|
|
4 |
import pandas as pd
|
5 |
import re
|
6 |
import shutil
|
7 |
-
|
8 |
from collections import defaultdict
|
9 |
from pathlib import Path, PurePath
|
10 |
|
@@ -82,3 +82,27 @@ def dump_features_json(features: dict, output_path, content_type="features"):
|
|
82 |
with open(json_path, 'w') as fp:
|
83 |
json.dump(features, fp, default=int)
|
84 |
print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
import pandas as pd
|
5 |
import re
|
6 |
import shutil
|
7 |
+
import numpy as np
|
8 |
from collections import defaultdict
|
9 |
from pathlib import Path, PurePath
|
10 |
|
|
|
82 |
with open(json_path, 'w') as fp:
|
83 |
json.dump(features, fp, default=int)
|
84 |
print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
|
85 |
+
|
86 |
+
def calculate_manhattan_distance(v1, v2):
    """Return the Manhattan (L1) distance between two feature dictionaries.

    Only keys that occur in both dictionaries and whose values are ``int``
    or ``float`` instances on both sides take part in the calculation; every
    other entry (e.g. string values) is ignored.

    Args:
        v1: Mapping of feature name to value. A legacy key
            ``'ratio_unique_traces_per_trace'`` is treated as
            ``'ratio_variants_per_number_of_traces'``. The caller's mapping
            is NOT modified.
        v2: Mapping of feature name to value to compare against ``v1``.

    Returns:
        The sum of absolute differences over the shared numeric keys (a
        NumPy scalar), or ``None`` when the two dictionaries share no
        numeric key.
    """
    # HOTFIX: rename 'ratio_unique_traces_per_trace'. The rename is done on a
    # shallow copy so the dictionary owned by the caller keeps its original
    # keys instead of being rewritten as a side effect of this call.
    if 'ratio_unique_traces_per_trace' in v1:
        v1 = dict(v1)
        v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')

    # Keep only keys present in both dictionaries with numeric values on
    # both sides.
    common_keys = set(v1.keys()).intersection(v2.keys())
    # Sorting fixes the summation order; set iteration order depends on
    # string hashing, which would otherwise let the floating-point result
    # vary in its last bits between interpreter runs.
    numeric_keys = sorted(
        (
            k for k in common_keys
            if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))
        ),
        key=str,
    )

    if not numeric_keys:
        print("[ERROR]: No common numeric keys found for (Manhattan) Distance calculation.")
        return None

    # Build aligned vectors (same key order) and sum the absolute differences.
    vec1 = np.array([v1[k] for k in numeric_keys])
    vec2 = np.array([v2[k] for k in numeric_keys])
    return np.sum(np.abs(vec1 - vec2))
|