Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
78d7948
1
Parent(s):
9b8cb42
Computes Euclidean similarity
Browse files
data/validation/genELexperiment1_04_02.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02"}
|
|
|
1 |
+
{"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02", "target_similarity": 0.6520972056586477}
|
data/validation/genELexperiment2_07_04.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment2_07_04"}
|
|
|
1 |
+
{"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment2_07_04", "target_similarity": 0.3520969938410784}
|
data/validation/genELexperiment3_04_nan.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan"}
|
|
|
1 |
+
{"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan", "target_similarity": 0.6520972056586477}
|
data/validation/genELexperiment4_nan_02.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02"}
|
|
|
1 |
+
{"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02", "target_similarity": 1.0}
|
gedi/generator.py
CHANGED
@@ -19,7 +19,7 @@ from pm4py.sim import play_out
|
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
21 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
-
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json,
|
23 |
from gedi.utils.io_helpers import read_csvs
|
24 |
import xml.etree.ElementTree as ET
|
25 |
import re
|
@@ -222,7 +222,8 @@ class GenerateEventLogs():
|
|
222 |
features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
|
223 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
224 |
# calculating the manhattan distance of the generated log to the target features
|
225 |
-
features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
|
|
|
226 |
dump_features_json(features_to_dump, save_path)
|
227 |
|
228 |
return log_config
|
|
|
19 |
from smac import HyperparameterOptimizationFacade, Scenario
|
20 |
from utils.param_keys import OUTPUT_PATH, INPUT_PATH
|
21 |
from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
|
22 |
+
from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
|
23 |
from gedi.utils.io_helpers import read_csvs
|
24 |
import xml.etree.ElementTree as ET
|
25 |
import re
|
|
|
222 |
features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
|
223 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
224 |
# calculating the manhattan distance of the generated log to the target features
|
225 |
+
#features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
|
226 |
+
features_to_dump['target_similarity'] = compute_similarity(self.objectives, features_to_dump)
|
227 |
dump_features_json(features_to_dump, save_path)
|
228 |
|
229 |
return log_config
|
gedi/utils/io_helpers.py
CHANGED
@@ -7,6 +7,7 @@ import shutil
|
|
7 |
import numpy as np
|
8 |
from collections import defaultdict
|
9 |
from pathlib import Path, PurePath
|
|
|
10 |
|
11 |
def select_instance(source_dir, log_path, destination=os.path.join("output","generated","instance_selection")):
|
12 |
os.makedirs(destination, exist_ok=True)
|
@@ -83,12 +84,12 @@ def dump_features_json(features: dict, output_path, content_type="features"):
|
|
83 |
json.dump(features, fp, default=int)
|
84 |
print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
|
85 |
|
86 |
-
def
|
87 |
|
88 |
# HOTFIX: Rename 'ratio_unique_traces_per_trace
|
89 |
if 'ratio_unique_traces_per_trace' in v1:
|
90 |
v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
|
91 |
-
|
92 |
# Filter out non-numeric values and ensure the same keys exist in both dictionaries
|
93 |
common_keys = set(v1.keys()).intersection(set(v2.keys()))
|
94 |
numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
|
@@ -98,11 +99,12 @@ def calculate_manhattan_distance(v1, v2):
|
|
98 |
vec2 = np.array([v2[k] for k in numeric_keys])
|
99 |
|
100 |
if len(vec1) == 0 or len(vec2) == 0:
|
101 |
-
print("[ERROR]: No common numeric keys found for (
|
102 |
return None
|
103 |
|
104 |
else:
|
105 |
-
# Calculate
|
106 |
-
|
|
|
107 |
|
108 |
-
return
|
|
|
7 |
import numpy as np
|
8 |
from collections import defaultdict
|
9 |
from pathlib import Path, PurePath
|
10 |
+
from scipy.spatial.distance import euclidean
|
11 |
|
12 |
def select_instance(source_dir, log_path, destination=os.path.join("output","generated","instance_selection")):
|
13 |
os.makedirs(destination, exist_ok=True)
|
|
|
84 |
json.dump(features, fp, default=int)
|
85 |
print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
|
86 |
|
87 |
+
def compute_similarity(v1, v2):
|
88 |
|
89 |
# HOTFIX: Rename 'ratio_unique_traces_per_trace
|
90 |
if 'ratio_unique_traces_per_trace' in v1:
|
91 |
v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
|
92 |
+
|
93 |
# Filter out non-numeric values and ensure the same keys exist in both dictionaries
|
94 |
common_keys = set(v1.keys()).intersection(set(v2.keys()))
|
95 |
numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
|
|
|
99 |
vec2 = np.array([v2[k] for k in numeric_keys])
|
100 |
|
101 |
if len(vec1) == 0 or len(vec2) == 0:
|
102 |
+
print("[ERROR]: No common numeric keys found for (Edit) Distance calculation.")
|
103 |
return None
|
104 |
|
105 |
else:
|
106 |
+
# Calculate Euclidean Similarity
|
107 |
+
target_similarity = 1-euclidean(vec1, vec2)
|
108 |
+
#print("VECTORS: ", vec1, vec2, target_similarity)
|
109 |
|
110 |
+
return target_similarity
|