Spaces:

andreamalhera
/

igedi

Running

App Files Files Community

Andrea MH committed on Aug 21, 2024

Commit

f744036

unverified ·

2 Parent(s): 885bc68 8735c43

Merge pull request #18 from andreamalhera/17-add-feature-similarity-to-json-output

Browse files

Files changed (6) hide show

data/validation/genELexperiment1_04_02.json +1 -1
data/validation/genELexperiment2_07_04.json +1 -1
data/validation/genELexperiment3_04_nan.json +1 -1
data/validation/genELexperiment4_nan_02.json +1 -1
gedi/generator.py +4 -1
gedi/utils/io_helpers.py +27 -1

data/validation/genELexperiment1_04_02.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02"}


1	+ {"ratio_top_20_variants": 0.20017714791851196, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment1_04_02", "target_similarity": 0.6520971605578558}

data/validation/genELexperiment2_07_04.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment2_07_04"}


1	+ {"ratio_top_20_variants": 0.38863337713534823, "epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment2_07_04", "target_similarity": 0.35199750692556764}

data/validation/genELexperiment3_04_nan.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan"}


1	+ {"epa_normalized_sequence_entropy_linear_forgetting": 0.052097205658647734, "log": "genELexperiment3_04_nan", "target_similarity": 0.6520972056586477}

data/validation/genELexperiment4_nan_02.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02"}


1	+ {"ratio_top_20_variants": 0.2, "log": "genELexperiment4_nan_02", "target_similarity": 1.0}

gedi/generator.py CHANGED Viewed

@@ -19,7 +19,7 @@ from pm4py.sim import play_out
 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
-from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
 from gedi.utils.io_helpers import read_csvs
 import xml.etree.ElementTree as ET
 import re
@@ -221,6 +221,9 @@ class GenerateEventLogs():
         if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
             features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
         features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
         dump_features_json(features_to_dump, save_path)
         return log_config

 from smac import HyperparameterOptimizationFacade, Scenario
 from utils.param_keys import OUTPUT_PATH, INPUT_PATH
 from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
+from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
 from gedi.utils.io_helpers import read_csvs
 import xml.etree.ElementTree as ET
 import re
         if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
             features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
         features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
+        # calculating the similarity (1 - Euclidean distance) of the generated log to the target features; the commented-out line below is the earlier Manhattan-distance variant
+        #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
+        features_to_dump['target_similarity'] = compute_similarity(self.objectives, features_to_dump)
         dump_features_json(features_to_dump, save_path)
         return log_config

gedi/utils/io_helpers.py CHANGED Viewed

@@ -4,9 +4,10 @@ import os
 import pandas as pd
 import re
 import shutil
 from collections import defaultdict
 from pathlib import Path, PurePath
 def select_instance(source_dir, log_path, destination=os.path.join("output","generated","instance_selection")):
     os.makedirs(destination, exist_ok=True)
@@ -82,3 +83,28 @@ def dump_features_json(features: dict, output_path, content_type="features"):
     with open(json_path, 'w') as fp:
         json.dump(features, fp, default=int)
         print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature

 import pandas as pd
 import re
 import shutil
+import numpy as np
 from collections import defaultdict
 from pathlib import Path, PurePath
+from scipy.spatial.distance import euclidean
 def select_instance(source_dir, log_path, destination=os.path.join("output","generated","instance_selection")):
     os.makedirs(destination, exist_ok=True)
     with open(json_path, 'w') as fp:
         json.dump(features, fp, default=int)
         print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
def compute_similarity(v1, v2):
    """Return the Euclidean similarity ``1 - distance`` of two feature dicts.

    Only keys that occur in both dictionaries and hold a numeric value in
    both of them take part in the comparison; every other entry (for example
    a string-valued ``'log'`` name) is ignored.

    Args:
        v1: Mapping of feature name to value. A legacy
            ``'ratio_unique_traces_per_trace'`` key is treated as
            ``'ratio_variants_per_number_of_traces'``. The mapping passed in
            is not modified.
        v2: Mapping of feature name to value to compare against ``v1``.

    Returns:
        ``1 - euclidean(vec1, vec2)`` over the shared numeric features, or
        ``None`` when the two dictionaries share no numeric feature. The
        value is 1.0 for identical features and is not bounded below: it
        becomes negative once the Euclidean distance exceeds 1.
    """
    # HOTFIX: Rename 'ratio_unique_traces_per_trace'. Work on a copy so the
    # caller's dictionary keeps its original keys.
    legacy_key = 'ratio_unique_traces_per_trace'
    if legacy_key in v1:
        v1 = dict(v1)
        v1['ratio_variants_per_number_of_traces'] = v1.pop(legacy_key)

    # NumPy scalars such as np.float32 / np.int64 are not instances of the
    # builtin int/float, so they are listed explicitly.
    numeric_types = (int, float, np.integer, np.floating)

    # Keep v1's key order so both vectors are aligned and reproducible.
    numeric_keys = [
        key for key in v1
        if key in v2
        and isinstance(v1[key], numeric_types)
        and isinstance(v2[key], numeric_types)
    ]
    if not numeric_keys:
        print("[ERROR]: No common numeric keys found for similarity calculation.")
        return None

    # Create aligned vectors from the shared numeric features.
    vec1 = np.array([v1[key] for key in numeric_keys])
    vec2 = np.array([v2[key] for key in numeric_keys])

    # Euclidean similarity: one minus the Euclidean distance.
    return 1 - euclidean(vec1, vec2)