baakaani committed on
Commit
9b8cb42
·
1 Parent(s): a87d7ad

added distance metric to the output jsons

Browse files
Files changed (2) hide show
  1. gedi/generator.py +3 -1
  2. gedi/utils/io_helpers.py +25 -1
gedi/generator.py CHANGED
@@ -19,7 +19,7 @@ from pm4py.sim import play_out
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
- from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json
23
  from gedi.utils.io_helpers import read_csvs
24
  import xml.etree.ElementTree as ET
25
  import re
@@ -221,6 +221,8 @@ class GenerateEventLogs():
221
  if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
222
  features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
223
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
 
 
224
  dump_features_json(features_to_dump, save_path)
225
 
226
  return log_config
 
19
  from smac import HyperparameterOptimizationFacade, Scenario
20
  from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
+ from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, calculate_manhattan_distance
23
  from gedi.utils.io_helpers import read_csvs
24
  import xml.etree.ElementTree as ET
25
  import re
 
221
  if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
222
  features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
223
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
224
+ # calculating the manhattan distance of the generated log to the target features
225
+ features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
226
  dump_features_json(features_to_dump, save_path)
227
 
228
  return log_config
gedi/utils/io_helpers.py CHANGED
@@ -4,7 +4,7 @@ import os
4
  import pandas as pd
5
  import re
6
  import shutil
7
-
8
  from collections import defaultdict
9
  from pathlib import Path, PurePath
10
 
@@ -82,3 +82,27 @@ def dump_features_json(features: dict, output_path, content_type="features"):
82
  with open(json_path, 'w') as fp:
83
  json.dump(features, fp, default=int)
84
  print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import pandas as pd
5
  import re
6
  import shutil
7
+ import numpy as np
8
  from collections import defaultdict
9
  from pathlib import Path, PurePath
10
 
 
82
  with open(json_path, 'w') as fp:
83
  json.dump(features, fp, default=int)
84
  print(f"SUCCESS: Saved {len(features)-1} {content_type} in {json_path}")#-1 because 'log' is not a feature
85
+
86
def calculate_manhattan_distance(v1, v2):
    """Return the Manhattan (L1) distance between two feature dictionaries.

    Only keys that are present in both dictionaries and whose values are
    real numbers in both are compared; every other entry (for example a
    string-valued ``'log'`` name) is ignored.

    Args:
        v1: Dict mapping feature names to values. A legacy
            ``'ratio_unique_traces_per_trace'`` key is treated as
            ``'ratio_variants_per_number_of_traces'``. The dict passed in
            is never modified.
        v2: Dict mapping feature names to values, compared against ``v1``.

    Returns:
        The sum of absolute differences over the shared numeric keys (a
        NumPy scalar), or ``None`` when the dictionaries share no numeric
        key (an error line is printed in that case).
    """
    # Local import so the function does not depend on a new module-level import.
    import numbers

    legacy_key = 'ratio_unique_traces_per_trace'
    renamed_key = 'ratio_variants_per_number_of_traces'

    # HOTFIX: rename 'ratio_unique_traces_per_trace'. Work on a shallow copy
    # so the caller's dictionary keeps its original keys.
    if legacy_key in v1:
        v1 = dict(v1)
        v1[renamed_key] = v1.pop(legacy_key)

    # numbers.Real covers int/float as well as NumPy integer and floating
    # scalars, which a plain (int, float) check would partly drop.
    # Iterating v1 keeps a deterministic key order.
    numeric_keys = [
        key for key in v1
        if key in v2
        and isinstance(v1[key], numbers.Real)
        and isinstance(v2[key], numbers.Real)
    ]

    if not numeric_keys:
        print("[ERROR]: No common numeric keys found for (Manhattan) Distance calculation.")
        return None

    # Build aligned vectors from the shared numeric keys.
    vec1 = np.array([v1[key] for key in numeric_keys])
    vec2 = np.array([v2[key] for key in numeric_keys])

    return np.sum(np.abs(vec1 - vec2))