Andrea Maldonado committed on
Commit
e58b4c7
·
1 Parent(s): 776721c

Gets rid of Hotfixes

Browse files
.github/workflows/test_gedi.yml CHANGED
@@ -266,4 +266,4 @@ jobs:
266
 
267
  - name: Run test
268
  run: |
269
- python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_top_20_variants': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
 
266
 
267
  - name: Run test
268
  run: |
269
+ python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_variants_per_number_of_traces': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
gedi/features.py CHANGED
@@ -36,12 +36,7 @@ class EventLogFeatures(EventLogFile):
36
  elif ft_params.get(FEATURE_PARAMS) == None:
37
  self.params = {FEATURE_SET: None}
38
  else:
39
- #TODO: Replace hotfix
40
  self.params=ft_params.get(FEATURE_PARAMS)
41
- if 'ratio_variants_per_number_of_traces' in self.params.get(FEATURE_SET):#HOTFIX
42
- self.params[FEATURE_SET] = ['ratio_unique_traces_per_trace'\
43
- if feat=='ratio_variants_per_number_of_traces'\
44
- else feat for feat in self.params.get(FEATURE_SET)]
45
 
46
  # TODO: handle parameters in main, not in features. Move to main.py
47
  if ft_params[INPUT_PATH]:
@@ -150,9 +145,6 @@ class EventLogFeatures(EventLogFile):
150
  file_path = os.path.join(self.root_path, file)
151
  print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
152
  features = extract_features(file_path, feature_set)
153
- #TODO: Replace hotfix
154
- if features.get('ratio_unique_traces_per_trace'):#HOTFIX
155
- features['ratio_variants_per_number_of_traces']=features.pop('ratio_unique_traces_per_trace')
156
 
157
  except Exception as e:
158
  print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
 
36
  elif ft_params.get(FEATURE_PARAMS) == None:
37
  self.params = {FEATURE_SET: None}
38
  else:
 
39
  self.params=ft_params.get(FEATURE_PARAMS)
 
 
 
 
40
 
41
  # TODO: handle parameters in main, not in features. Move to main.py
42
  if ft_params[INPUT_PATH]:
 
145
  file_path = os.path.join(self.root_path, file)
146
  print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
147
  features = extract_features(file_path, feature_set)
 
 
 
148
 
149
  except Exception as e:
150
  print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
gedi/generator.py CHANGED
@@ -162,9 +162,6 @@ class GenerateEventLogs():
162
  tasks = tasks.rename(columns=columns_to_rename)
163
  self.output_path = output_path
164
 
165
- if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
166
- tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
167
-
168
  if tasks is not None:
169
  self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
170
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
@@ -182,10 +179,6 @@ class GenerateEventLogs():
182
  self.configs = [self.configs]
183
  temp = self.generate_optimized_log(self.configs[0])
184
  self.log_config = [temp]
185
- #TODO: Replace hotfix
186
- if self.params[EXPERIMENT].get('ratio_unique_traces_per_trace'):#HOTFIX
187
- self.params[EXPERIMENT]['ratio_variants_per_number_of_traces']=self.params[EXPERIMENT].pop('ratio_unique_traces_per_trace')
188
-
189
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
190
  self.output_path, "genEL")+".xes"
191
  write_xes(temp['log'], save_path)
@@ -212,10 +205,6 @@ class GenerateEventLogs():
212
  log_config = self.generate_optimized_log(self.configs)
213
 
214
  identifier = 'genEL'+str(identifier)
215
- #TODO: Replace hotfix
216
- if self.objectives.get('ratio_unique_traces_per_trace'):#HOTFIX
217
- self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
218
-
219
  save_path = get_output_key_value_location(task.to_dict(),
220
  self.output_path, identifier, self.feature_keys)+".xes"
221
 
@@ -224,9 +213,6 @@ class GenerateEventLogs():
224
  print("SUCCESS: Saved generated event log in", save_path)
225
  features_to_dump = log_config['metafeatures']
226
 
227
- #TODO: Replace hotfix
228
- if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
229
- features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
230
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
231
  # calculating the manhattan distance of the generated log to the target features
232
  #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
 
162
  tasks = tasks.rename(columns=columns_to_rename)
163
  self.output_path = output_path
164
 
 
 
 
165
  if tasks is not None:
166
  self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
167
  num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
 
179
  self.configs = [self.configs]
180
  temp = self.generate_optimized_log(self.configs[0])
181
  self.log_config = [temp]
 
 
 
 
182
  save_path = get_output_key_value_location(self.params[EXPERIMENT],
183
  self.output_path, "genEL")+".xes"
184
  write_xes(temp['log'], save_path)
 
205
  log_config = self.generate_optimized_log(self.configs)
206
 
207
  identifier = 'genEL'+str(identifier)
 
 
 
 
208
  save_path = get_output_key_value_location(task.to_dict(),
209
  self.output_path, identifier, self.feature_keys)+".xes"
210
 
 
213
  print("SUCCESS: Saved generated event log in", save_path)
214
  features_to_dump = log_config['metafeatures']
215
 
 
 
 
216
  features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
217
  # calculating the manhattan distance of the generated log to the target features
218
  #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
gedi/utils/io_helpers.py CHANGED
@@ -90,10 +90,6 @@ def compute_similarity(v1, v2):
90
  v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
91
  v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
92
 
93
- # HOTFIX: Rename 'ratio_unique_traces_per_trace'
94
- if 'ratio_unique_traces_per_trace' in v1:
95
- v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
96
-
97
  # Filter out non-numeric values and ensure the same keys exist in both dictionaries
98
  common_keys = set(v1.keys()).intersection(set(v2.keys()))
99
  numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
 
90
  v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
91
  v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
92
 
 
 
 
 
93
  # Filter out non-numeric values and ensure the same keys exist in both dictionaries
94
  common_keys = set(v1.keys()).intersection(set(v2.keys()))
95
  numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
utils/config_fabric.py CHANGED
@@ -285,9 +285,6 @@ def set_generator_experiments(generator_params):
285
  f.write(uploaded_file.getbuffer())
286
 
287
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
288
- if 'ratio_variants_per_number_of_traces' in sel_features: #Hotfix
289
- sel_features[sel_features.index('ratio_variants_per_number_of_traces')] = 'ratio_unique_traces_per_trace'
290
-
291
  xes_features = extract_features(f"{uploaded_file.name}", sel_features)
292
  del xes_features['log']
293
  # removing the temporary file
@@ -296,7 +293,7 @@ def set_generator_experiments(generator_params):
296
  os.remove(f"{uploaded_file.name}")
297
  xes_features = {key: float(value) for key, value in xes_features.items()}
298
  experiments = [xes_features]
299
-
300
  if uploaded_file.name.endswith('.csv'):
301
  df, sel_features = handle_csv_file(uploaded_file,grid_option)
302
  if df is not None and sel_features is not None:
 
285
  f.write(uploaded_file.getbuffer())
286
 
287
  sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
 
 
 
288
  xes_features = extract_features(f"{uploaded_file.name}", sel_features)
289
  del xes_features['log']
290
  # removing the temporary file
 
293
  os.remove(f"{uploaded_file.name}")
294
  xes_features = {key: float(value) for key, value in xes_features.items()}
295
  experiments = [xes_features]
296
+
297
  if uploaded_file.name.endswith('.csv'):
298
  df, sel_features = handle_csv_file(uploaded_file,grid_option)
299
  if df is not None and sel_features is not None: