Andrea Maldonado committed
Commit ddfaf7c · Parent: 775403a

Fixes benchmark run with logs from memory.

Files changed (3):
  1. gedi/benchmark.py +1 -1
  2. gedi/generator.py +17 -16
  3. gedi/run.py +4 -4
gedi/benchmark.py CHANGED
@@ -25,7 +25,7 @@ class BenchmarkTest:
 
         print(f"INFO: Running with {params}")
 
-        if len(event_logs) == 0:
+        if event_logs is None or len(event_logs) == 0:
             log_path = params[INPUT_PATH]
             if log_path.endswith(".xes"):
                 event_logs = [""]
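This guard matters because run.py (below) now constructs BenchmarkTest without the event_logs argument, so the parameter arrives as None, and the old check len(event_logs) == 0 would raise TypeError on None. A minimal sketch of the None-safe fallback; the helper name and paths are illustrative, not part of gedi:

# Minimal sketch of the None-safe guard; helper name and paths are
# hypothetical, not from the repository.
def resolve_event_logs(event_logs, log_path):
    # Treat "not provided" (None) and "provided but empty" the same:
    # fall back to reading from the configured input path.
    if event_logs is None or len(event_logs) == 0:
        if log_path.endswith(".xes"):
            event_logs = [""]  # single placeholder entry, as in the commit
    return event_logs

print(resolve_event_logs(None, "data/example.xes"))  # ['']
print(resolve_event_logs([], "data/example.xes"))    # ['']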
gedi/generator.py CHANGED
@@ -151,7 +151,7 @@ class GenerateEventLogs():
         os.makedirs(self.output_path, exist_ok=True)
 
         if self.output_path.endswith('csv'):
-            self.log_features = pd.read_csv(self.output_path)
+            self.generated_features = pd.read_csv(self.output_path)
             return
 
         generator_params = params.get(GENERATOR_PARAMS)
@@ -171,14 +171,16 @@
             print(f"INFO: Generator starting at {start.strftime('%H:%M:%S')} using {num_cores} cores for {len(tasks)} tasks...")
             random.seed(RANDOM_SEED)
             partial_wrapper = partial(self.generator_wrapper, generator_params=generator_params)
-            log_features = p.map(partial_wrapper, [(index, row) for index, row in tasks.iterrows()])
+            generated_features = p.map(partial_wrapper, [(index, row) for index, row in tasks.iterrows()])
             # TODO: Split log and metafeatures into separate object attributes
             # TODO: Access not storing log in memory
-            # TODO: identify why log is needed in self.log_features
-            self.log_features = [
-                {'log': config.get('log'), 'metafeatures': config.get('metafeatures')}
-                for config in log_features
-                if 'metafeatures' in config and 'log' in config
+            # TODO: identify why log is needed in self.generated_features
+            self.generated_features = [
+                {
+                 #'log': config.get('log'),
+                 'metafeatures': config.get('metafeatures')}
+                for config in generated_features
+                if 'metafeatures' in config #and 'log' in config
             ]
 
         else:
@@ -187,20 +189,19 @@
             if type(configs) is not list:
                 configs = [configs]
             temp = self.generate_optimized_log(configs[0])
-            self.log_features = [temp['metafeatures']] if 'metafeatures' in temp else []
+            self.generated_features = [temp['metafeatures']] if 'metafeatures' in temp else []
             save_path = get_output_key_value_location(generator_params[EXPERIMENT],
                                                       self.output_path, "genEL")+".xes"
             write_xes(temp['log'], save_path)
             add_extension_before_traces(save_path)
             print("SUCCESS: Saved generated event log in", save_path)
-        import pdb; pdb.set_trace()
-        print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_features)} event log(s).")
+        print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.generated_features)} event log(s).")
         print(f" Saved generated logs in {self.output_path}")
         print("========================= ~ Generator ==========================")
 
     def clear(self):
         print("Clearing parameters...")
-        self.log_features = None
+        self.generated_features = None
         # self.configs = None
         # self.params = None
         self.output_path = None
@@ -220,17 +221,17 @@
 
         random.seed(RANDOM_SEED)
         if isinstance(configs, list):
-            log_features = self.generate_optimized_log(configs[0])
+            generated_features = self.generate_optimized_log(configs[0])
         else:
-            log_features = self.generate_optimized_log(configs)
+            generated_features = self.generate_optimized_log(configs)
 
         save_path = get_output_key_value_location(task.to_dict(),
                                                   self.output_path, identifier, self.feature_keys)+".xes"
 
-        write_xes(log_features['log'], save_path)
+        write_xes(generated_features['log'], save_path)
         add_extension_before_traces(save_path)
         print("SUCCESS: Saved generated event log in", save_path)
-        features_to_dump = log_features['metafeatures']
+        features_to_dump = generated_features['metafeatures']
 
         features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
         # calculating the manhattan distance of the generated log to the target features
@@ -238,7 +239,7 @@
         features_to_dump['target_similarity'] = compute_similarity(self.objectives, features_to_dump)
         dump_features_json(features_to_dump, save_path)
 
-        return log_features
+        return generated_features
 
     def generate_optimized_log(self, config):
         ''' Returns event log from given configuration'''
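Beyond renaming log_features to generated_features (and dropping a stray pdb.set_trace()), the substantive change is in the parallel branch: the list comprehension now comments out the 'log' entries, so whole event logs are no longer held in memory and only the metafeatures dictionaries survive. A minimal sketch of that filtering step, with made-up worker results standing in for the output of p.map:

# Made-up worker results standing in for what p.map returns; each
# worker may or may not have produced metafeatures.
results = [
    {'log': '<large event-log object>', 'metafeatures': {'n_traces': 100}},
    {'log': '<large event-log object>'},  # no metafeatures -> filtered out
]

# After the commit: keep only the metafeatures, not the logs.
generated_features = [
    {'metafeatures': config.get('metafeatures')}
    for config in results
    if 'metafeatures' in config
]
print(generated_features)  # [{'metafeatures': {'n_traces': 100}}]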
gedi/run.py CHANGED
@@ -24,21 +24,21 @@ def run(kwargs:dict, model_params_list: list, filename_list:list):
     params = kwargs[PARAMS]
     ft = EventLogFeatures(None)
     augmented_ft = InstanceAugmentator()
-    gen = pd.DataFrame(columns=['log'])
+    gen = pd.DataFrame(columns=['metafeatures'])
 
     for model_params in model_params_list:
         if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
             augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
             AugmentationPlotter(augmented_ft, model_params)
         elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
-            gen = pd.DataFrame(GenerateEventLogs(model_params).log_features)
+            gen = pd.DataFrame(GenerateEventLogs(model_params).generation_features)
             #gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
             #GenerationPlotter(gen, model_params, output_path="output/plots")
         elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
-            benchmark = BenchmarkTest(model_params, event_logs=gen['log'])
+            benchmark = BenchmarkTest(model_params)#, event_logs=gen['log'])
             # BenchmarkPlotter(benchmark.features, output_path="output/plots")
         elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
-            ft = EventLogFeatures(**kwargs, logs=gen['log'], ft_params=model_params)
+            ft = EventLogFeatures(**kwargs, ft_params=model_params)
             FeaturesPlotter(ft.feat, model_params)
         elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
             GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
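Taken together, the pipeline no longer hands generated logs around in memory: gen carries only metafeatures, BenchmarkTest is constructed without event_logs (its new None-guard then falls back to params[INPUT_PATH]), and EventLogFeatures reads logs itself rather than taking logs=gen['log']. Note that generator.py stores its result in self.generated_features while this hunk reads .generation_features; if those attribute names really differ in the sources, the access would raise AttributeError. A minimal sketch of the decoupled hand-off, using hypothetical stand-in classes rather than the real gedi API:

import pandas as pd

# Hypothetical stand-ins mirroring the diff, not the real gedi classes.
class GenerateEventLogs:
    def __init__(self, params):
        # After the commit, only metafeatures are retained in memory.
        self.generated_features = [{'metafeatures': {'n_traces': 10}}]

class BenchmarkTest:
    def __init__(self, params, event_logs=None):
        # event_logs now defaults to None; the None-guard in
        # benchmark.py then loads logs from params['input_path'].
        if event_logs is None or len(event_logs) == 0:
            print(f"Loading logs from {params['input_path']}")

gen = pd.DataFrame(columns=['metafeatures'])  # no 'log' column anymore
gen = pd.DataFrame(GenerateEventLogs({}).generated_features)
BenchmarkTest({'input_path': 'data/logs.xes'})  # no event_logs passed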