Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
ddfaf7c
1
Parent(s):
775403a
Fixes benchmark run with logs from memory.
Browse files- gedi/benchmark.py +1 -1
- gedi/generator.py +17 -16
- gedi/run.py +4 -4
gedi/benchmark.py
CHANGED
@@ -25,7 +25,7 @@ class BenchmarkTest:
|
|
25 |
|
26 |
print(f"INFO: Running with {params}")
|
27 |
|
28 |
-
if len(event_logs) == 0:
|
29 |
log_path = params[INPUT_PATH]
|
30 |
if log_path.endswith(".xes"):
|
31 |
event_logs = [""]
|
|
|
25 |
|
26 |
print(f"INFO: Running with {params}")
|
27 |
|
28 |
+
if event_logs is None or len(event_logs) == 0:
|
29 |
log_path = params[INPUT_PATH]
|
30 |
if log_path.endswith(".xes"):
|
31 |
event_logs = [""]
|
gedi/generator.py
CHANGED
@@ -151,7 +151,7 @@ class GenerateEventLogs():
|
|
151 |
os.makedirs(self.output_path, exist_ok=True)
|
152 |
|
153 |
if self.output_path.endswith('csv'):
|
154 |
-
self.
|
155 |
return
|
156 |
|
157 |
generator_params = params.get(GENERATOR_PARAMS)
|
@@ -171,14 +171,16 @@ class GenerateEventLogs():
|
|
171 |
print(f"INFO: Generator starting at {start.strftime('%H:%M:%S')} using {num_cores} cores for {len(tasks)} tasks...")
|
172 |
random.seed(RANDOM_SEED)
|
173 |
partial_wrapper = partial(self.generator_wrapper, generator_params=generator_params)
|
174 |
-
|
175 |
# TODO: Split log and metafeatures into separate object attributes
|
176 |
# TODO: Access not storing log in memory
|
177 |
-
# TODO: identify why log is needed in self.
|
178 |
-
self.
|
179 |
-
{
|
180 |
-
|
181 |
-
|
|
|
|
|
182 |
]
|
183 |
|
184 |
else:
|
@@ -187,20 +189,19 @@ class GenerateEventLogs():
|
|
187 |
if type(configs) is not list:
|
188 |
configs = [configs]
|
189 |
temp = self.generate_optimized_log(configs[0])
|
190 |
-
self.
|
191 |
save_path = get_output_key_value_location(generator_params[EXPERIMENT],
|
192 |
self.output_path, "genEL")+".xes"
|
193 |
write_xes(temp['log'], save_path)
|
194 |
add_extension_before_traces(save_path)
|
195 |
print("SUCCESS: Saved generated event log in", save_path)
|
196 |
-
|
197 |
-
print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.log_features)} event log(s).")
|
198 |
print(f" Saved generated logs in {self.output_path}")
|
199 |
print("========================= ~ Generator ==========================")
|
200 |
|
201 |
def clear(self):
|
202 |
print("Clearing parameters...")
|
203 |
-
self.
|
204 |
# self.configs = None
|
205 |
# self.params = None
|
206 |
self.output_path = None
|
@@ -220,17 +221,17 @@ class GenerateEventLogs():
|
|
220 |
|
221 |
random.seed(RANDOM_SEED)
|
222 |
if isinstance(configs, list):
|
223 |
-
|
224 |
else:
|
225 |
-
|
226 |
|
227 |
save_path = get_output_key_value_location(task.to_dict(),
|
228 |
self.output_path, identifier, self.feature_keys)+".xes"
|
229 |
|
230 |
-
write_xes(
|
231 |
add_extension_before_traces(save_path)
|
232 |
print("SUCCESS: Saved generated event log in", save_path)
|
233 |
-
features_to_dump =
|
234 |
|
235 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
236 |
# calculating the manhattan distance of the generated log to the target features
|
@@ -238,7 +239,7 @@ class GenerateEventLogs():
|
|
238 |
features_to_dump['target_similarity'] = compute_similarity(self.objectives, features_to_dump)
|
239 |
dump_features_json(features_to_dump, save_path)
|
240 |
|
241 |
-
return
|
242 |
|
243 |
def generate_optimized_log(self, config):
|
244 |
''' Returns event log from given configuration'''
|
|
|
151 |
os.makedirs(self.output_path, exist_ok=True)
|
152 |
|
153 |
if self.output_path.endswith('csv'):
|
154 |
+
self.generated_features = pd.read_csv(self.output_path)
|
155 |
return
|
156 |
|
157 |
generator_params = params.get(GENERATOR_PARAMS)
|
|
|
171 |
print(f"INFO: Generator starting at {start.strftime('%H:%M:%S')} using {num_cores} cores for {len(tasks)} tasks...")
|
172 |
random.seed(RANDOM_SEED)
|
173 |
partial_wrapper = partial(self.generator_wrapper, generator_params=generator_params)
|
174 |
+
generated_features = p.map(partial_wrapper, [(index, row) for index, row in tasks.iterrows()])
|
175 |
# TODO: Split log and metafeatures into separate object attributes
|
176 |
# TODO: Access not storing log in memory
|
177 |
+
# TODO: identify why log is needed in self.generated_features
|
178 |
+
self.generated_features = [
|
179 |
+
{
|
180 |
+
#'log': config.get('log'),
|
181 |
+
'metafeatures': config.get('metafeatures')}
|
182 |
+
for config in generated_features
|
183 |
+
if 'metafeatures' in config #and 'log' in config
|
184 |
]
|
185 |
|
186 |
else:
|
|
|
189 |
if type(configs) is not list:
|
190 |
configs = [configs]
|
191 |
temp = self.generate_optimized_log(configs[0])
|
192 |
+
self.generated_features = [temp['metafeatures']] if 'metafeatures' in temp else []
|
193 |
save_path = get_output_key_value_location(generator_params[EXPERIMENT],
|
194 |
self.output_path, "genEL")+".xes"
|
195 |
write_xes(temp['log'], save_path)
|
196 |
add_extension_before_traces(save_path)
|
197 |
print("SUCCESS: Saved generated event log in", save_path)
|
198 |
+
print(f"SUCCESS: Generator took {dt.now()-start} sec. Generated {len(self.generated_features)} event log(s).")
|
|
|
199 |
print(f" Saved generated logs in {self.output_path}")
|
200 |
print("========================= ~ Generator ==========================")
|
201 |
|
202 |
def clear(self):
|
203 |
print("Clearing parameters...")
|
204 |
+
self.generated_features = None
|
205 |
# self.configs = None
|
206 |
# self.params = None
|
207 |
self.output_path = None
|
|
|
221 |
|
222 |
random.seed(RANDOM_SEED)
|
223 |
if isinstance(configs, list):
|
224 |
+
generated_features = self.generate_optimized_log(configs[0])
|
225 |
else:
|
226 |
+
generated_features = self.generate_optimized_log(configs)
|
227 |
|
228 |
save_path = get_output_key_value_location(task.to_dict(),
|
229 |
self.output_path, identifier, self.feature_keys)+".xes"
|
230 |
|
231 |
+
write_xes(generated_features['log'], save_path)
|
232 |
add_extension_before_traces(save_path)
|
233 |
print("SUCCESS: Saved generated event log in", save_path)
|
234 |
+
features_to_dump = generated_features['metafeatures']
|
235 |
|
236 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
237 |
# calculating the manhattan distance of the generated log to the target features
|
|
|
239 |
features_to_dump['target_similarity'] = compute_similarity(self.objectives, features_to_dump)
|
240 |
dump_features_json(features_to_dump, save_path)
|
241 |
|
242 |
+
return generated_features
|
243 |
|
244 |
def generate_optimized_log(self, config):
|
245 |
''' Returns event log from given configuration'''
|
gedi/run.py
CHANGED
@@ -24,21 +24,21 @@ def run(kwargs:dict, model_params_list: list, filename_list:list):
|
|
24 |
params = kwargs[PARAMS]
|
25 |
ft = EventLogFeatures(None)
|
26 |
augmented_ft = InstanceAugmentator()
|
27 |
-
gen = pd.DataFrame(columns=['
|
28 |
|
29 |
for model_params in model_params_list:
|
30 |
if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
|
31 |
augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
|
32 |
AugmentationPlotter(augmented_ft, model_params)
|
33 |
elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
|
34 |
-
gen = pd.DataFrame(GenerateEventLogs(model_params).
|
35 |
#gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
|
36 |
#GenerationPlotter(gen, model_params, output_path="output/plots")
|
37 |
elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
|
38 |
-
benchmark = BenchmarkTest(model_params
|
39 |
# BenchmarkPlotter(benchmark.features, output_path="output/plots")
|
40 |
elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
|
41 |
-
ft = EventLogFeatures(**kwargs,
|
42 |
FeaturesPlotter(ft.feat, model_params)
|
43 |
elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
|
44 |
GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
|
|
|
24 |
params = kwargs[PARAMS]
|
25 |
ft = EventLogFeatures(None)
|
26 |
augmented_ft = InstanceAugmentator()
|
27 |
+
gen = pd.DataFrame(columns=['metafeatures'])
|
28 |
|
29 |
for model_params in model_params_list:
|
30 |
if model_params.get(PIPELINE_STEP) == 'instance_augmentation':
|
31 |
augmented_ft = InstanceAugmentator(aug_params=model_params, samples=ft.feat)
|
32 |
AugmentationPlotter(augmented_ft, model_params)
|
33 |
elif model_params.get(PIPELINE_STEP) == 'event_logs_generation':
|
34 |
+
gen = pd.DataFrame(GenerateEventLogs(model_params).generation_features)
|
35 |
#gen = pd.read_csv("output/features/generated/grid_2objectives_enseef_enve/2_enseef_enve_feat.csv")
|
36 |
#GenerationPlotter(gen, model_params, output_path="output/plots")
|
37 |
elif model_params.get(PIPELINE_STEP) == 'benchmark_test':
|
38 |
+
benchmark = BenchmarkTest(model_params)#, event_logs=gen['log'])
|
39 |
# BenchmarkPlotter(benchmark.features, output_path="output/plots")
|
40 |
elif model_params.get(PIPELINE_STEP) == 'feature_extraction':
|
41 |
+
ft = EventLogFeatures(**kwargs, ft_params=model_params)
|
42 |
FeaturesPlotter(ft.feat, model_params)
|
43 |
elif model_params.get(PIPELINE_STEP) == "evaluation_plotter":
|
44 |
GenerationPlotter(gen, model_params, output_path=model_params['output_path'], input_path=model_params['input_path'])
|