Spaces:
Running
Running
Andrea Maldonado
committed on
Commit
·
e58b4c7
1
Parent(s):
776721c
Gets rid of Hotfixes
Browse files
- .github/workflows/test_gedi.yml +1 -1
- gedi/features.py +0 -8
- gedi/generator.py +0 -14
- gedi/utils/io_helpers.py +0 -4
- utils/config_fabric.py +1 -4
.github/workflows/test_gedi.yml
CHANGED
@@ -266,4 +266,4 @@ jobs:
|
|
266 |
|
267 |
- name: Run test
|
268 |
run: |
|
269 |
-
python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'
|
|
|
266 |
|
267 |
- name: Run test
|
268 |
run: |
|
269 |
+
python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_variants_per_number_of_traces': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"
|
gedi/features.py
CHANGED
@@ -36,12 +36,7 @@ class EventLogFeatures(EventLogFile):
|
|
36 |
elif ft_params.get(FEATURE_PARAMS) == None:
|
37 |
self.params = {FEATURE_SET: None}
|
38 |
else:
|
39 |
-
#TODO: Replace hotfix
|
40 |
self.params=ft_params.get(FEATURE_PARAMS)
|
41 |
-
if 'ratio_variants_per_number_of_traces' in self.params.get(FEATURE_SET):#HOTFIX
|
42 |
-
self.params[FEATURE_SET] = ['ratio_unique_traces_per_trace'\
|
43 |
-
if feat=='ratio_variants_per_number_of_traces'\
|
44 |
-
else feat for feat in self.params.get(FEATURE_SET)]
|
45 |
|
46 |
# TODO: handle parameters in main, not in features. Move to main.py
|
47 |
if ft_params[INPUT_PATH]:
|
@@ -150,9 +145,6 @@ class EventLogFeatures(EventLogFile):
|
|
150 |
file_path = os.path.join(self.root_path, file)
|
151 |
print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
|
152 |
features = extract_features(file_path, feature_set)
|
153 |
-
#TODO: Replace hotfix
|
154 |
-
if features.get('ratio_unique_traces_per_trace'):#HOTFIX
|
155 |
-
features['ratio_variants_per_number_of_traces']=features.pop('ratio_unique_traces_per_trace')
|
156 |
|
157 |
except Exception as e:
|
158 |
print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
|
|
|
36 |
elif ft_params.get(FEATURE_PARAMS) == None:
|
37 |
self.params = {FEATURE_SET: None}
|
38 |
else:
|
|
|
39 |
self.params=ft_params.get(FEATURE_PARAMS)
|
|
|
|
|
|
|
|
|
40 |
|
41 |
# TODO: handle parameters in main, not in features. Move to main.py
|
42 |
if ft_params[INPUT_PATH]:
|
|
|
145 |
file_path = os.path.join(self.root_path, file)
|
146 |
print(f" INFO: Starting FEEED for {file_path} and {feature_set}")
|
147 |
features = extract_features(file_path, feature_set)
|
|
|
|
|
|
|
148 |
|
149 |
except Exception as e:
|
150 |
print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")
|
gedi/generator.py
CHANGED
@@ -162,9 +162,6 @@ class GenerateEventLogs():
|
|
162 |
tasks = tasks.rename(columns=columns_to_rename)
|
163 |
self.output_path = output_path
|
164 |
|
165 |
-
if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
|
166 |
-
tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
|
167 |
-
|
168 |
if tasks is not None:
|
169 |
self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
|
170 |
num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
|
@@ -182,10 +179,6 @@ class GenerateEventLogs():
|
|
182 |
self.configs = [self.configs]
|
183 |
temp = self.generate_optimized_log(self.configs[0])
|
184 |
self.log_config = [temp]
|
185 |
-
#TODO: Replace hotfix
|
186 |
-
if self.params[EXPERIMENT].get('ratio_unique_traces_per_trace'):#HOTFIX
|
187 |
-
self.params[EXPERIMENT]['ratio_variants_per_number_of_traces']=self.params[EXPERIMENT].pop('ratio_unique_traces_per_trace')
|
188 |
-
|
189 |
save_path = get_output_key_value_location(self.params[EXPERIMENT],
|
190 |
self.output_path, "genEL")+".xes"
|
191 |
write_xes(temp['log'], save_path)
|
@@ -212,10 +205,6 @@ class GenerateEventLogs():
|
|
212 |
log_config = self.generate_optimized_log(self.configs)
|
213 |
|
214 |
identifier = 'genEL'+str(identifier)
|
215 |
-
#TODO: Replace hotfix
|
216 |
-
if self.objectives.get('ratio_unique_traces_per_trace'):#HOTFIX
|
217 |
-
self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
|
218 |
-
|
219 |
save_path = get_output_key_value_location(task.to_dict(),
|
220 |
self.output_path, identifier, self.feature_keys)+".xes"
|
221 |
|
@@ -224,9 +213,6 @@ class GenerateEventLogs():
|
|
224 |
print("SUCCESS: Saved generated event log in", save_path)
|
225 |
features_to_dump = log_config['metafeatures']
|
226 |
|
227 |
-
#TODO: Replace hotfix
|
228 |
-
if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
|
229 |
-
features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
|
230 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
231 |
# calculating the manhattan distance of the generated log to the target features
|
232 |
#features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
|
|
|
162 |
tasks = tasks.rename(columns=columns_to_rename)
|
163 |
self.output_path = output_path
|
164 |
|
|
|
|
|
|
|
165 |
if tasks is not None:
|
166 |
self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
|
167 |
num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
|
|
|
179 |
self.configs = [self.configs]
|
180 |
temp = self.generate_optimized_log(self.configs[0])
|
181 |
self.log_config = [temp]
|
|
|
|
|
|
|
|
|
182 |
save_path = get_output_key_value_location(self.params[EXPERIMENT],
|
183 |
self.output_path, "genEL")+".xes"
|
184 |
write_xes(temp['log'], save_path)
|
|
|
205 |
log_config = self.generate_optimized_log(self.configs)
|
206 |
|
207 |
identifier = 'genEL'+str(identifier)
|
|
|
|
|
|
|
|
|
208 |
save_path = get_output_key_value_location(task.to_dict(),
|
209 |
self.output_path, identifier, self.feature_keys)+".xes"
|
210 |
|
|
|
213 |
print("SUCCESS: Saved generated event log in", save_path)
|
214 |
features_to_dump = log_config['metafeatures']
|
215 |
|
|
|
|
|
|
|
216 |
features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
|
217 |
# calculating the manhattan distance of the generated log to the target features
|
218 |
#features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)
|
gedi/utils/io_helpers.py
CHANGED
@@ -90,10 +90,6 @@ def compute_similarity(v1, v2):
|
|
90 |
v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
|
91 |
v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
|
92 |
|
93 |
-
# HOTFIX: Rename 'ratio_unique_traces_per_trace'
|
94 |
-
if 'ratio_unique_traces_per_trace' in v1:
|
95 |
-
v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
|
96 |
-
|
97 |
# Filter out non-numeric values and ensure the same keys exist in both dictionaries
|
98 |
common_keys = set(v1.keys()).intersection(set(v2.keys()))
|
99 |
numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
|
|
|
90 |
v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
|
91 |
v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
|
92 |
|
|
|
|
|
|
|
|
|
93 |
# Filter out non-numeric values and ensure the same keys exist in both dictionaries
|
94 |
common_keys = set(v1.keys()).intersection(set(v2.keys()))
|
95 |
numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]
|
utils/config_fabric.py
CHANGED
@@ -285,9 +285,6 @@ def set_generator_experiments(generator_params):
|
|
285 |
f.write(uploaded_file.getbuffer())
|
286 |
|
287 |
sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
|
288 |
-
if 'ratio_variants_per_number_of_traces' in sel_features: #Hotfix
|
289 |
-
sel_features[sel_features.index('ratio_variants_per_number_of_traces')] = 'ratio_unique_traces_per_trace'
|
290 |
-
|
291 |
xes_features = extract_features(f"{uploaded_file.name}", sel_features)
|
292 |
del xes_features['log']
|
293 |
# removing the temporary file
|
@@ -296,7 +293,7 @@ def set_generator_experiments(generator_params):
|
|
296 |
os.remove(f"{uploaded_file.name}")
|
297 |
xes_features = {key: float(value) for key, value in xes_features.items()}
|
298 |
experiments = [xes_features]
|
299 |
-
|
300 |
if uploaded_file.name.endswith('.csv'):
|
301 |
df, sel_features = handle_csv_file(uploaded_file,grid_option)
|
302 |
if df is not None and sel_features is not None:
|
|
|
285 |
f.write(uploaded_file.getbuffer())
|
286 |
|
287 |
sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
|
|
|
|
|
|
|
288 |
xes_features = extract_features(f"{uploaded_file.name}", sel_features)
|
289 |
del xes_features['log']
|
290 |
# removing the temporary file
|
|
|
293 |
os.remove(f"{uploaded_file.name}")
|
294 |
xes_features = {key: float(value) for key, value in xes_features.items()}
|
295 |
experiments = [xes_features]
|
296 |
+
|
297 |
if uploaded_file.name.endswith('.csv'):
|
298 |
df, sel_features = handle_csv_file(uploaded_file,grid_option)
|
299 |
if df is not None and sel_features is not None:
|