Spaces:

andreamalhera
/

igedi

Running

App Files Community

Andrea Maldonado committed on Dec 27, 2024

Commit

e58b4c7

1 Parent(s): 776721c

Gets rid of Hotfixes

Browse files

Files changed (5) hide show

.github/workflows/test_gedi.yml +1 -1
gedi/features.py +0 -8
gedi/generator.py +0 -14
gedi/utils/io_helpers.py +0 -4
utils/config_fabric.py +1 -4

.github/workflows/test_gedi.yml CHANGED Viewed

@@ -266,4 +266,4 @@ jobs:
     - name: Run test
       run: |
-        python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_top_20_variants': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"

     - name: Run test
       run: |
+        python -c "from gedi import GenerateEventLogs;default_params = {'generator_params': {'experiment': {'ratio_variants_per_number_of_traces': 0.2, 'epa_normalized_sequence_entropy_linear_forgetting': 0.4}, 'config_space': {'mode': [5, 20], 'sequence': [0.01, 1], 'choice': [0.01, 1], 'parallel': [0.01, 1], 'loop': [0.01, 1], 'silent': [0.01, 1], 'lt_dependency': [0.01, 1], 'num_traces': [10, 101], 'duplicate': [0], 'or': [0]}, 'n_trials': 50}}; GenerateEventLogs(default_params)"

gedi/features.py CHANGED Viewed

@@ -36,12 +36,7 @@ class EventLogFeatures(EventLogFile):
         elif ft_params.get(FEATURE_PARAMS) == None:
             self.params = {FEATURE_SET: None}
         else:
-            #TODO: Replace hotfix
             self.params=ft_params.get(FEATURE_PARAMS)
-            if 'ratio_variants_per_number_of_traces' in self.params.get(FEATURE_SET):#HOTFIX
-                self.params[FEATURE_SET] = ['ratio_unique_traces_per_trace'\
-                                                if feat=='ratio_variants_per_number_of_traces'\
-                                                else feat for feat in self.params.get(FEATURE_SET)]
         # TODO: handle parameters in main, not in features. Move to main.py
         if ft_params[INPUT_PATH]:
@@ -150,9 +145,6 @@ class EventLogFeatures(EventLogFile):
             file_path = os.path.join(self.root_path, file)
             print(f"  INFO: Starting FEEED for {file_path} and {feature_set}")
             features = extract_features(file_path, feature_set)
-            #TODO: Replace hotfix
-            if features.get('ratio_unique_traces_per_trace'):#HOTFIX
-                features['ratio_variants_per_number_of_traces']=features.pop('ratio_unique_traces_per_trace')
         except Exception as e:
             print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")

         elif ft_params.get(FEATURE_PARAMS) == None:
             self.params = {FEATURE_SET: None}
         else:
             self.params=ft_params.get(FEATURE_PARAMS)
         # TODO: handle parameters in main, not in features. Move to main.py
         if ft_params[INPUT_PATH]:
             file_path = os.path.join(self.root_path, file)
             print(f"  INFO: Starting FEEED for {file_path} and {feature_set}")
             features = extract_features(file_path, feature_set)
         except Exception as e:
             print("ERROR: for ",file.rsplit(".", 1)[0], feature_set, "skipping and continuing with next log.")

gedi/generator.py CHANGED Viewed

@@ -162,9 +162,6 @@ class GenerateEventLogs():
             tasks = tasks.rename(columns=columns_to_rename)
             self.output_path = output_path
-        if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
-            tasks=tasks.rename(columns={"ratio_variants_per_number_of_traces": "ratio_unique_traces_per_trace"})
         if tasks is not None:
             self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
             num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
@@ -182,10 +179,6 @@ class GenerateEventLogs():
                 self.configs = [self.configs]
             temp = self.generate_optimized_log(self.configs[0])
             self.log_config = [temp]
-            #TODO: Replace hotfix
-            if self.params[EXPERIMENT].get('ratio_unique_traces_per_trace'):#HOTFIX
-                self.params[EXPERIMENT]['ratio_variants_per_number_of_traces']=self.params[EXPERIMENT].pop('ratio_unique_traces_per_trace')
             save_path = get_output_key_value_location(self.params[EXPERIMENT],
                                              self.output_path, "genEL")+".xes"
             write_xes(temp['log'], save_path)
@@ -212,10 +205,6 @@ class GenerateEventLogs():
             log_config = self.generate_optimized_log(self.configs)
         identifier = 'genEL'+str(identifier)
-        #TODO: Replace hotfix
-        if self.objectives.get('ratio_unique_traces_per_trace'):#HOTFIX
-            self.objectives['ratio_variants_per_number_of_traces']=self.objectives.pop('ratio_unique_traces_per_trace')
         save_path = get_output_key_value_location(task.to_dict(),
                                          self.output_path, identifier, self.feature_keys)+".xes"
@@ -224,9 +213,6 @@ class GenerateEventLogs():
         print("SUCCESS: Saved generated event log in", save_path)
         features_to_dump = log_config['metafeatures']
-        #TODO: Replace hotfix
-        if features_to_dump.get('ratio_unique_traces_per_trace'):#HOTFIX
-            features_to_dump['ratio_variants_per_number_of_traces']=features_to_dump.pop('ratio_unique_traces_per_trace')
         features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
         # calculating the manhattan distance of the generated log to the target features
         #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)

             tasks = tasks.rename(columns=columns_to_rename)
             self.output_path = output_path
         if tasks is not None:
             self.feature_keys = sorted([feature for feature in tasks.columns.tolist() if feature != "log"])
             num_cores = multiprocessing.cpu_count() if len(tasks) >= multiprocessing.cpu_count() else len(tasks)
                 self.configs = [self.configs]
             temp = self.generate_optimized_log(self.configs[0])
             self.log_config = [temp]
             save_path = get_output_key_value_location(self.params[EXPERIMENT],
                                              self.output_path, "genEL")+".xes"
             write_xes(temp['log'], save_path)
             log_config = self.generate_optimized_log(self.configs)
         identifier = 'genEL'+str(identifier)
         save_path = get_output_key_value_location(task.to_dict(),
                                          self.output_path, identifier, self.feature_keys)+".xes"
         print("SUCCESS: Saved generated event log in", save_path)
         features_to_dump = log_config['metafeatures']
         features_to_dump['log']= os.path.split(save_path)[1].split(".")[0]
         # calculating the manhattan distance of the generated log to the target features
         #features_to_dump['distance_to_target'] = calculate_manhattan_distance(self.objectives, features_to_dump)

gedi/utils/io_helpers.py CHANGED Viewed

@@ -90,10 +90,6 @@ def compute_similarity(v1, v2):
     v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
     v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
-    # HOTFIX: Rename 'ratio_unique_traces_per_trace'
-    if 'ratio_unique_traces_per_trace' in v1:
-        v1['ratio_variants_per_number_of_traces'] = v1.pop('ratio_unique_traces_per_trace')
     # Filter out non-numeric values and ensure the same keys exist in both dictionaries
     common_keys = set(v1.keys()).intersection(set(v2.keys()))
     numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]

     v1 = {k: (float(v) if k != "log" else v) for k, v in v1.items()}
     v2 = {k: (float(v) if k != "log" else v) for k, v in v2.items()}
     # Filter out non-numeric values and ensure the same keys exist in both dictionaries
     common_keys = set(v1.keys()).intersection(set(v2.keys()))
     numeric_keys = [k for k in common_keys if isinstance(v1[k], (int, float)) and isinstance(v2[k], (int, float))]

utils/config_fabric.py CHANGED Viewed

@@ -285,9 +285,6 @@ def set_generator_experiments(generator_params):
                     f.write(uploaded_file.getbuffer())
                 sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
-                if 'ratio_variants_per_number_of_traces' in sel_features: #Hotfix
-                    sel_features[sel_features.index('ratio_variants_per_number_of_traces')] = 'ratio_unique_traces_per_trace'
                 xes_features = extract_features(f"{uploaded_file.name}", sel_features)
                 del xes_features['log']
                 # removing the temporary file
@@ -296,7 +293,7 @@ def set_generator_experiments(generator_params):
                     os.remove(f"{uploaded_file.name}")
                 xes_features = {key: float(value) for key, value in xes_features.items()}
                 experiments = [xes_features]
             if uploaded_file.name.endswith('.csv'):
                 df, sel_features = handle_csv_file(uploaded_file,grid_option)
                 if df is not None and sel_features is not None:

                     f.write(uploaded_file.getbuffer())
                 sel_features = st.multiselect("Selected features", list(generator_params['experiment'].keys()))
                 xes_features = extract_features(f"{uploaded_file.name}", sel_features)
                 del xes_features['log']
                 # removing the temporary file
                     os.remove(f"{uploaded_file.name}")
                 xes_features = {key: float(value) for key, value in xes_features.items()}
                 experiments = [xes_features]
             if uploaded_file.name.endswith('.csv'):
                 df, sel_features = handle_csv_file(uploaded_file,grid_option)
                 if df is not None and sel_features is not None: