baakaani committed on
Commit
0d2306d
·
1 Parent(s): 28b0c8e

adding new actions code

Browse files
.github/workflows/test_abbrv.yml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GEDI abbrv Test
2
+
3
+ # Specifies when the action should run
4
+ on:
5
+ pull_request:
6
+ branches:
7
+ - main
8
+ - bpm24
9
+ workflow_dispatch:
10
+
11
+ # Specifies the jobs that are to be run
12
+ jobs:
13
+
14
+ test_generation:
15
+ runs-on: ubuntu-latest
16
+
17
+ # Setting up a python envronment for the test script to run
18
+ steps:
19
+ - name: Checkout code
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: 3.9
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ sudo apt-get install build-essential python3 python3-dev
30
+
31
+ - name: Install feeed
32
+ run: |
33
+ python -m pip install --upgrade pip
34
+ pip install .
35
+
36
+ - name: Run test
37
+ run:
38
+ python main.py -a config_files/test/test_abbrv_generation.json
39
+
40
+ - name: Compare output
41
+ run:
42
+ diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
43
+
config_files/test/test_abbrv_generation.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/test",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/test/igedi_table_1.csv",
5
+ "objectives": ["rmcv","ense"]},
6
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
7
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
8
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
9
+ "num_traces": [10, 10001], "duplicate": [0],
10
+ "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/test/igedi_table_1/2_ense_rmcv",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
14
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
15
+ "output_path": "output/plots", "real_eventlog_path": "data/test/2_bpic_features.csv",
16
+ "plot_type": "boxplot"}]
gedi/features.py CHANGED
@@ -10,7 +10,7 @@ from pathlib import Path
10
  from utils.param_keys import INPUT_PATH
11
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
12
  from gedi.utils.io_helpers import dump_features_json
13
-
14
  def get_sortby_parameter(elem):
15
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
16
  return number
@@ -63,6 +63,8 @@ class EventLogFeatures(EventLogFile):
63
 
64
  if str(self.filename).endswith('csv'): # Returns dataframe from loaded metafeatures file
65
  self.feat = pd.read_csv(self.filepath)
 
 
66
  print(f"SUCCESS: EventLogFeatures loaded features from {self.filepath}")
67
  elif isinstance(self.filename, list): # Computes metafeatures for list of .xes files
68
  combined_features=pd.DataFrame()
 
10
  from utils.param_keys import INPUT_PATH
11
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
12
  from gedi.utils.io_helpers import dump_features_json
13
+ from utils.column_mappings import column_mappings
14
  def get_sortby_parameter(elem):
15
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
16
  return number
 
63
 
64
  if str(self.filename).endswith('csv'): # Returns dataframe from loaded metafeatures file
65
  self.feat = pd.read_csv(self.filepath)
66
+ columns_to_rename = {col: column_mappings()[col] for col in self.feat.columns if col in column_mappings()}
67
+ self.feat.rename(columns=columns_to_rename, inplace=True)
68
  print(f"SUCCESS: EventLogFeatures loaded features from {self.filepath}")
69
  elif isinstance(self.filename, list): # Computes metafeatures for list of .xes files
70
  combined_features=pd.DataFrame()
gedi/generator.py CHANGED
@@ -21,6 +21,7 @@ from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
23
  from gedi.utils.io_helpers import read_csvs
 
24
  import xml.etree.ElementTree as ET
25
  import re
26
  from xml.dom import minidom
@@ -153,6 +154,8 @@ class GenerateEventLogs():
153
  experiment = self.params.get(EXPERIMENT)
154
  if experiment is not None:
155
  tasks, output_path = get_tasks(experiment, self.output_path)
 
 
156
  self.output_path = output_path
157
 
158
  if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
 
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
23
  from gedi.utils.io_helpers import read_csvs
24
+ from utils.column_mappings import column_mappings
25
  import xml.etree.ElementTree as ET
26
  import re
27
  from xml.dom import minidom
 
154
  experiment = self.params.get(EXPERIMENT)
155
  if experiment is not None:
156
  tasks, output_path = get_tasks(experiment, self.output_path)
157
+ columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
158
+ tasks = tasks.rename(columns=columns_to_rename)
159
  self.output_path = output_path
160
 
161
  if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
gedi/utils/bpic_feat_ranges.html DELETED
@@ -1,214 +0,0 @@
1
- <div style="overflow-x:auto;">
2
- <table border="1" class="dataframe">
3
- <thead>
4
- <tr style="text-align: right;">
5
- <th></th>
6
- <th>n_traces</th>
7
- <th>n_unique_traces</th>
8
- <th>ratio_variants_per_number_of_traces</th>
9
- <th>trace_len_min</th>
10
- <th>trace_len_max</th>
11
- <th>trace_len_mean</th>
12
- <th>trace_len_median</th>
13
- <th>trace_len_mode</th>
14
- <th>trace_len_std</th>
15
- <th>trace_len_variance</th>
16
- <th>trace_len_q1</th>
17
- <th>trace_len_q3</th>
18
- <th>trace_len_iqr</th>
19
- <th>trace_len_geometric_mean</th>
20
- <th>trace_len_geometric_std</th>
21
- <th>trace_len_harmonic_mean</th>
22
- <th>trace_len_skewness</th>
23
- <th>trace_len_kurtosis</th>
24
- <th>trace_len_coefficient_variation</th>
25
- <th>trace_len_entropy</th>
26
- <th>trace_len_hist1</th>
27
- <th>trace_len_hist2</th>
28
- <th>trace_len_hist3</th>
29
- <th>trace_len_hist4</th>
30
- <th>trace_len_hist5</th>
31
- <th>trace_len_hist6</th>
32
- <th>trace_len_hist7</th>
33
- <th>trace_len_hist8</th>
34
- <th>trace_len_hist9</th>
35
- <th>trace_len_hist10</th>
36
- <th>trace_len_skewness_hist</th>
37
- <th>trace_len_kurtosis_hist</th>
38
- <th>ratio_most_common_variant</th>
39
- <th>ratio_top_1_variants</th>
40
- <th>ratio_top_5_variants</th>
41
- <th>ratio_top_10_variants</th>
42
- <th>ratio_top_20_variants</th>
43
- <th>ratio_top_50_variants</th>
44
- <th>ratio_top_75_variants</th>
45
- <th>mean_variant_occurrence</th>
46
- <th>std_variant_occurrence</th>
47
- <th>skewness_variant_occurrence</th>
48
- <th>kurtosis_variant_occurrence</th>
49
- <th>n_unique_activities</th>
50
- <th>activities_min</th>
51
- <th>activities_max</th>
52
- <th>activities_mean</th>
53
- <th>activities_median</th>
54
- <th>activities_std</th>
55
- <th>activities_variance</th>
56
- <th>activities_q1</th>
57
- <th>activities_q3</th>
58
- <th>activities_iqr</th>
59
- <th>activities_skewness</th>
60
- <th>activities_kurtosis</th>
61
- <th>n_unique_start_activities</th>
62
- <th>start_activities_min</th>
63
- <th>start_activities_max</th>
64
- <th>start_activities_mean</th>
65
- <th>start_activities_median</th>
66
- <th>start_activities_std</th>
67
- <th>start_activities_variance</th>
68
- <th>start_activities_q1</th>
69
- <th>start_activities_q3</th>
70
- <th>start_activities_iqr</th>
71
- <th>start_activities_skewness</th>
72
- <th>start_activities_kurtosis</th>
73
- <th>n_unique_end_activities</th>
74
- <th>end_activities_min</th>
75
- <th>end_activities_max</th>
76
- <th>end_activities_mean</th>
77
- <th>end_activities_median</th>
78
- <th>end_activities_std</th>
79
- <th>end_activities_variance</th>
80
- <th>end_activities_q1</th>
81
- <th>end_activities_q3</th>
82
- <th>end_activities_iqr</th>
83
- <th>end_activities_skewness</th>
84
- <th>end_activities_kurtosis</th>
85
- <th>eventropy_trace</th>
86
- <th>eventropy_prefix</th>
87
- <th>eventropy_global_block</th>
88
- <th>eventropy_lempel_ziv</th>
89
- <th>eventropy_k_block_diff_1</th>
90
- <th>eventropy_k_block_diff_3</th>
91
- <th>eventropy_k_block_diff_5</th>
92
- <th>eventropy_k_block_ratio_1</th>
93
- <th>eventropy_k_block_ratio_3</th>
94
- <th>eventropy_k_block_ratio_5</th>
95
- <th>eventropy_knn_3</th>
96
- <th>eventropy_knn_5</th>
97
- <th>eventropy_knn_7</th>
98
- <th>epa_variant_entropy</th>
99
- <th>epa_normalized_variant_entropy</th>
100
- <th>epa_sequence_entropy</th>
101
- <th>epa_normalized_sequence_entropy</th>
102
- <th>epa_sequence_entropy_linear_forgetting</th>
103
- <th>epa_normalized_sequence_entropy_linear_forgetting</th>
104
- <th>epa_sequence_entropy_exponential_forgetting</th>
105
- <th>epa_normalized_sequence_entropy_exponential_forgetting</th>
106
- </tr>
107
- </thead>
108
- <tbody>
109
- <tr>
110
- <td>[ min, max ]</td>
111
- <td>[ 226.0, 251734.0 ]</td>
112
- <td>[ 6.0, 28457.0 ]</td>
113
- <td>[ 0.0, 1.0 ]</td>
114
- <td>[ 1.0, 24.0 ]</td>
115
- <td>[ 1.0, 2973.0 ]</td>
116
- <td>[ 1.0, 131.49 ]</td>
117
- <td>[ 1.0, 55.0 ]</td>
118
- <td>[ 1.0, 61.0 ]</td>
119
- <td>[ 0.0, 202.53 ]</td>
120
- <td>[ 0.0, 41017.89 ]</td>
121
- <td>[ 1.0, 44.0 ]</td>
122
- <td>[ 1.0, 169.0 ]</td>
123
- <td>[ 0.0, 161.0 ]</td>
124
- <td>[ 1.0, 53.78 ]</td>
125
- <td>[ 1.0, 5.65 ]</td>
126
- <td>[ 1.0, 51.65 ]</td>
127
- <td>[ -0.58, 111.97 ]</td>
128
- <td>[ -0.97, 14006.75 ]</td>
129
- <td>[ 0.0, 4.74 ]</td>
130
- <td>[ 5.33, 12.04 ]</td>
131
- <td>[ 0.0, 1.99 ]</td>
132
- <td>[ 0.0, 0.42 ]</td>
133
- <td>[ 0.0, 0.4 ]</td>
134
- <td>[ 0.0, 0.19 ]</td>
135
- <td>[ 0.0, 0.14 ]</td>
136
- <td>[ 0.0, 10.0 ]</td>
137
- <td>[ 0.0, 0.02 ]</td>
138
- <td>[ 0.0, 0.04 ]</td>
139
- <td>[ 0.0, 0.0 ]</td>
140
- <td>[ 0.0, 2.7 ]</td>
141
- <td>[ -0.58, 111.97 ]</td>
142
- <td>[ -0.97, 14006.75 ]</td>
143
- <td>[ 0.0, 0.79 ]</td>
144
- <td>[ 0.0, 0.87 ]</td>
145
- <td>[ 0.0, 0.98 ]</td>
146
- <td>[ 0.0, 0.99 ]</td>
147
- <td>[ 0.2, 1.0 ]</td>
148
- <td>[ 0.5, 1.0 ]</td>
149
- <td>[ 0.75, 1.0 ]</td>
150
- <td>[ 1.0, 24500.67 ]</td>
151
- <td>[ 0.04, 42344.04 ]</td>
152
- <td>[ 1.54, 64.77 ]</td>
153
- <td>[ 0.66, 5083.46 ]</td>
154
- <td>[ 1.0, 1152.0 ]</td>
155
- <td>[ 1.0, 66058.0 ]</td>
156
- <td>[ 34.0, 466141.0 ]</td>
157
- <td>[ 4.13, 66058.0 ]</td>
158
- <td>[ 2.0, 66058.0 ]</td>
159
- <td>[ 0.0, 120522.25 ]</td>
160
- <td>[ 0.0, 14525612122.34 ]</td>
161
- <td>[ 1.0, 66058.0 ]</td>
162
- <td>[ 4.0, 79860.0 ]</td>
163
- <td>[ 0.0, 77290.0 ]</td>
164
- <td>[ -0.06, 15.21 ]</td>
165
- <td>[ -1.5, 315.84 ]</td>
166
- <td>[ 1.0, 809.0 ]</td>
167
- <td>[ 1.0, 150370.0 ]</td>
168
- <td>[ 27.0, 199867.0 ]</td>
169
- <td>[ 3.7, 150370.0 ]</td>
170
- <td>[ 1.0, 150370.0 ]</td>
171
- <td>[ 0.0, 65387.49 ]</td>
172
- <td>[ 0.0, 4275524278.19 ]</td>
173
- <td>[ 1.0, 150370.0 ]</td>
174
- <td>[ 4.0, 150370.0 ]</td>
175
- <td>[ 0.0, 23387.25 ]</td>
176
- <td>[ 0.0, 9.3 ]</td>
177
- <td>[ -2.0, 101.82 ]</td>
178
- <td>[ 1.0, 757.0 ]</td>
179
- <td>[ 1.0, 16653.0 ]</td>
180
- <td>[ 28.0, 181328.0 ]</td>
181
- <td>[ 3.53, 24500.67 ]</td>
182
- <td>[ 1.0, 16653.0 ]</td>
183
- <td>[ 0.0, 42344.04 ]</td>
184
- <td>[ 0.0, 1793017566.89 ]</td>
185
- <td>[ 1.0, 16653.0 ]</td>
186
- <td>[ 3.0, 39876.0 ]</td>
187
- <td>[ 0.0, 39766.0 ]</td>
188
- <td>[ -0.7, 13.82 ]</td>
189
- <td>[ -2.0, 255.39 ]</td>
190
- <td>[ 0.0, 13.36 ]</td>
191
- <td>[ 0.0, 16.77 ]</td>
192
- <td>[ 0.0, 24.71 ]</td>
193
- <td>[ 0.0, 685.0 ]</td>
194
- <td>[ -328.0, 962.0 ]</td>
195
- <td>[ 0.0, 871.0 ]</td>
196
- <td>[ 0.0, 881.0 ]</td>
197
- <td>[ 0.0, 935.0 ]</td>
198
- <td>[ 0.0, 7.11 ]</td>
199
- <td>[ 0.0, 7.11 ]</td>
200
- <td>[ 0.0, 8.93 ]</td>
201
- <td>[ 0.0, 648.0 ]</td>
202
- <td>[ 0.0, 618.0 ]</td>
203
- <td>[ 0.0, 11563842.15 ]</td>
204
- <td>[ 0.0, 0.9 ]</td>
205
- <td>[ 0.0, 21146257.12 ]</td>
206
- <td>[ 0.0, 0.76 ]</td>
207
- <td>[ 0.0, 14140225.9 ]</td>
208
- <td>[ 0.0, 0.42 ]</td>
209
- <td>[ 0.0, 15576076.83 ]</td>
210
- <td>[ 0.0, 0.51 ]</td>
211
- </tr>
212
- </tbody>
213
- </table>
214
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/column_mappings.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def column_mappings():
2
+
3
+ column_names_short = {
4
+ 'rutpt': 'ratio_unique_traces_per_trace',
5
+ 'rmcv': 'ratio_most_common_variant',
6
+ 'tlcv': 'trace_len_coefficient_variation',
7
+ 'mvo': 'mean_variant_occurrence',
8
+ 'enve': 'epa_normalized_variant_entropy',
9
+ 'ense': 'epa_normalized_sequence_entropy',
10
+ 'eself': 'epa_sequence_entropy_linear_forgetting',
11
+ 'enself': 'epa_normalized_sequence_entropy_linear_forgetting',
12
+ 'eseef': 'epa_sequence_entropy_exponential_forgetting',
13
+ 'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
14
+ }
15
+
16
+ return column_names_short
utils/config_fabric.py CHANGED
@@ -13,6 +13,7 @@ import time
13
  import shutil
14
  import zipfile
15
  import io
 
16
 
17
  st.set_page_config(layout='wide')
18
  INPUT_XES="output/inputlog_temp.xes"
@@ -174,19 +175,7 @@ def set_generator_experiments(generator_params):
174
  df = pd.read_csv(uploaded_file)
175
  if len(df.columns) <= 1:
176
  raise pd.errors.ParserError("Please select a file withat least two columns (e.g. log, feature) and use ',' as a delimiter.")
177
- column_names_short = {
178
- 'rutpt': 'ratio_unique_traces_per_trace',
179
- 'rmcv': 'ratio_most_common_variant',
180
- 'tlcv': 'trace_len_coefficient_variation',
181
- 'mvo': 'mean_variant_occurrence',
182
- 'enve': 'epa_normalized_variant_entropy',
183
- 'ense': 'epa_normalized_sequence_entropy',
184
- 'eself': 'epa_sequence_entropy_linear_forgetting',
185
- 'enself': 'epa_normalized_sequence_entropy_linear_forgetting',
186
- 'eseef': 'epa_sequence_entropy_exponential_forgetting',
187
- 'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
188
- }
189
- columns_to_rename = {col: column_names_short[col] for col in df.columns if col in column_names_short}
190
 
191
  # Rename the matching columns
192
  df.rename(columns=columns_to_rename, inplace=True)
 
13
  import shutil
14
  import zipfile
15
  import io
16
+ from column_mappings import column_mappings
17
 
18
  st.set_page_config(layout='wide')
19
  INPUT_XES="output/inputlog_temp.xes"
 
175
  df = pd.read_csv(uploaded_file)
176
  if len(df.columns) <= 1:
177
  raise pd.errors.ParserError("Please select a file withat least two columns (e.g. log, feature) and use ',' as a delimiter.")
178
+ columns_to_rename = {col: column_mappings()[col] for col in df.columns if col in column_mappings()}
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  # Rename the matching columns
181
  df.rename(columns=columns_to_rename, inplace=True)