baakaani committed on
Commit
0d2306d
·
1 Parent(s): 28b0c8e

adding new actions code

Browse files
.github/workflows/test_abbrv.yml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: GEDI abbrv Test
2
+
3
+ # Specifies when the action should run
4
+ on:
5
+ pull_request:
6
+ branches:
7
+ - main
8
+ - bpm24
9
+ workflow_dispatch:
10
+
11
+ # Specifies the jobs that are to be run
12
+ jobs:
13
+
14
+ test_generation:
15
+ runs-on: ubuntu-latest
16
+
17
+ # Setting up a python envronment for the test script to run
18
+ steps:
19
+ - name: Checkout code
20
+ uses: actions/checkout@v4
21
+
22
+ - name: Set up Python
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: 3.9
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ sudo apt-get install build-essential python3 python3-dev
30
+
31
+ - name: Install feeed
32
+ run: |
33
+ python -m pip install --upgrade pip
34
+ pip install .
35
+
36
+ - name: Run test
37
+ run:
38
+ python main.py -a config_files/test/test_abbrv_generation.json
39
+
40
+ - name: Compare output
41
+ run:
42
+ diff data/validation/2_ense_rmcv_feat.csv output/test/igedi_table_1/2_ense_rmcv_feat.csv
43
+
config_files/test/test_abbrv_generation.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"pipeline_step": "event_logs_generation",
2
+ "output_path": "output/test",
3
+ "generator_params": {"experiment":
4
+ {"input_path": "data/test/igedi_table_1.csv",
5
+ "objectives": ["rmcv","ense"]},
6
+ "config_space": {"mode": [5, 20], "sequence": [0.01, 1],
7
+ "choice": [0.01, 1], "parallel": [0.01, 1], "loop": [0.01, 1],
8
+ "silent": [0.01, 1], "lt_dependency": [0.01, 1],
9
+ "num_traces": [10, 10001], "duplicate": [0],
10
+ "or": [0]}, "n_trials": 2}},
11
+ {"pipeline_step": "feature_extraction",
12
+ "input_path": "output/test/igedi_table_1/2_ense_rmcv",
13
+ "feature_params": {"feature_set": ["simple_stats", "trace_length", "trace_variant",
14
+ "activities", "start_activities", "end_activities", "eventropies", "epa_based"]},
15
+ "output_path": "output/plots", "real_eventlog_path": "data/test/2_bpic_features.csv",
16
+ "plot_type": "boxplot"}]
gedi/features.py CHANGED
@@ -10,7 +10,7 @@ from pathlib import Path
10
  from utils.param_keys import INPUT_PATH
11
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
12
  from gedi.utils.io_helpers import dump_features_json
13
-
14
  def get_sortby_parameter(elem):
15
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
16
  return number
@@ -63,6 +63,8 @@ class EventLogFeatures(EventLogFile):
63
 
64
  if str(self.filename).endswith('csv'): # Returns dataframe from loaded metafeatures file
65
  self.feat = pd.read_csv(self.filepath)
 
 
66
  print(f"SUCCESS: EventLogFeatures loaded features from {self.filepath}")
67
  elif isinstance(self.filename, list): # Computes metafeatures for list of .xes files
68
  combined_features=pd.DataFrame()
 
10
  from utils.param_keys import INPUT_PATH
11
  from utils.param_keys.features import FEATURE_PARAMS, FEATURE_SET
12
  from gedi.utils.io_helpers import dump_features_json
13
+ from utils.column_mappings import column_mappings
14
  def get_sortby_parameter(elem):
15
  number = int(elem.rsplit(".")[0].rsplit("_", 1)[1])
16
  return number
 
63
 
64
  if str(self.filename).endswith('csv'): # Returns dataframe from loaded metafeatures file
65
  self.feat = pd.read_csv(self.filepath)
66
+ columns_to_rename = {col: column_mappings()[col] for col in self.feat.columns if col in column_mappings()}
67
+ self.feat.rename(columns=columns_to_rename, inplace=True)
68
  print(f"SUCCESS: EventLogFeatures loaded features from {self.filepath}")
69
  elif isinstance(self.filename, list): # Computes metafeatures for list of .xes files
70
  combined_features=pd.DataFrame()
gedi/generator.py CHANGED
@@ -21,6 +21,7 @@ from utils.param_keys import OUTPUT_PATH, INPUT_PATH
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
23
  from gedi.utils.io_helpers import read_csvs
 
24
  import xml.etree.ElementTree as ET
25
  import re
26
  from xml.dom import minidom
@@ -153,6 +154,8 @@ class GenerateEventLogs():
153
  experiment = self.params.get(EXPERIMENT)
154
  if experiment is not None:
155
  tasks, output_path = get_tasks(experiment, self.output_path)
 
 
156
  self.output_path = output_path
157
 
158
  if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
 
21
  from utils.param_keys.generator import GENERATOR_PARAMS, EXPERIMENT, CONFIG_SPACE, N_TRIALS
22
  from gedi.utils.io_helpers import get_output_key_value_location, dump_features_json, compute_similarity
23
  from gedi.utils.io_helpers import read_csvs
24
+ from utils.column_mappings import column_mappings
25
  import xml.etree.ElementTree as ET
26
  import re
27
  from xml.dom import minidom
 
154
  experiment = self.params.get(EXPERIMENT)
155
  if experiment is not None:
156
  tasks, output_path = get_tasks(experiment, self.output_path)
157
+ columns_to_rename = {col: column_mappings()[col] for col in tasks.columns if col in column_mappings()}
158
+ tasks = tasks.rename(columns=columns_to_rename)
159
  self.output_path = output_path
160
 
161
  if 'ratio_variants_per_number_of_traces' in tasks.columns:#HOTFIX
gedi/utils/bpic_feat_ranges.html DELETED
@@ -1,214 +0,0 @@
1
- <div style="overflow-x:auto;">
2
- <table border="1" class="dataframe">
3
- <thead>
4
- <tr style="text-align: right;">
5
- <th></th>
6
- <th>n_traces</th>
7
- <th>n_unique_traces</th>
8
- <th>ratio_variants_per_number_of_traces</th>
9
- <th>trace_len_min</th>
10
- <th>trace_len_max</th>
11
- <th>trace_len_mean</th>
12
- <th>trace_len_median</th>
13
- <th>trace_len_mode</th>
14
- <th>trace_len_std</th>
15
- <th>trace_len_variance</th>
16
- <th>trace_len_q1</th>
17
- <th>trace_len_q3</th>
18
- <th>trace_len_iqr</th>
19
- <th>trace_len_geometric_mean</th>
20
- <th>trace_len_geometric_std</th>
21
- <th>trace_len_harmonic_mean</th>
22
- <th>trace_len_skewness</th>
23
- <th>trace_len_kurtosis</th>
24
- <th>trace_len_coefficient_variation</th>
25
- <th>trace_len_entropy</th>
26
- <th>trace_len_hist1</th>
27
- <th>trace_len_hist2</th>
28
- <th>trace_len_hist3</th>
29
- <th>trace_len_hist4</th>
30
- <th>trace_len_hist5</th>
31
- <th>trace_len_hist6</th>
32
- <th>trace_len_hist7</th>
33
- <th>trace_len_hist8</th>
34
- <th>trace_len_hist9</th>
35
- <th>trace_len_hist10</th>
36
- <th>trace_len_skewness_hist</th>
37
- <th>trace_len_kurtosis_hist</th>
38
- <th>ratio_most_common_variant</th>
39
- <th>ratio_top_1_variants</th>
40
- <th>ratio_top_5_variants</th>
41
- <th>ratio_top_10_variants</th>
42
- <th>ratio_top_20_variants</th>
43
- <th>ratio_top_50_variants</th>
44
- <th>ratio_top_75_variants</th>
45
- <th>mean_variant_occurrence</th>
46
- <th>std_variant_occurrence</th>
47
- <th>skewness_variant_occurrence</th>
48
- <th>kurtosis_variant_occurrence</th>
49
- <th>n_unique_activities</th>
50
- <th>activities_min</th>
51
- <th>activities_max</th>
52
- <th>activities_mean</th>
53
- <th>activities_median</th>
54
- <th>activities_std</th>
55
- <th>activities_variance</th>
56
- <th>activities_q1</th>
57
- <th>activities_q3</th>
58
- <th>activities_iqr</th>
59
- <th>activities_skewness</th>
60
- <th>activities_kurtosis</th>
61
- <th>n_unique_start_activities</th>
62
- <th>start_activities_min</th>
63
- <th>start_activities_max</th>
64
- <th>start_activities_mean</th>
65
- <th>start_activities_median</th>
66
- <th>start_activities_std</th>
67
- <th>start_activities_variance</th>
68
- <th>start_activities_q1</th>
69
- <th>start_activities_q3</th>
70
- <th>start_activities_iqr</th>
71
- <th>start_activities_skewness</th>
72
- <th>start_activities_kurtosis</th>
73
- <th>n_unique_end_activities</th>
74
- <th>end_activities_min</th>
75
- <th>end_activities_max</th>
76
- <th>end_activities_mean</th>
77
- <th>end_activities_median</th>
78
- <th>end_activities_std</th>
79
- <th>end_activities_variance</th>
80
- <th>end_activities_q1</th>
81
- <th>end_activities_q3</th>
82
- <th>end_activities_iqr</th>
83
- <th>end_activities_skewness</th>
84
- <th>end_activities_kurtosis</th>
85
- <th>eventropy_trace</th>
86
- <th>eventropy_prefix</th>
87
- <th>eventropy_global_block</th>
88
- <th>eventropy_lempel_ziv</th>
89
- <th>eventropy_k_block_diff_1</th>
90
- <th>eventropy_k_block_diff_3</th>
91
- <th>eventropy_k_block_diff_5</th>
92
- <th>eventropy_k_block_ratio_1</th>
93
- <th>eventropy_k_block_ratio_3</th>
94
- <th>eventropy_k_block_ratio_5</th>
95
- <th>eventropy_knn_3</th>
96
- <th>eventropy_knn_5</th>
97
- <th>eventropy_knn_7</th>
98
- <th>epa_variant_entropy</th>
99
- <th>epa_normalized_variant_entropy</th>
100
- <th>epa_sequence_entropy</th>
101
- <th>epa_normalized_sequence_entropy</th>
102
- <th>epa_sequence_entropy_linear_forgetting</th>
103
- <th>epa_normalized_sequence_entropy_linear_forgetting</th>
104
- <th>epa_sequence_entropy_exponential_forgetting</th>
105
- <th>epa_normalized_sequence_entropy_exponential_forgetting</th>
106
- </tr>
107
- </thead>
108
- <tbody>
109
- <tr>
110
- <td>[ min, max ]</td>
111
- <td>[ 226.0, 251734.0 ]</td>
112
- <td>[ 6.0, 28457.0 ]</td>
113
- <td>[ 0.0, 1.0 ]</td>
114
- <td>[ 1.0, 24.0 ]</td>
115
- <td>[ 1.0, 2973.0 ]</td>
116
- <td>[ 1.0, 131.49 ]</td>
117
- <td>[ 1.0, 55.0 ]</td>
118
- <td>[ 1.0, 61.0 ]</td>
119
- <td>[ 0.0, 202.53 ]</td>
120
- <td>[ 0.0, 41017.89 ]</td>
121
- <td>[ 1.0, 44.0 ]</td>
122
- <td>[ 1.0, 169.0 ]</td>
123
- <td>[ 0.0, 161.0 ]</td>
124
- <td>[ 1.0, 53.78 ]</td>
125
- <td>[ 1.0, 5.65 ]</td>
126
- <td>[ 1.0, 51.65 ]</td>
127
- <td>[ -0.58, 111.97 ]</td>
128
- <td>[ -0.97, 14006.75 ]</td>
129
- <td>[ 0.0, 4.74 ]</td>
130
- <td>[ 5.33, 12.04 ]</td>
131
- <td>[ 0.0, 1.99 ]</td>
132
- <td>[ 0.0, 0.42 ]</td>
133
- <td>[ 0.0, 0.4 ]</td>
134
- <td>[ 0.0, 0.19 ]</td>
135
- <td>[ 0.0, 0.14 ]</td>
136
- <td>[ 0.0, 10.0 ]</td>
137
- <td>[ 0.0, 0.02 ]</td>
138
- <td>[ 0.0, 0.04 ]</td>
139
- <td>[ 0.0, 0.0 ]</td>
140
- <td>[ 0.0, 2.7 ]</td>
141
- <td>[ -0.58, 111.97 ]</td>
142
- <td>[ -0.97, 14006.75 ]</td>
143
- <td>[ 0.0, 0.79 ]</td>
144
- <td>[ 0.0, 0.87 ]</td>
145
- <td>[ 0.0, 0.98 ]</td>
146
- <td>[ 0.0, 0.99 ]</td>
147
- <td>[ 0.2, 1.0 ]</td>
148
- <td>[ 0.5, 1.0 ]</td>
149
- <td>[ 0.75, 1.0 ]</td>
150
- <td>[ 1.0, 24500.67 ]</td>
151
- <td>[ 0.04, 42344.04 ]</td>
152
- <td>[ 1.54, 64.77 ]</td>
153
- <td>[ 0.66, 5083.46 ]</td>
154
- <td>[ 1.0, 1152.0 ]</td>
155
- <td>[ 1.0, 66058.0 ]</td>
156
- <td>[ 34.0, 466141.0 ]</td>
157
- <td>[ 4.13, 66058.0 ]</td>
158
- <td>[ 2.0, 66058.0 ]</td>
159
- <td>[ 0.0, 120522.25 ]</td>
160
- <td>[ 0.0, 14525612122.34 ]</td>
161
- <td>[ 1.0, 66058.0 ]</td>
162
- <td>[ 4.0, 79860.0 ]</td>
163
- <td>[ 0.0, 77290.0 ]</td>
164
- <td>[ -0.06, 15.21 ]</td>
165
- <td>[ -1.5, 315.84 ]</td>
166
- <td>[ 1.0, 809.0 ]</td>
167
- <td>[ 1.0, 150370.0 ]</td>
168
- <td>[ 27.0, 199867.0 ]</td>
169
- <td>[ 3.7, 150370.0 ]</td>
170
- <td>[ 1.0, 150370.0 ]</td>
171
- <td>[ 0.0, 65387.49 ]</td>
172
- <td>[ 0.0, 4275524278.19 ]</td>
173
- <td>[ 1.0, 150370.0 ]</td>
174
- <td>[ 4.0, 150370.0 ]</td>
175
- <td>[ 0.0, 23387.25 ]</td>
176
- <td>[ 0.0, 9.3 ]</td>
177
- <td>[ -2.0, 101.82 ]</td>
178
- <td>[ 1.0, 757.0 ]</td>
179
- <td>[ 1.0, 16653.0 ]</td>
180
- <td>[ 28.0, 181328.0 ]</td>
181
- <td>[ 3.53, 24500.67 ]</td>
182
- <td>[ 1.0, 16653.0 ]</td>
183
- <td>[ 0.0, 42344.04 ]</td>
184
- <td>[ 0.0, 1793017566.89 ]</td>
185
- <td>[ 1.0, 16653.0 ]</td>
186
- <td>[ 3.0, 39876.0 ]</td>
187
- <td>[ 0.0, 39766.0 ]</td>
188
- <td>[ -0.7, 13.82 ]</td>
189
- <td>[ -2.0, 255.39 ]</td>
190
- <td>[ 0.0, 13.36 ]</td>
191
- <td>[ 0.0, 16.77 ]</td>
192
- <td>[ 0.0, 24.71 ]</td>
193
- <td>[ 0.0, 685.0 ]</td>
194
- <td>[ -328.0, 962.0 ]</td>
195
- <td>[ 0.0, 871.0 ]</td>
196
- <td>[ 0.0, 881.0 ]</td>
197
- <td>[ 0.0, 935.0 ]</td>
198
- <td>[ 0.0, 7.11 ]</td>
199
- <td>[ 0.0, 7.11 ]</td>
200
- <td>[ 0.0, 8.93 ]</td>
201
- <td>[ 0.0, 648.0 ]</td>
202
- <td>[ 0.0, 618.0 ]</td>
203
- <td>[ 0.0, 11563842.15 ]</td>
204
- <td>[ 0.0, 0.9 ]</td>
205
- <td>[ 0.0, 21146257.12 ]</td>
206
- <td>[ 0.0, 0.76 ]</td>
207
- <td>[ 0.0, 14140225.9 ]</td>
208
- <td>[ 0.0, 0.42 ]</td>
209
- <td>[ 0.0, 15576076.83 ]</td>
210
- <td>[ 0.0, 0.51 ]</td>
211
- </tr>
212
- </tbody>
213
- </table>
214
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils/column_mappings.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ def column_mappings():
2
+
3
+ column_names_short = {
4
+ 'rutpt': 'ratio_unique_traces_per_trace',
5
+ 'rmcv': 'ratio_most_common_variant',
6
+ 'tlcv': 'trace_len_coefficient_variation',
7
+ 'mvo': 'mean_variant_occurrence',
8
+ 'enve': 'epa_normalized_variant_entropy',
9
+ 'ense': 'epa_normalized_sequence_entropy',
10
+ 'eself': 'epa_sequence_entropy_linear_forgetting',
11
+ 'enself': 'epa_normalized_sequence_entropy_linear_forgetting',
12
+ 'eseef': 'epa_sequence_entropy_exponential_forgetting',
13
+ 'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
14
+ }
15
+
16
+ return column_names_short
utils/config_fabric.py CHANGED
@@ -13,6 +13,7 @@ import time
13
  import shutil
14
  import zipfile
15
  import io
 
16
 
17
  st.set_page_config(layout='wide')
18
  INPUT_XES="output/inputlog_temp.xes"
@@ -174,19 +175,7 @@ def set_generator_experiments(generator_params):
174
  df = pd.read_csv(uploaded_file)
175
  if len(df.columns) <= 1:
176
  raise pd.errors.ParserError("Please select a file withat least two columns (e.g. log, feature) and use ',' as a delimiter.")
177
- column_names_short = {
178
- 'rutpt': 'ratio_unique_traces_per_trace',
179
- 'rmcv': 'ratio_most_common_variant',
180
- 'tlcv': 'trace_len_coefficient_variation',
181
- 'mvo': 'mean_variant_occurrence',
182
- 'enve': 'epa_normalized_variant_entropy',
183
- 'ense': 'epa_normalized_sequence_entropy',
184
- 'eself': 'epa_sequence_entropy_linear_forgetting',
185
- 'enself': 'epa_normalized_sequence_entropy_linear_forgetting',
186
- 'eseef': 'epa_sequence_entropy_exponential_forgetting',
187
- 'enseef': 'epa_normalized_sequence_entropy_exponential_forgetting'
188
- }
189
- columns_to_rename = {col: column_names_short[col] for col in df.columns if col in column_names_short}
190
 
191
  # Rename the matching columns
192
  df.rename(columns=columns_to_rename, inplace=True)
 
13
  import shutil
14
  import zipfile
15
  import io
16
+ from column_mappings import column_mappings
17
 
18
  st.set_page_config(layout='wide')
19
  INPUT_XES="output/inputlog_temp.xes"
 
175
  df = pd.read_csv(uploaded_file)
176
  if len(df.columns) <= 1:
177
  raise pd.errors.ParserError("Please select a file withat least two columns (e.g. log, feature) and use ',' as a delimiter.")
178
+ columns_to_rename = {col: column_mappings()[col] for col in df.columns if col in column_mappings()}
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  # Rename the matching columns
181
  df.rename(columns=columns_to_rename, inplace=True)