mgyigit committed
Commit 3dc7215 · verified · 1 Parent(s): 62ef124

Update src/saving_utils.py

Files changed (1):
  1. src/saving_utils.py +83 -60
src/saving_utils.py CHANGED
@@ -76,26 +76,11 @@ def upload_to_hub(benchmark_types, repo_id="mgyigit/probe-data", repo_type="spac
     return 0
 
 
-def save_csv_locally(dataframe, file_name, save_dir="/tmp"):
-    # Ensure the save directory exists
-    os.makedirs(save_dir, exist_ok=True)
-
-    # Construct the full file path
-    file_path = os.path.join(save_dir, file_name)
-
-    # Save the DataFrame as a CSV
-    dataframe.to_csv(file_path, index=False)
-    print(f"Saved {file_name} to {file_path}")
-
-    return file_path
-
-
 def save_similarity_output(
     output_dict,
     method_name,
     leaderboard_path="/tmp/leaderboard_results.csv",
     similarity_path="/tmp/similarity_results.csv",
-    repo_id="mgyigit/probe-data",
 ):
     # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
@@ -155,26 +140,42 @@ def save_similarity_output(
         similarity_df.loc[similarity_df['Method'] == method_name, f"{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
         leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"sim_{dataset}_Ave_pvalue"] = averages[f"{dataset}_Ave_pvalue"]
 
-    leaderboard_file = save_csv_locally(leaderboard_df, "leaderboard_results.csv")
-    similarity_file = save_csv_locally(similarity_df, "similarity_results.csv")
+    leaderboard_df.to_csv(leaderboard_path, index=False)
+    similarity_df.to_csv(similarity_path, index=False)
 
     return 0
 
-def save_function_output(model_output, method_name, func_results_path="/home/user/app/src/data/function_results.csv", leaderboard_path="/home/user/app/src/data/leaderboard_results.csv"):
-    # Load or initialize the DataFrames
-    if os.path.exists(func_results_path):
-        func_results_df = pd.read_csv(func_results_path)
-    else:
-        func_results_df = pd.DataFrame(columns=['Method'])
 
+def save_function_output(
+    model_output,
+    method_name,
+    func_results_path="/tmp/function_results.csv",
+    leaderboard_path="/tmp/leaderboard_results.csv"
+):
+    # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
     else:
-        leaderboard_df = pd.DataFrame()
+        print("Leaderboard file not found!")
+        return -1
+
+    if os.path.exists(func_results_path):
+        func_results_df = pd.read_csv(func_results_path)
+    else:
+        print("Function file not found!")
+        return -1
 
-    # Ensure the method_name row exists in function results
     if method_name not in func_results_df['Method'].values:
-        func_results_df = pd.concat([func_results_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
+        # Create a new row for the method with default values
+        new_row = {col: None for col in func_results_df.columns}
+        new_row['Method'] = method_name
+        func_results_df = pd.concat([func_results_df, pd.DataFrame([new_row])], ignore_index=True)
+
+    if method_name not in leaderboard_df['Method'].values:
+        new_row = {col: None for col in leaderboard_df.columns}
+        new_row['Method'] = method_name
+        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
+
 
     # Storage for averaging in leaderboard results
     metrics_sum = {
@@ -193,10 +194,10 @@ def save_function_output(model_output, method_name, func_results_path="/home/use
         aspect, dataset1, dataset2 = key.split('_')
 
         # Save each metric to function_results under its respective column
-        func_results_df.at[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_accuracy"] = accuracy
-        func_results_df.at[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_F1"] = f1
-        func_results_df.at[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_precision"] = precision
-        func_results_df.at[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_recall"] = recall
+        func_results_df.loc[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_accuracy"] = accuracy
+        func_results_df.loc[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_F1"] = f1
+        func_results_df.loc[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_precision"] = precision
+        func_results_df.loc[func_results_df['Method'] == method_name, f"{aspect}_{dataset1}_{dataset2}_recall"] = recall
 
         # Add values for leaderboard averaging
         metrics_sum['accuracy'][aspect].append(accuracy)
@@ -209,7 +210,7 @@ def save_function_output(model_output, method_name, func_results_path="/home/use
         for aspect in ['BP', 'CC', 'MF']:
             if metrics_sum[metric][aspect]:
                 aspect_average = sum(metrics_sum[metric][aspect]) / len(metrics_sum[metric][aspect])
-                leaderboard_df.at[0, f"func_{aspect}_{metric}"] = aspect_average
+                leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"func_{aspect}_{metric}"] = aspect_average
 
         # Calculate overall average if each aspect has entries
         if all(metrics_sum[metric][aspect] for aspect in ['BP', 'CC', 'MF']):
@@ -217,7 +218,7 @@ def save_function_output(model_output, method_name, func_results_path="/home/use
                 sum(metrics_sum[metric][aspect]) / len(metrics_sum[metric][aspect])
                 for aspect in ['BP', 'CC', 'MF']
             ) / 3
-            leaderboard_df.at[0, f"func_Ave_{metric}"] = overall_average
+            leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"func_Ave_{metric}"] = overall_average
 
     # Save updated DataFrames to CSV
     func_results_df.to_csv(func_results_path, index=False)
@@ -225,69 +226,91 @@ def save_function_output(model_output, method_name, func_results_path="/home/use
 
     return 0
 
-def save_family_output(model_output, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", family_results_path="/home/user/app/src/data/family_results.csv"):
+
+def save_family_output(
+    model_output,
+    method_name,
+    leaderboard_path="/tmp/leaderboard_results.csv",
+    family_results_path="/tmp/family_results.csv"
+):
     # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
     else:
-        leaderboard_df = pd.DataFrame(columns=['Method'])
+        print("Leaderboard file not found!")
+        return -1
 
     if os.path.exists(family_results_path):
         family_results_df = pd.read_csv(family_results_path)
     else:
-        family_results_df = pd.DataFrame(columns=['Method'])
+        print("Family file not found!")
+        return -1
 
-    # Ensure the method_name row exists in the leaderboard results
-    if method_name not in leaderboard_df['Method'].values:
-        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
-
-    # Ensure the method_name row exists in family results
     if method_name not in family_results_df['Method'].values:
-        family_results_df = pd.concat([family_results_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
+        # Create a new row for the method with default values
+        new_row = {col: None for col in family_results_df.columns}
+        new_row['Method'] = method_name
+        family_results_df = pd.concat([family_results_df, pd.DataFrame([new_row])], ignore_index=True)
+
+    if method_name not in leaderboard_df['Method'].values:
+        new_row = {col: None for col in leaderboard_df.columns}
+        new_row['Method'] = method_name
+        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
 
     # Iterate through the datasets and metrics
     for dataset, metrics in model_output.items():
        for metric, values in metrics.items():
            # Calculate the average for each metric in leaderboard results
            avg_value = sum(values) / len(values) if values else None
-           leaderboard_df.at[leaderboard_df['Method'] == method_name, f"fam_{dataset}_{metric}_ave"] = avg_value
+           leaderboard_df.loc[leaderboard_df['Method'] == method_name, f"fam_{dataset}_{metric}_ave"] = avg_value
 
            # Save each fold result for family results
            for i, value in enumerate(values):
-               family_results_df.at[family_results_df['Method'] == method_name, f"{dataset}_{metric}_{i}"] = value
+               family_results_df.loc[family_results_df['Method'] == method_name, f"{dataset}_{metric}_{i}"] = value
 
     # Save updated DataFrames to CSV
     leaderboard_df.to_csv(leaderboard_path, index=False)
     family_results_df.to_csv(family_results_path, index=False)
 
-    return leaderboard_df, family_results_df
+    return 0
+
 
-def save_affinity_output(model_output, method_name, leaderboard_path="/home/user/app/src/data/leaderboard_results.csv", affinity_results_path="/home/user/app/src/data/affinity_results.csv"):
-    # Load or initialize DataFrames
+def save_affinity_output(
+    model_output,
+    method_name,
+    leaderboard_path="/tmp/leaderboard_results.csv",
+    affinity_results_path="/tmp/affinity_results.csv"
+):
+    # Load or initialize the DataFrames
     if os.path.exists(leaderboard_path):
         leaderboard_df = pd.read_csv(leaderboard_path)
     else:
-        leaderboard_df = pd.DataFrame(columns=['Method'])
+        print("Leaderboard file not found!")
+        return -1
 
     if os.path.exists(affinity_results_path):
         affinity_results_df = pd.read_csv(affinity_results_path)
     else:
-        affinity_results_df = pd.DataFrame(columns=['Method'])
+        print("Affinity file not found!")
+        return -1
 
-    # Ensure the method_name row exists in the leaderboard results
-    if method_name not in leaderboard_df['Method'].values:
-        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
-
-    # Ensure the method_name row exists in affinity results
     if method_name not in affinity_results_df['Method'].values:
-        affinity_results_df = pd.concat([affinity_results_df, pd.DataFrame({'Method': [method_name]})], ignore_index=True)
+        # Create a new row for the method with default values
+        new_row = {col: None for col in affinity_results_df.columns}
+        new_row['Method'] = method_name
+        affinity_results_df = pd.concat([affinity_results_df, pd.DataFrame([new_row])], ignore_index=True)
+
+    if method_name not in leaderboard_df['Method'].values:
+        new_row = {col: None for col in leaderboard_df.columns}
+        new_row['Method'] = method_name
+        leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)
 
     # Process 'summary' section for leaderboard results
     summary = model_output.get('summary', {})
     if summary:
-        leaderboard_df.at[leaderboard_df['Method'] == method_name, 'aff_mse_ave'] = summary.get('val_mse_error')
-        leaderboard_df.at[leaderboard_df['Method'] == method_name, 'aff_mae_ave'] = summary.get('val_mae_error')
-        leaderboard_df.at[leaderboard_df['Method'] == method_name, 'aff_corr_ave'] = summary.get('validation_corr')
+        leaderboard_df.loc[leaderboard_df['Method'] == method_name, 'aff_mse_ave'] = summary.get('val_mse_error')
+        leaderboard_df.loc[leaderboard_df['Method'] == method_name, 'aff_mae_ave'] = summary.get('val_mae_error')
+        leaderboard_df.loc[leaderboard_df['Method'] == method_name, 'aff_corr_ave'] = summary.get('validation_corr')
 
     # Process 'detail' section for affinity results
     detail = model_output.get('detail', {})
@@ -295,11 +318,11 @@ def save_affinity_output(model_output, method_name, leaderboard_path="/home/user
         # Save each 10-fold cross-validation result for mse, mae, and corr
         for i in range(10):
             if 'val_mse_errors' in detail:
-                affinity_results_df.at[affinity_results_df['Method'] == method_name, f"mse_{i}"] = detail['val_mse_errors'][i]
+                affinity_results_df.loc[affinity_results_df['Method'] == method_name, f"mse_{i}"] = detail['val_mse_errors'][i]
             if 'val_mae_errors' in detail:
-                affinity_results_df.at[affinity_results_df['Method'] == method_name, f"mae_{i}"] = detail['val_mae_errors'][i]
+                affinity_results_df.loc[affinity_results_df['Method'] == method_name, f"mae_{i}"] = detail['val_mae_errors'][i]
             if 'validation_corrs' in detail:
-                affinity_results_df.at[affinity_results_df['Method'] == method_name, f"corr_{i}"] = detail['validation_corrs'][i]
+                affinity_results_df.loc[affinity_results_df['Method'] == method_name, f"corr_{i}"] = detail['validation_corrs'][i]
 
     # Save updated DataFrames to CSV
     leaderboard_df.to_csv(leaderboard_path, index=False)
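
The recurring pattern in this commit is: append a None-filled row for the submitted method if it is not present yet, then write metric values with a boolean-mask .loc assignment instead of .at (which expects scalar row/column labels, not a mask). The sketch below only illustrates that pattern; the column name, method names, and values are invented and are not part of the repository.

import pandas as pd

# Toy leaderboard with the same 'Method'-keyed layout as the CSVs in this commit.
# "sim_example_Ave_pvalue" is a made-up column following the f"sim_{dataset}_Ave_pvalue" pattern.
leaderboard_df = pd.DataFrame({"Method": ["baseline_method"], "sim_example_Ave_pvalue": [0.01]})

method_name = "my_new_method"  # hypothetical submission

# Append a None-filled row when the method is missing (same pattern as the diff).
if method_name not in leaderboard_df["Method"].values:
    new_row = {col: None for col in leaderboard_df.columns}
    new_row["Method"] = method_name
    leaderboard_df = pd.concat([leaderboard_df, pd.DataFrame([new_row])], ignore_index=True)

# Boolean-mask assignment: .loc accepts the mask and creates the column if needed;
# .at with a mask is invalid scalar access, which is why the diff moves to .loc.
leaderboard_df.loc[leaderboard_df["Method"] == method_name, "sim_example_Ave_pvalue"] = 0.05

print(leaderboard_df)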
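
A hedged usage sketch for the updated save_affinity_output: the dictionary keys ('summary', 'detail', 'val_mse_error', 'val_mse_errors', and so on) follow what the function reads in the diff, but the import path, method name, numeric values, and the seed CSVs are assumptions made for illustration only.

import pandas as pd

from src.saving_utils import save_affinity_output  # hypothetical import path

# Seed minimal /tmp CSVs so the function finds them; the updated code returns -1 when a file is missing.
pd.DataFrame({"Method": ["baseline_method"]}).to_csv("/tmp/leaderboard_results.csv", index=False)
pd.DataFrame({"Method": ["baseline_method"]}).to_csv("/tmp/affinity_results.csv", index=False)

# Toy output shaped like the keys the function reads; numbers are fabricated.
model_output = {
    "summary": {"val_mse_error": 0.42, "val_mae_error": 0.31, "validation_corr": 0.88},
    "detail": {
        "val_mse_errors": [0.40 + 0.01 * i for i in range(10)],
        "val_mae_errors": [0.30 + 0.01 * i for i in range(10)],
        "validation_corrs": [0.85 + 0.005 * i for i in range(10)],
    },
}

status = save_affinity_output(model_output, method_name="my_new_method")
print("saved" if status == 0 else "missing input CSVs")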