bkb2135 committed on
Commit
12461ea
·
1 Parent(s): 0c772d3

Update utils using prompting-api

Browse files
Files changed (1) hide show
  1. utils.py +6 -12
utils.py CHANGED
@@ -26,8 +26,6 @@ USERNAME = 'login19861986'
26
 
27
  # Initialize wandb with anonymous login
28
  wandb.login(anonymous='must')
29
-
30
- # Your existing code
31
  api = wandb.Api(timeout=600)
32
 
33
  IDENTITIES = {
@@ -138,16 +136,12 @@ def load_downloaded_runs(time, cols=KEYS):
138
  'date-based question answering': 'date_qa',
139
  'question-answering': 'qa',
140
  }
 
141
 
142
- # Fill the missing values in the turn column with 0
143
- df_all['turn'] = df_all['turn'].fillna(0)
144
 
145
  df_all.sort_values(by=['_timestamp'], inplace=True)
146
- # Check if df_all has the task columns
147
- if 'task' in df_all.columns:
148
- df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
149
- else:
150
- df_all['task'] = "Task was not found"
151
 
152
  return df_all
153
 
@@ -229,13 +223,13 @@ def download_runs(time, df_vali):
229
  save_path = f'data/wandb/{row.run_id}.parquet'
230
  # Create the directory if it does not exist
231
  os.makedirs(os.path.dirname(save_path), exist_ok=True)
232
-
233
  if os.path.exists(save_path):
234
  pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
235
  continue
236
 
237
  try:
238
- pbar.set_description(f'* Downloading run {row.run_id!r}')
239
  run = api.run(row.run_path)
240
 
241
  # By default we just download a subset of events (500 most recent)
@@ -418,4 +412,4 @@ def load_state_vars(username=USERNAME, percentile=0.95):
418
 
419
  if __name__ == '__main__':
420
 
421
- pass
 
26
 
27
  # Initialize wandb with anonymous login
28
  wandb.login(anonymous='must')
 
 
29
  api = wandb.Api(timeout=600)
30
 
31
  IDENTITIES = {
 
136
  'date-based question answering': 'date_qa',
137
  'question-answering': 'qa',
138
  }
139
+ df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
140
 
141
+ # Runs which do not have a turn field are imputed to be turn zero (single turn)
142
+ df_all.turn.fillna(0, inplace=True)
143
 
144
  df_all.sort_values(by=['_timestamp'], inplace=True)
 
 
 
 
 
145
 
146
  return df_all
147
 
 
223
  save_path = f'data/wandb/{row.run_id}.parquet'
224
  # Create the directory if it does not exist
225
  os.makedirs(os.path.dirname(save_path), exist_ok=True)
226
+
227
  if os.path.exists(save_path):
228
  pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
229
  continue
230
 
231
  try:
232
+ pbar.set_description(f'* Downloading run {row.run_id!r}', flush=True)
233
  run = api.run(row.run_path)
234
 
235
  # By default we just download a subset of events (500 most recent)
 
412
 
413
  if __name__ == '__main__':
414
 
415
+ pass