Spaces:
Sleeping
Sleeping
bkb2135
committed on
Commit
·
12461ea
1
Parent(s):
0c772d3
Update utils using prompting-api
Browse files
utils.py
CHANGED
@@ -26,8 +26,6 @@ USERNAME = 'login19861986'
|
|
26 |
|
27 |
# Initialize wandb with anonymous login
|
28 |
wandb.login(anonymous='must')
|
29 |
-
|
30 |
-
# Your existing code
|
31 |
api = wandb.Api(timeout=600)
|
32 |
|
33 |
IDENTITIES = {
|
@@ -138,16 +136,12 @@ def load_downloaded_runs(time, cols=KEYS):
|
|
138 |
'date-based question answering': 'date_qa',
|
139 |
'question-answering': 'qa',
|
140 |
}
|
|
|
141 |
|
142 |
-
#
|
143 |
-
df_all
|
144 |
|
145 |
df_all.sort_values(by=['_timestamp'], inplace=True)
|
146 |
-
# Check if df_all has the task columns
|
147 |
-
if 'task' in df_all.columns:
|
148 |
-
df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
|
149 |
-
else:
|
150 |
-
df_all['task'] = "Task was not found"
|
151 |
|
152 |
return df_all
|
153 |
|
@@ -229,13 +223,13 @@ def download_runs(time, df_vali):
|
|
229 |
save_path = f'data/wandb/{row.run_id}.parquet'
|
230 |
# Create the directory if it does not exist
|
231 |
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
232 |
-
|
233 |
if os.path.exists(save_path):
|
234 |
pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
|
235 |
continue
|
236 |
|
237 |
try:
|
238 |
-
pbar.set_description(f'* Downloading run {row.run_id!r}')
|
239 |
run = api.run(row.run_path)
|
240 |
|
241 |
# By default we just download a subset of events (500 most recent)
|
@@ -418,4 +412,4 @@ def load_state_vars(username=USERNAME, percentile=0.95):
|
|
418 |
|
419 |
if __name__ == '__main__':
|
420 |
|
421 |
-
pass
|
|
|
26 |
|
27 |
# Initialize wandb with anonymous login
|
28 |
wandb.login(anonymous='must')
|
|
|
|
|
29 |
api = wandb.Api(timeout=600)
|
30 |
|
31 |
IDENTITIES = {
|
|
|
136 |
'date-based question answering': 'date_qa',
|
137 |
'question-answering': 'qa',
|
138 |
}
|
139 |
+
df_all.task = df_all.task.apply(lambda x: task_mapping.get(x, x))
|
140 |
|
141 |
+
# Runs which do not have a turn field are imputed to be turn zero (single turn)
|
142 |
+
df_all.turn.fillna(0, inplace=True)
|
143 |
|
144 |
df_all.sort_values(by=['_timestamp'], inplace=True)
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
return df_all
|
147 |
|
|
|
223 |
save_path = f'data/wandb/{row.run_id}.parquet'
|
224 |
# Create the directory if it does not exist
|
225 |
os.makedirs(os.path.dirname(save_path), exist_ok=True)
|
226 |
+
|
227 |
if os.path.exists(save_path):
|
228 |
pbar.set_description(f'>> Skipping {row.run_id!r} because file {save_path!r} already exists')
|
229 |
continue
|
230 |
|
231 |
try:
|
232 |
+
pbar.set_description(f'* Downloading run {row.run_id!r}', flush=True)
|
233 |
run = api.run(row.run_path)
|
234 |
|
235 |
# By default we just download a subset of events (500 most recent)
|
|
|
412 |
|
413 |
if __name__ == '__main__':
|
414 |
|
415 |
+
pass
|