meg-huggingface committed · Commit 8cd9975 · 1 Parent(s): e6dead6
changing batch size to auto

Files changed:
- main_backend_harness.py  +1 -1
- src/backend/manage_requests.py  +32 -4
main_backend_harness.py  CHANGED

@@ -70,7 +70,7 @@ def run_auto_eval():
         num_fewshot=NUM_FEWSHOT,
         local_dir=EVAL_RESULTS_PATH_BACKEND,
         results_repo=RESULTS_REPO,
-        batch_size=
+        batch_size='auto',
         device=DEVICE,
         no_cache=True,
         limit=LIMIT
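For context, `batch_size='auto'` asks lm-eval-harness to probe for the largest batch that fits in memory rather than using a fixed size. A hypothetical sketch of how such a kwarg is typically consumed downstream (the model type, checkpoint, and task list below are illustrative, not taken from this repo, and exact argument names depend on the installed harness version):

from lm_eval import evaluator

# Illustrative call only: run_auto_eval is assumed to forward these kwargs
# to the harness; adjust names to the lm-eval version actually installed.
results = evaluator.simple_evaluate(
    model="hf-causal",             # illustrative model type
    model_args="pretrained=gpt2",  # illustrative checkpoint
    tasks=["hellaswag"],           # illustrative task
    num_fewshot=0,
    batch_size="auto",             # let the harness pick the largest batch that fits
    device="cuda:0",
    limit=10,
)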
src/backend/manage_requests.py  CHANGED

@@ -2,6 +2,7 @@ import glob
 import json
 from dataclasses import dataclass
 from typing import Optional
+from datetime import datetime, timezone
 
 from huggingface_hub import HfApi, snapshot_download
 from src.envs import TOKEN
@@ -87,6 +88,7 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     for json_filepath in json_files:
         with open(json_filepath) as fp:
             data = json.load(fp)
+        # TODO: isn't job_status the string "RUNNING"?
         if data["status"] in job_status:
             data["json_filepath"] = json_filepath
             print(data.items())
@@ -96,6 +98,30 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     return eval_requests
 
 
+def check_set_to_fail(eval_request: EvalRequest):
+    """Checks how long a pending eval request has been running"""
+    json_filepath = eval_request.json_filepath
+
+    with open(json_filepath) as fp:
+        data = json.load(fp)
+
+    status = data["status"]
+    if status == "PENDING" or status == "RUNNING":
+        time_format = "%Y-%m-%dT%H:%M:%SZ"
+        submitted_time_str = data["submitted_time"]
+        submitted_time_naive = datetime.strptime(submitted_time_str, time_format)
+        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        submitted_time = submitted_time_naive.replace(tzinfo=current_time.tzinfo)
+        difference = current_time - submitted_time
+        diff_seconds = difference.total_seconds()
+        # If it's been running for less than 2 hours, leave it alone.
+        if diff_seconds < 7200:
+            return False
+        else:
+            return True
+    return True
+
+
 def check_completed_evals(
     api: HfApi,
     hf_repo: str,
@@ -126,7 +152,9 @@ def check_completed_evals(
             )
             set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
         else:
-
-
-
-
+            set_to_fail = check_set_to_fail(eval_request)
+            if set_to_fail:
+                logger.info(
+                    f"No result file found for {model} setting it to {failed_status}"
+                )
+                set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)