meg-huggingface committed · Commit 8cd9975 · 1 Parent(s): e6dead6
changing batch size to auto

Files changed:
- main_backend_harness.py  +1 -1
- src/backend/manage_requests.py  +32 -4
main_backend_harness.py  CHANGED

@@ -70,7 +70,7 @@ def run_auto_eval():
         num_fewshot=NUM_FEWSHOT,
         local_dir=EVAL_RESULTS_PATH_BACKEND,
         results_repo=RESULTS_REPO,
-        batch_size=
+        batch_size='auto',
         device=DEVICE,
         no_cache=True,
         limit=LIMIT
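For context, `batch_size='auto'` asks lm-eval-harness to probe for the largest batch that fits in memory rather than using a fixed size. A hypothetical sketch of how such a kwarg is typically consumed downstream (the model type, checkpoint, and task list below are illustrative, not taken from this repo, and exact argument names depend on the installed harness version):

from lm_eval import evaluator

# Illustrative call only: run_auto_eval is assumed to forward these kwargs
# to the harness; adjust names to the lm-eval version actually installed.
results = evaluator.simple_evaluate(
    model="hf-causal",             # illustrative model type
    model_args="pretrained=gpt2",  # illustrative checkpoint
    tasks=["hellaswag"],           # illustrative task
    num_fewshot=0,
    batch_size="auto",             # let the harness pick the largest batch that fits
    device="cuda:0",
    limit=10,
)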
src/backend/manage_requests.py  CHANGED

@@ -2,6 +2,7 @@ import glob
 import json
 from dataclasses import dataclass
 from typing import Optional
+from datetime import datetime, timezone
 
 from huggingface_hub import HfApi, snapshot_download
 from src.envs import TOKEN
@@ -87,6 +88,7 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     for json_filepath in json_files:
         with open(json_filepath) as fp:
             data = json.load(fp)
+        # TODO: isn't job_status the string "RUNNING"?
         if data["status"] in job_status:
             data["json_filepath"] = json_filepath
             print(data.items())
@@ -96,6 +98,30 @@ def get_eval_requests(job_status: list, local_dir: str, hf_repo: str) -> list[EvalRequest]:
     return eval_requests
 
 
+def check_set_to_fail(eval_request: EvalRequest):
+    """Checks how long a pending eval request has been running"""
+    json_filepath = eval_request.json_filepath
+
+    with open(json_filepath) as fp:
+        data = json.load(fp)
+
+    status = data["status"]
+    if status == "PENDING" or status == "RUNNING":
+        time_format = "%Y-%m-%dT%H:%M:%SZ"
+        submitted_time_str = data["submitted_time"]
+        submitted_time_naive = datetime.strptime(submitted_time_str, time_format)
+        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+        submitted_time = submitted_time_naive.replace(tzinfo=current_time.tzinfo)
+        difference = current_time - submitted_time
+        diff_seconds = difference.total_seconds()
+        # If it's been running for less than 2 hours, leave it alone.
+        if diff_seconds < 7200:
+            return False
+        else:
+            return True
+    return True
+
+
 def check_completed_evals(
     api: HfApi,
     hf_repo: str,
@@ -126,7 +152,9 @@ def check_completed_evals(
             )
             set_eval_request(api, eval_request, completed_status, hf_repo, local_dir)
         else:
-
-
-
-
+            set_to_fail = check_set_to_fail(eval_request)
+            if set_to_fail:
+                logger.info(
+                    f"No result file found for {model} setting it to {failed_status}"
+                )
+                set_eval_request(api, eval_request, failed_status, hf_repo, local_dir)