Ahmet Kaan Sever committed
Commit 24a5ac7 · 1 Parent(s): db96c4e

Now the backend writes to a results file. Used threading for this.
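The shape of the change is fire-and-forget threading: the route handler spawns a daemon thread for the long-running evaluation and returns immediately. A minimal sketch of that pattern, with a hypothetical `run_suite` standing in for the real evaluation work shown in the `svc/router.py` diff below:

```python
import threading

from fastapi import FastAPI

app = FastAPI()

def run_suite(model_name: str) -> None:
    # Placeholder for the long-running work; in the real handler this is
    # the DeepEval run plus the results upload.
    ...

@app.post("/eval")
def start_eval(model_name: str):
    # daemon=True so a hung evaluation cannot block process shutdown
    threading.Thread(target=run_suite, args=(model_name,), daemon=True).start()
    # Respond immediately; the work continues in the background thread.
    return {"status": "started"}
```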

Files changed (2):
  1. requirements.txt +1 -0
  2. svc/router.py +61 -39
requirements.txt CHANGED
@@ -7,5 +7,6 @@ python-jose
 python-multipart
 deepeval
 --extra-index-url https://download.pytorch.org/whl/cu113
+huggingface-hub>=0.29.1
 torch
 sentencepiece
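The new `huggingface-hub>=0.29.1` dependency provides the `HfApi` client that `svc/router.py` uses below for `model_info` and `upload_file`. A quick smoke test after installing (the `gpt2` repo id is just an example; any public model works):

```python
from huggingface_hub import HfApi, ModelInfo

api = HfApi()  # anonymous client is enough for public model metadata
info: ModelInfo = api.model_info("gpt2")
print(info.sha)  # the commit sha that the handler records into its results config
```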
svc/router.py CHANGED
@@ -1,3 +1,4 @@
+from datetime import datetime
 from fastapi import APIRouter, HTTPException, Depends
 import logging
 
@@ -8,11 +9,13 @@ from auth.authentication import get_current_user, create_access_token
 from dotenv import load_dotenv
 import os
 import json
+from pathlib import Path
 from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
 import torch
 import gc
 from time import time
 from huggingface_hub import HfApi, ModelInfo
+import threading
 
 
 router = APIRouter()
@@ -25,7 +28,6 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Or configure a HfApi client
 hf_api = HfApi(
-    endpoint="https://huggingface.co",  # Can be a Private Hub endpoint.
     token=HF_TOKEN,  # Token is not persisted on the machine.
 )
 
@@ -87,45 +89,65 @@ def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_c
     return TaskResponse(results=dumped)
 
 
+
+
 @router.post("/deepeval/eval", response_model=TaskResponse)
-async def deep_eval_suite(request: DeepEvalSuiteRequest):
-    # Free up VRAM
-    torch.cuda.empty_cache()
-    des = DeepEvalTaskManager(request.model_name, request.tasks)
-    start_time = time()
-    results = des.run_tasks()  # TODO: format should be different. Check metunlp/results repo for the correct format
-    end_time = time()
-    duration = round(end_time - start_time, 2)  # total_evaluation_time_seconds
-
-    model_info: ModelInfo = hf_api.model_info(request.model_name)
-
-    config = {
-        "model_source": "hf",
-        "num_fewshot": 0,
-        "batch_size": 8,
-        "batch_sizes": [],
-        "device": "cuda:0",  # TODO: take this from requests
-        # "no_cache": true,
-        # "limit": null,
-        # "bootstrap_iters": 100000,
-        # "description_dict": null,
-        "model_dtype": "torch.float16",  # TODO: take this from requests
-        "model_name": request.model_name,
-        "model_sha": model_info.sha
-    }
-
-    tbr_dict = {
-        "results": results,
-        "config": config,
-        "total_evaluation_time_seconds": duration,
-        "start_time": start_time,
-        "end_time": end_time
-    }
-
-    json_results = json.dumps(tbr_dict)
-
-    print("Returning:", json_results)
-    return TaskResponse(results=json_results)
+def deep_eval_suite(request: DeepEvalSuiteRequest):
+    def run_in_background():
+        try:
+            torch.cuda.empty_cache()  # free VRAM before loading the model
+            des = DeepEvalTaskManager(request.model_name, request.tasks)
+
+            start_time = time()
+            results = des.run_tasks()
+            end_time = time()
+            duration = round(end_time - start_time, 2)
+
+            model_info: ModelInfo = hf_api.model_info(request.model_name)
+
+            config = {
+                "model_source": "hf",
+                "num_fewshot": 0,
+                "batch_size": 8,
+                "device": "cuda:0",
+                "model_dtype": "torch.float16",
+                "model_name": request.model_name,
+                "model_sha": model_info.sha,
+            }
+
+            final_results = {
+                "results": results,
+                "config": config,
+                "total_evaluation_time_seconds": duration,
+                "start_time": start_time,
+                "end_time": end_time
+            }
+
+            # Save and upload
+            dumped = json.dumps(final_results, indent=2)
+            path = Path("/tmp", request.model_name, f"results_{datetime.now()}.json")
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(dumped)
+
+            RESULTS_REPO = "metunlp/results"
+            hf_api.upload_file(
+                path_or_fileobj=path,
+                path_in_repo=path.relative_to("/tmp").as_posix(),
+                repo_id=RESULTS_REPO,
+                repo_type="dataset",
+            )
+
+            logger.info(f"✅ Uploaded results to HF Hub for {request.model_name}")
+
+        except Exception as e:
+            logger.exception(f"❌ Background evaluation failed: {e}")
+
+    # 🔁 Start evaluation in background
+    threading.Thread(target=run_in_background, daemon=True).start()
+
+    # ✅ Immediately respond
+    return TaskResponse(results="🚀 Evaluation started in background.")
+
 
 
 def get_gpu_tier():
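With this change the endpoint acknowledges immediately, and the results file later appears under the model's name in the `metunlp/results` dataset repo. A sketch of exercising the endpoint, assuming the service runs on localhost:8000 and that `DeepEvalSuiteRequest` exposes `model_name` and `tasks` fields (matching their use in the handler; the payload values are hypothetical):

```python
import requests

resp = requests.post(
    "http://localhost:8000/deepeval/eval",
    json={"model_name": "gpt2", "tasks": ["truthfulqa"]},  # hypothetical payload
    timeout=10,
)
print(resp.json())  # expected: {"results": "🚀 Evaluation started in background."}
```

Note that the upload path mirrors the local layout under /tmp, so a model id like `org/model` lands at `org/model/results_<timestamp>.json` in the dataset repo.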