Spaces:

metunlp
/

model-eval-be

Paused

Ahmet Kaan Sever committed 10 days ago

Commit

8930e56

1 Parent(s): 5634da8

Added api for returning hardware and added some logging and error handling for task evaluation.

Files changed (2) hide show

src/deepeval/deepeval_task_manager.py CHANGED Viewed

@@ -60,11 +60,15 @@ class DeepEvalTaskManager:
         """Execute validated tasks in order."""
         results = {}
         for task_name, task_method in self.tasks_to_run.items():
-            print("Running task: ", task_name)
-            task_enum = getattr(Task, task_name)
-            task_value = task_enum.value
-            results[task_value] = task_method()  # Call the stored method reference
         return results
     def sentiment_analysis_tr(self):

         """Execute validated tasks in order."""
         results = {}
         for task_name, task_method in self.tasks_to_run.items():
+            try:
+                print("Running task: ", task_name)
+                task_enum = getattr(Task, task_name)
+                task_value = task_enum.value
+                results[task_value] = task_method()  # Call the stored method reference
+            except Exception as e:
+                print(f"Error At Task: {task_name} - {e}")
+                continue
+        print("All tasks completed.")
         return results
     def sentiment_analysis_tr(self):

svc/router.py CHANGED Viewed

@@ -48,6 +48,11 @@ async def deep_eval_status():
     #Return running with 200 status code
     return {"status": "running"}
 @router.post("/chat",  response_model=TaskResponse)
 def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_current_user)):
     logger.info(request)
@@ -84,6 +89,8 @@ def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_c
 @router.post("/deepeval/eval", response_model=TaskResponse)
 async def deep_eval_suite(request: DeepEvalSuiteRequest):
     des = DeepEvalTaskManager(request.model_name, request.tasks)
     start_time = time()
     results = des.run_tasks() #TODO: format should be different. Check metunlp/results repo for the correct format
@@ -117,13 +124,24 @@ async def deep_eval_suite(request: DeepEvalSuiteRequest):
     json_results = json.dumps(tbr_dict)
-    #Free up VRAM
-    torch.cuda.empty_cache()
-    #Free up RAM
-    des = None
-    gc.collect()
     return TaskResponse(results=json_results)

     #Return running with 200 status code
     return {"status": "running"}
@router.get("/deepeval-hardware")
def hardware_status():
    """Return the GPU/tier description produced by ``get_gpu_tier()``."""
    return get_gpu_tier()
 @router.post("/chat",  response_model=TaskResponse)
 def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_current_user)):
     logger.info(request)
 @router.post("/deepeval/eval", response_model=TaskResponse)
 async def deep_eval_suite(request: DeepEvalSuiteRequest):
+    #Free up VRAM
+    torch.cuda.empty_cache()
     des = DeepEvalTaskManager(request.model_name, request.tasks)
     start_time = time()
     results = des.run_tasks() #TODO: format should be different. Check metunlp/results repo for the correct format
     json_results = json.dumps(tbr_dict)
     return TaskResponse(results=json_results)
# Exact (lower-cased) model token found in the CUDA device name -> payload
# reported for that GPU. Keys are matched against whole tokens, never as
# substrings, so "l4" cannot shadow "l40s" and "t4" cannot match "t400".
_GPU_TIERS = {
    "t4": {"gpu": "Tesla T4", "tier": "t4-small"},
    "l4": {"gpu": "NVIDIA L4", "tier": "l4x1"},
    "l40s": {"gpu": "NVIDIA L40S", "tier": "l40sx1"},
    "a10g": {"gpu": "NVIDIA A10G", "tier": "a10g"},
}


def get_gpu_tier():
    """Describe the hardware of CUDA device 0 as a ``{"gpu", "tier"}`` dict.

    Returns:
        ``{"gpu": "CPU", "tier": "cpu"}`` when CUDA is not available.
        Otherwise the entry of ``_GPU_TIERS`` whose key equals one of the
        alphanumeric tokens of the lower-cased device name, or
        ``{"gpu": <lower-cased device name>, "tier": "unknown"}`` when no
        token is recognised.
    """
    # Function-scope import keeps this block self-contained.
    import re

    if not torch.cuda.is_available():
        return {"gpu": "CPU", "tier": "cpu"}

    gpu_name = torch.cuda.get_device_name(0).lower()

    # Split e.g. "nvidia a100-sxm4-40gb" into ["nvidia", "a100", "sxm4", "40gb"]
    # and compare whole tokens. The previous substring test ('"l4" in name')
    # matched "l40s" first, so L40S cards were always reported as L4.
    for token in re.findall(r"[a-z0-9]+", gpu_name):
        tier_info = _GPU_TIERS.get(token)
        if tier_info is not None:
            # Return a copy so callers cannot mutate the shared table.
            return dict(tier_info)

    return {"gpu": gpu_name, "tier": "unknown"}