Ahmet Kaan Sever committed on
Commit
8930e56
·
1 Parent(s): 5634da8

Added an API endpoint for returning hardware info, and added logging and error handling for task evaluation.

Browse files
src/deepeval/deepeval_task_manager.py CHANGED
@@ -60,11 +60,15 @@ class DeepEvalTaskManager:
60
  """Execute validated tasks in order."""
61
  results = {}
62
  for task_name, task_method in self.tasks_to_run.items():
63
- print("Running task: ", task_name)
64
- task_enum = getattr(Task, task_name)
65
- task_value = task_enum.value
66
- results[task_value] = task_method() # Call the stored method reference
67
-
 
 
 
 
68
  return results
69
 
70
  def sentiment_analysis_tr(self):
 
60
  """Execute validated tasks in order."""
61
  results = {}
62
  for task_name, task_method in self.tasks_to_run.items():
63
+ try:
64
+ print("Running task: ", task_name)
65
+ task_enum = getattr(Task, task_name)
66
+ task_value = task_enum.value
67
+ results[task_value] = task_method() # Call the stored method reference
68
+ except Exception as e:
69
+ print(f"Error At Task: {task_name} - {e}")
70
+ continue
71
+ print("All tasks completed.")
72
  return results
73
 
74
  def sentiment_analysis_tr(self):
svc/router.py CHANGED
@@ -48,6 +48,11 @@ async def deep_eval_status():
48
  #Return running with 200 status code
49
  return {"status": "running"}
50
 
 
 
 
 
 
51
  @router.post("/chat", response_model=TaskResponse)
52
  def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_current_user)):
53
  logger.info(request)
@@ -84,6 +89,8 @@ def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_c
84
 
85
  @router.post("/deepeval/eval", response_model=TaskResponse)
86
  async def deep_eval_suite(request: DeepEvalSuiteRequest):
 
 
87
  des = DeepEvalTaskManager(request.model_name, request.tasks)
88
  start_time = time()
89
  results = des.run_tasks() #TODO: format should be different. Check metunlp/results repo for the correct format
@@ -117,13 +124,24 @@ async def deep_eval_suite(request: DeepEvalSuiteRequest):
117
 
118
  json_results = json.dumps(tbr_dict)
119
 
120
- #Free up VRAM
121
- torch.cuda.empty_cache()
122
-
123
- #Free up RAM
124
- des = None
125
- gc.collect()
126
-
127
  return TaskResponse(results=json_results)
128
 
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  #Return running with 200 status code
49
  return {"status": "running"}
50
 
51
@router.get("/deepeval-hardware")
def hardware_status():
    """Return the GPU/tier description produced by ``get_gpu_tier()``."""
    return get_gpu_tier()
55
+
56
  @router.post("/chat", response_model=TaskResponse)
57
  def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_current_user)):
58
  logger.info(request)
 
89
 
90
  @router.post("/deepeval/eval", response_model=TaskResponse)
91
  async def deep_eval_suite(request: DeepEvalSuiteRequest):
92
+ #Free up VRAM
93
+ torch.cuda.empty_cache()
94
  des = DeepEvalTaskManager(request.model_name, request.tasks)
95
  start_time = time()
96
  results = des.run_tasks() #TODO: format should be different. Check metunlp/results repo for the correct format
 
124
 
125
  json_results = json.dumps(tbr_dict)
126
 
 
 
 
 
 
 
 
127
  return TaskResponse(results=json_results)
128
 
129
 
130
def get_gpu_tier(gpu_name=None):
    """Map a GPU model name to the custom hardware tier label.

    Args:
        gpu_name: Optional device name to classify. When omitted (the
            default, which is how existing callers invoke this function),
            the name of CUDA device 0 is queried through ``torch``; if CUDA
            is not available the CPU entry is returned instead.

    Returns:
        A dict with two string keys: ``"gpu"`` (display name) and ``"tier"``
        (tier label). GPUs that match no known marker are reported with
        their lower-cased name and the tier ``"unknown"``.
    """
    if gpu_name is None:
        if not torch.cuda.is_available():
            return {"gpu": "CPU", "tier": "cpu"}
        gpu_name = torch.cuda.get_device_name(0)

    gpu_name = gpu_name.lower()

    # Normalize the GPU model to the custom tier system.
    # Order matters: matching is by substring and "l4" is a substring of
    # "l40s", so the more specific marker has to be tested first; otherwise
    # an L40S would be reported as an L4.
    # NOTE: the T4 entry could be refined by checking memory or other context.
    known_gpus = (
        ("t4", "Tesla T4", "t4-small"),
        ("l40s", "NVIDIA L40S", "l40sx1"),
        ("l4", "NVIDIA L4", "l4x1"),
        ("a10g", "NVIDIA A10G", "a10g"),
    )
    for marker, display_name, tier in known_gpus:
        if marker in gpu_name:
            return {"gpu": display_name, "tier": tier}

    return {"gpu": gpu_name, "tier": "unknown"}