Ahmet Kaan Sever committed
Commit 24a5ac7 · 1 Parent(s): db96c4e

Now the backend writes to a results file. Used threading for this.
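The shape of the change is fire-and-forget threading: the route handler spawns a daemon thread for the long-running evaluation and returns immediately. A minimal sketch of that pattern, with a hypothetical `run_suite` standing in for the real evaluation work shown in the `svc/router.py` diff below:

```python
import threading

from fastapi import FastAPI

app = FastAPI()

def run_suite(model_name: str) -> None:
    # Placeholder for the long-running work; in the real handler this is
    # the DeepEval run plus the results upload.
    ...

@app.post("/eval")
def start_eval(model_name: str):
    # daemon=True so a hung evaluation cannot block process shutdown
    threading.Thread(target=run_suite, args=(model_name,), daemon=True).start()
    # Respond immediately; the work continues in the background thread.
    return {"status": "started"}
```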

Files changed (2):
  1. requirements.txt +1 -0
  2. svc/router.py +61 -39
requirements.txt CHANGED
@@ -7,5 +7,6 @@ python-jose
 python-multipart
 deepeval
 --extra-index-url https://download.pytorch.org/whl/cu113
+huggingface-hub>=0.29.1
 torch
 sentencepiece
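The new `huggingface-hub>=0.29.1` dependency provides the `HfApi` client that `svc/router.py` uses below for `model_info` and `upload_file`. A quick smoke test after installing (the `gpt2` repo id is just an example; any public model works):

```python
from huggingface_hub import HfApi, ModelInfo

api = HfApi()  # anonymous client is enough for public model metadata
info: ModelInfo = api.model_info("gpt2")
print(info.sha)  # the commit sha that the handler records into its results config
```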
svc/router.py CHANGED
@@ -1,3 +1,4 @@
+from datetime import datetime
 from fastapi import APIRouter, HTTPException, Depends
 import logging
 
@@ -8,11 +9,13 @@ from auth.authentication import get_current_user, create_access_token
 from dotenv import load_dotenv
 import os
 import json
+from pathlib import Path
 from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
 import torch
 import gc
 from time import time
 from huggingface_hub import HfApi, ModelInfo
+import threading
 
 
 router = APIRouter()
@@ -25,7 +28,6 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 
 # Or configure a HfApi client
 hf_api = HfApi(
-    endpoint="https://huggingface.co",  # Can be a Private Hub endpoint.
     token=HF_TOKEN,  # Token is not persisted on the machine.
 )
 
@@ -87,45 +89,65 @@ def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_c
     return TaskResponse(results=dumped)
 
 
+
+
 @router.post("/deepeval/eval", response_model=TaskResponse)
-async def deep_eval_suite(request: DeepEvalSuiteRequest):
-    # Free up VRAM
-    torch.cuda.empty_cache()
-    des = DeepEvalTaskManager(request.model_name, request.tasks)
-    start_time = time()
-    results = des.run_tasks()  # TODO: format should be different. Check metunlp/results repo for the correct format
-    end_time = time()
-    duration = round(end_time - start_time, 2)  # total_evaluation_time_seconds
-
-    model_info: ModelInfo = hf_api.model_info(request.model_name)
-
-    config = {
-        "model_source": "hf",
-        "num_fewshot": 0,
-        "batch_size": 8,
-        "batch_sizes": [],
-        "device": "cuda:0",  # TODO: take this from requests
-        # "no_cache": true,
-        # "limit": null,
-        # "bootstrap_iters": 100000,
-        # "description_dict": null,
-        "model_dtype": "torch.float16",  # TODO: take this from requests
-        "model_name": request.model_name,
-        "model_sha": model_info.sha
-    }
-
-    tbr_dict = {
-        "results": results,
-        "config": config,
-        "total_evaluation_time_seconds": duration,
-        "start_time": start_time,
-        "end_time": end_time
-    }
-
-    json_results = json.dumps(tbr_dict)
-
-    print("Returning:", json_results)
-    return TaskResponse(results=json_results)
+def deep_eval_suite(request: DeepEvalSuiteRequest):
+    def run_in_background():
+        try:
+            torch.cuda.empty_cache()  # free VRAM before loading the model
+            des = DeepEvalTaskManager(request.model_name, request.tasks)
+
+            start_time = time()
+            results = des.run_tasks()
+            end_time = time()
+            duration = round(end_time - start_time, 2)
+
+            model_info: ModelInfo = hf_api.model_info(request.model_name)
+
+            config = {
+                "model_source": "hf",
+                "num_fewshot": 0,
+                "batch_size": 8,
+                "device": "cuda:0",
+                "model_dtype": "torch.float16",
+                "model_name": request.model_name,
+                "model_sha": model_info.sha,
+            }
+
+            final_results = {
+                "results": results,
+                "config": config,
+                "total_evaluation_time_seconds": duration,
+                "start_time": start_time,
+                "end_time": end_time
+            }
+
+            # Save and upload
+            dumped = json.dumps(final_results, indent=2)
+            path = Path("/tmp", request.model_name, f"results_{datetime.now()}.json")
+            path.parent.mkdir(parents=True, exist_ok=True)
+            path.write_text(dumped)
+
+            RESULTS_REPO = "metunlp/results"
+            hf_api.upload_file(
+                path_or_fileobj=path,
+                path_in_repo=path.relative_to("/tmp").as_posix(),
+                repo_id=RESULTS_REPO,
+                repo_type="dataset",
+            )
+
+            logger.info(f"✅ Uploaded results to HF Hub for {request.model_name}")
+
+        except Exception as e:
+            logger.exception(f"❌ Background evaluation failed: {e}")
+
+    # 🔁 Start evaluation in background
+    threading.Thread(target=run_in_background, daemon=True).start()
+
+    # ✅ Immediately respond
+    return TaskResponse(results="🚀 Evaluation started in background.")
+
 
 
 def get_gpu_tier():
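With this change the endpoint acknowledges immediately, and the results file later appears under the model's name in the `metunlp/results` dataset repo. A sketch of exercising the endpoint, assuming the service runs on localhost:8000 and that `DeepEvalSuiteRequest` exposes `model_name` and `tasks` fields (matching their use in the handler; the payload values are hypothetical):

```python
import requests

resp = requests.post(
    "http://localhost:8000/deepeval/eval",
    json={"model_name": "gpt2", "tasks": ["truthfulqa"]},  # hypothetical payload
    timeout=10,
)
print(resp.json())  # expected: {"results": "🚀 Evaluation started in background."}
```

Note that the upload path mirrors the local layout under /tmp, so a model id like `org/model` lands at `org/model/results_<timestamp>.json` in the dataset repo.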