Spaces:
Running
on
L4
Running
on
L4
Ahmet Kaan Sever
committed on
Commit
·
24a5ac7
1
Parent(s):
db96c4e
Now backend writes to results file. Used threading for this.
Browse files- requirements.txt +1 -0
- svc/router.py +61 -39
requirements.txt
CHANGED
@@ -7,5 +7,6 @@ python-jose
|
|
7 |
python-multipart
|
8 |
deepeval
|
9 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
|
|
10 |
torch
|
11 |
sentencepiece
|
|
|
7 |
python-multipart
|
8 |
deepeval
|
9 |
--extra-index-url https://download.pytorch.org/whl/cu113
|
10 |
+
huggingface-hub>=0.29.1
|
11 |
torch
|
12 |
sentencepiece
|
svc/router.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
from fastapi import APIRouter, HTTPException, Depends
|
2 |
import logging
|
3 |
|
@@ -8,11 +9,13 @@ from auth.authentication import get_current_user, create_access_token
|
|
8 |
from dotenv import load_dotenv
|
9 |
import os
|
10 |
import json
|
|
|
11 |
from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
|
12 |
import torch
|
13 |
import gc
|
14 |
from time import time
|
15 |
from huggingface_hub import HfApi, ModelInfo
|
|
|
16 |
|
17 |
|
18 |
router = APIRouter()
|
@@ -25,7 +28,6 @@ HF_TOKEN = os.getenv("HF_TOKEN")
|
|
25 |
|
26 |
# Or configure a HfApi client
|
27 |
hf_api = HfApi(
|
28 |
-
endpoint="https://huggingface.co", # Can be a Private Hub endpoint.
|
29 |
token=HF_TOKEN, # Token is not persisted on the machine.
|
30 |
)
|
31 |
|
@@ -87,45 +89,65 @@ def inference_model(request: LMHarnessTaskRequest, username: str = Depends(get_c
|
|
87 |
return TaskResponse(results=dumped)
|
88 |
|
89 |
|
|
|
|
|
90 |
@router.post("/deepeval/eval", response_model=TaskResponse)
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
|
130 |
|
131 |
def get_gpu_tier():
|
|
|
1 |
+
import datetime
|
2 |
from fastapi import APIRouter, HTTPException, Depends
|
3 |
import logging
|
4 |
|
|
|
9 |
from dotenv import load_dotenv
|
10 |
import os
|
11 |
import json
|
12 |
+
from pathlib import Path
|
13 |
from src.deepeval.deepeval_task_manager import DeepEvalTaskManager
|
14 |
import torch
|
15 |
import gc
|
16 |
from time import time
|
17 |
from huggingface_hub import HfApi, ModelInfo
|
18 |
+
import threading
|
19 |
|
20 |
|
21 |
router = APIRouter()
|
|
|
28 |
|
29 |
# Or configure a HfApi client
|
30 |
hf_api = HfApi(
|
|
|
31 |
token=HF_TOKEN, # Token is not persisted on the machine.
|
32 |
)
|
33 |
|
|
|
89 |
return TaskResponse(results=dumped)
|
90 |
|
91 |
|
92 |
+
|
93 |
+
|
94 |
@router.post("/deepeval/eval", response_model=TaskResponse)
def deep_eval_suite(request: DeepEvalSuiteRequest):
    """Start a DeepEval evaluation suite for the requested model in a background thread.

    The endpoint returns immediately; the actual evaluation, result serialization,
    and upload to the HF Hub results dataset happen asynchronously in a daemon
    thread. Failures are logged (via logger.exception) rather than surfaced to
    the caller, since the HTTP response has already been sent.

    Args:
        request: DeepEvalSuiteRequest carrying `model_name` and `tasks`.

    Returns:
        TaskResponse whose `results` field is a status string, not the
        evaluation results themselves.
    """

    def run_in_background():
        # Runs the full evaluation pipeline; must never raise, hence the
        # broad try/except with logging at the end.
        try:
            torch.cuda.empty_cache()
            des = DeepEvalTaskManager(request.model_name, request.tasks)

            start_time = time()
            results = des.run_tasks()
            end_time = time()
            duration = round(end_time - start_time, 2)

            model_info: ModelInfo = hf_api.model_info(request.model_name)

            # Static run configuration recorded alongside the results for
            # reproducibility. NOTE(review): batch_size/device/dtype are
            # hard-coded here — confirm they match what DeepEvalTaskManager
            # actually uses.
            config = {
                "model_source": "hf",
                "num_fewshot": 0,
                "batch_size": 8,
                "device": "cuda:0",
                "model_dtype": "torch.float16",
                "model_name": request.model_name,
                "model_sha": model_info.sha,
            }

            final_results = {
                "results": results,
                "config": config,
                "total_evaluation_time_seconds": duration,
                "start_time": start_time,
                "end_time": end_time,
            }

            # Save and upload.
            dumped = json.dumps(final_results, indent=2)
            # BUG FIX: the module is imported as `import datetime`, so the
            # original bare `datetime.now()` raised AttributeError and every
            # run failed before writing results. Qualify the class access.
            path = Path(
                "/tmp",
                request.model_name,
                f"results_{datetime.datetime.now()}.json",
            )
            path.parent.mkdir(parents=True, exist_ok=True)
            path.write_text(dumped)

            RESULTS_REPO = "metunlp/results"
            hf_api.upload_file(
                path_or_fileobj=path,
                # Mirror the /tmp/<model_name>/... layout inside the dataset repo.
                path_in_repo=path.relative_to("/tmp").as_posix(),
                repo_id=RESULTS_REPO,
                repo_type="dataset",
            )

            logger.info(f"✅ Uploaded results to HF Hub for {request.model_name}")

        except Exception as e:
            # Background thread: nothing to propagate to, so log with traceback.
            logger.exception(f"❌ Background evaluation failed: {e}")

    # 🚀 Start evaluation in background (daemon: don't block process shutdown).
    threading.Thread(target=run_in_background, daemon=True).start()

    # ✅ Immediately respond
    return TaskResponse(results="🚀 Evaluation started in background.")
|
150 |
+
|
151 |
|
152 |
|
153 |
def get_gpu_tier():
|