import os
import subprocess
import asyncio
from pathlib import Path
from yourbench_space.leaderboard_space.env import INIT_MODELS

# When running on a Space, write outputs to the persistent /data volume
ON_SPACES = os.environ.get("system") == "spaces"
OUTPUT_DIR = "/data" if ON_SPACES else "."

def create_eval_file(eval_ds_name: str):
    """Generate a custom lighteval task file for the given evaluation dataset."""
    task_name = eval_ds_name.replace("/", "_")
    template_path = Path("/home/user/app/yourbench_space/lighteval_task/yourbench_task.py")
    subprocess.run(["lighteval", "tasks", "create", str(template_path), task_name, eval_ds_name])

async def run_process(args: list) -> dict:
    """Run a command asynchronously and capture its output."""
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    # Allow up to 3 minutes per run; wait_for raises TimeoutError past that
    await asyncio.wait_for(process.wait(), timeout=180)
    stdout = await process.stdout.read()
    stderr = await process.stderr.read()
    return {"pid": process.pid, "stdout": stdout.decode(), "stderr": stderr.decode()}

async def run_evaluations(eval_ds_name: str, org: str) -> str:
    """Launch one lighteval run per configured model and report overall status."""
    task_name = eval_ds_name.replace("/", "_")
    tasks = []
    for model_name, provider in INIT_MODELS:
        args = [
            "lighteval",
            "endpoint",
            "inference-providers",
            f"model={model_name},provider={provider}",
            f"custom|{task_name}|0|0",
            "--custom-tasks",
            f"custom_{task_name}_task.py",
            "--max-samples",
            "30",
            "--output-dir",
            OUTPUT_DIR,
            "--save-details",
            "--results-org",
            org,
            "--push-to-hub",
        ]
        tasks.append(run_process(args))
    # return_exceptions=True keeps per-model failures as Exception objects instead of raising
    processes = await asyncio.gather(*tasks, return_exceptions=True)
    if all(not isinstance(result, Exception) for result in processes):
        return "✅"
    return "At least one model failed"