meg-huggingface committed
Commit f09aba3 · 1 Parent(s): 590e272

Debug

- src/backend/run_eval_suite_harness.py +2 -0
- src/envs.py +1 -1
src/backend/run_eval_suite_harness.py CHANGED

@@ -61,6 +61,8 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
     results["config"]["model_name"] = eval_request.model
     results["config"]["model_sha"] = eval_request.revision
 
+    print("Results is")
+    print(results)
     dumped = json.dumps(results, indent=2)
     logger.info(dumped)
 
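For context on where these debug prints land: run_evaluation in this backend wraps the lm-evaluation-harness. Below is a minimal sketch of that flow only, assuming lm_eval's evaluator.simple_evaluate entry point; the signature and model_args plumbing shown here are assumptions for illustration, not the repo's verbatim code.

# Sketch only: assumes lm-eval's `evaluator.simple_evaluate`; the argument
# plumbing below is illustrative, not the repo's exact implementation.
import json
import logging

from lm_eval import evaluator

logger = logging.getLogger(__name__)

def run_evaluation(eval_request, task_names, num_fewshot, batch_size, device, limit=None):
    results = evaluator.simple_evaluate(
        model="hf",  # assumed model type string
        model_args=f"pretrained={eval_request.model},revision={eval_request.revision}",
        tasks=task_names,
        num_fewshot=num_fewshot,
        batch_size=batch_size,
        device=device,
        limit=limit,  # None = full dataset; an int caps examples per task
    )
    # The lines touched by this commit:
    results["config"]["model_name"] = eval_request.model
    results["config"]["model_sha"] = eval_request.revision

    print("Results is")  # debug output added in this commit
    print(results)

    dumped = json.dumps(results, indent=2)
    logger.info(dumped)
    return results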
src/envs.py CHANGED

@@ -10,7 +10,7 @@ OWNER = "meg" # Change to your org - don't forget to create a results and requests dataset
 
 # For harness evaluations
 DEVICE = "cuda:0" #if you add compute, for harness evaluations
-LIMIT = 
+LIMIT = 1 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["realtoxicityprompts", "toxigen", "logiqa"]
 
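LIMIT is the value forwarded to the harness's limit parameter, so LIMIT = 1 scores a single example per task while debugging. A hedged sketch of the wiring, reusing the hypothetical run_evaluation above; the actual call sites in the backend may differ:

# Assumed wiring: the backend reads these settings from src/envs.py and
# threads LIMIT through to the harness.
from src.envs import DEVICE, LIMIT, NUM_FEWSHOT, TASKS_HARNESS

results = run_evaluation(
    eval_request,                 # pending request pulled from the queue
    task_names=TASKS_HARNESS,     # ["realtoxicityprompts", "toxigen", "logiqa"]
    num_fewshot=NUM_FEWSHOT,      # 0
    batch_size=1,                 # illustrative value
    device=DEVICE,                # "cuda:0"
    limit=LIMIT,                  # 1 while debugging; None for real evaluations
)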