meg-huggingface committed
Commit f1e6565 · 1 parent: fe8891f

DEBUG
Files changed:
- main_backend_harness.py  +2 -0
- requirements.txt  +1 -1
- src/envs.py  +1 -1

main_backend_harness.py CHANGED
@@ -62,6 +62,8 @@ def run_auto_eval():
         local_dir=EVAL_REQUESTS_PATH_BACKEND,
     )
 
+    print("eval request is")
+    print(eval_request)
     run_evaluation(
         eval_request=eval_request,
         task_names=TASKS_HARNESS,
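The added print calls surface the eval_request object just before run_evaluation is invoked. A minimal sketch of the kind of output they produce, assuming eval_request is a plain dataclass (the EvalRequest fields below are illustrative stand-ins, not the backend's actual schema):

# Illustrative only: stand-in for the backend's EvalRequest object.
from dataclasses import dataclass

@dataclass
class EvalRequest:
    model: str
    revision: str = "main"
    precision: str = "float16"
    status: str = "PENDING"

eval_request = EvalRequest(model="org/model-name")  # hypothetical model id

print("eval request is")
print(eval_request)  # -> EvalRequest(model='org/model-name', revision='main', ...)
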
requirements.txt CHANGED
@@ -10,7 +10,7 @@ sentencepiece
 
 # Evaluation suites
 lighteval
-lm_eval
+lm_eval>=0.4.2
 
 # Log Visualizer
 BeautifulSoup4==4.12.2
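lm_eval is the PyPI distribution of EleutherAI's lm-evaluation-harness; the new constraint pins a floor in the 0.4 series. A quick sanity check of the installed version, assuming the package metadata is available under the name lm_eval in the current environment:

# Verify the installed harness satisfies lm_eval>=0.4.2.
from importlib.metadata import version
print(version("lm_eval"))  # expected: 0.4.2 or newer
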
src/envs.py CHANGED
@@ -10,7 +10,7 @@ OWNER = "meg" # Change to your org - don't forget to create a results and reques
 
 # For harness evaluations
 DEVICE = "cuda:0" #if you add compute, for harness evaluations
-LIMIT =
+LIMIT = 3 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["realtoxicityprompts"]#, "toxigen", "logiqa"]
 
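LIMIT caps how many examples each task evaluates, which is why the inline comment insists it go back to None for real runs. A sketch of how these settings typically reach the harness, assuming run_evaluation ultimately calls lm_eval.simple_evaluate (the call below is illustrative, not this repo's actual code):

# Illustrative only: how DEVICE, LIMIT, NUM_FEWSHOT and TASKS_HARNESS might be
# handed to lm-evaluation-harness >= 0.4.2; the backend's real run_evaluation
# may differ.
import lm_eval

DEVICE = "cuda:0"
LIMIT = 3                 # small cap for debugging; None evaluates everything
NUM_FEWSHOT = 0
TASKS_HARNESS = ["realtoxicityprompts"]

results = lm_eval.simple_evaluate(
    model="hf",                              # Hugging Face transformers backend
    model_args="pretrained=org/model-name",  # hypothetical model id
    tasks=TASKS_HARNESS,
    num_fewshot=NUM_FEWSHOT,
    limit=LIMIT,
    device=DEVICE,
)
print(results["results"])  # per-task metrics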