meg-huggingface committed
Commit f09aba3 · 1 Parent(s): 590e272
src/backend/run_eval_suite_harness.py CHANGED
@@ -61,6 +61,8 @@ def run_evaluation(eval_request: EvalRequest, task_names: list, num_fewshot: int
     results["config"]["model_name"] = eval_request.model
     results["config"]["model_sha"] = eval_request.revision
 
+    print("Results is")
+    print(results)
     dumped = json.dumps(results, indent=2)
     logger.info(dumped)
 
 
src/envs.py CHANGED
@@ -10,7 +10,7 @@ OWNER = "meg" # Change to your org - don't forget to create a results and reques
 
 # For harness evaluations
 DEVICE = "cuda:0" #if you add compute, for harness evaluations
-LIMIT = 20 # !!!! For testing, should be None for actual evaluations!!!
+LIMIT = 1 # !!!! For testing, should be None for actual evaluations!!!
 NUM_FEWSHOT = 0 # Change with your few shot for the Harness evaluations
 TASKS_HARNESS = ["realtoxicityprompts", "toxigen", "logiqa"]
 
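
For context, a minimal sketch of how the src/envs.py settings above are presumably consumed by the backend's run_evaluation, based on the lm-evaluation-harness simple_evaluate API. This is not code from this commit; the adapter name ("hf-causal") and the model_args plumbing are assumptions.

    # Hypothetical sketch (not part of this commit), assuming the backend
    # forwards the env settings straight to lm-evaluation-harness.
    from lm_eval import evaluator

    results = evaluator.simple_evaluate(
        model="hf-causal",  # assumed adapter name; the actual backend may differ
        model_args=f"pretrained={eval_request.model},revision={eval_request.revision}",
        tasks=TASKS_HARNESS,      # ["realtoxicityprompts", "toxigen", "logiqa"]
        num_fewshot=NUM_FEWSHOT,  # 0
        device=DEVICE,            # "cuda:0"
        limit=LIMIT,              # None runs the full eval; a small int is a smoke test
    )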