icl_tasks:
# World-knowledge tasks (dataset files under eval/local_data/world_knowledge/).
- label: jeopardy
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/jeopardy_all.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "
  has_categories: true

- label: bigbench_qa_wikidata
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/bigbench_qa_wikidata.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: arc_easy
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/arc_easy.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "

- label: arc_challenge
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/arc_challenge.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "

- label: mmlu
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/mmlu.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "
  has_categories: true

- label: bigbench_misconceptions
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/world_knowledge/bigbench_misconceptions.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
# Commonsense-reasoning tasks
# (dataset files under eval/local_data/commonsense_reasoning/).
- label: copa
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/copa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice

- label: piqa
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/piqa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "

- label: openbook_qa
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/openbook_qa.jsonl
  num_fewshot: [0]
  icl_task_type: multiple_choice

- label: bigbench_novel_concepts
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/bigbench_novel_concepts.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: bigbench_strange_stories
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strange_stories.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: bigbench_strategy_qa
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/commonsense_reasoning/bigbench_strategy_qa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
# Language-understanding tasks
# (dataset files under eval/local_data/language_understanding/).
- label: lambada_openai
  dataset_uri: eval/local_data/language_understanding/lambada_openai.jsonl
  num_fewshot: [0]
  icl_task_type: language_modeling

- label: hellaswag
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/language_understanding/hellaswag.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: winograd
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/language_understanding/winograd_wsc.jsonl
  num_fewshot: [0]
  icl_task_type: schema

- label: winogrande
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/language_understanding/winogrande.jsonl
  num_fewshot: [0]
  icl_task_type: schema

- label: bigbench_conlang_translation
  dataset_uri: eval/local_data/language_understanding/bigbench_conlang_translation.jsonl
  num_fewshot: [0]
  icl_task_type: language_modeling

- label: bigbench_language_identification
  dataset_uri: eval/local_data/language_understanding/bigbench_language_identification.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: bigbench_conceptual_combinations
  dataset_uri: eval/local_data/language_understanding/bigbench_conceptual_combinations.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
# Symbolic-problem-solving tasks
# (dataset files under eval/local_data/symbolic_problem_solving/).
- label: bigbench_elementary_math_qa
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_elementary_math_qa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: bigbench_dyck_languages
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_dyck_languages.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: bigbench_cs_algorithms
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_cs_algorithms.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: bigbench_logical_deduction
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_logical_deduction.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: bigbench_operators
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_operators.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: bigbench_repeat_copy_logic
  dataset_uri: eval/local_data/symbolic_problem_solving/bigbench_repeat_copy_logic.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: simple_arithmetic_nospaces
  dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_nospaces.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: simple_arithmetic_withspaces
  dataset_uri: eval/local_data/symbolic_problem_solving/simple_arithmetic_withspaces.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: math_qa
  dataset_uri: eval/local_data/symbolic_problem_solving/math_qa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: logi_qa
  dataset_uri: eval/local_data/symbolic_problem_solving/logi_qa.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "
# Reading-comprehension tasks
# (dataset files under eval/local_data/reading_comprehension/).
- label: pubmed_qa_labeled
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/reading_comprehension/pubmed_qa_labeled.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: squad
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/reading_comprehension/squad.jsonl
  num_fewshot: [10]
  icl_task_type: language_modeling

- label: bigbench_understanding_fables
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/reading_comprehension/bigbench_understanding_fables.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice

- label: boolq
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/reading_comprehension/boolq.jsonl
  num_fewshot: [10]
  icl_task_type: multiple_choice
  # Delimiter that separates each question from its answer.
  continuation_delimiter: "\nAnswer: "
# Programming tasks (dataset files under eval/local_data/programming/).
# Every entry below shares the same settings apart from label and dataset_uri.
- label: human_eval
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_cpp
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/processed_human_eval_cpp.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_js
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/processed_human_eval_js.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_return_simple
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval_return_simple.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_return_complex
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval_return_complex.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_25
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval-0.25.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_50
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval-0.5.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation

- label: human_eval_75
  # ADD YOUR OWN DATASET URI
  dataset_uri: eval/local_data/programming/human_eval-0.75.jsonl
  num_fewshot: [0]
  pass_at_k: 1
  num_beams: 20
  batch_size: 1
  icl_task_type: code_evaluation