alexmarques committed on
Commit
42a50d4
1 Parent(s): 87fc45f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -200,7 +200,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
200
  </td>
201
  </tr>
202
  <tr>
203
- <td>GSM-8K-cot (8-shot, strict-match)
204
  </td>
205
  <td>82.03
206
  </td>
@@ -230,7 +230,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
230
  </td>
231
  </tr>
232
  <tr>
233
- <td>TruthfulQA (0-shot)
234
  </td>
235
  <td>54.04
236
  </td>
@@ -283,6 +283,7 @@ lm_eval \
283
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
284
  --tasks gsm8k_cot_llama_3.1_instruct \
285
  --apply_chat_template \
 
286
  --num_fewshot 8 \
287
  --batch_size auto
288
  ```
@@ -312,7 +313,7 @@ lm_eval \
312
  lm_eval \
313
  --model vllm \
314
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
315
- --tasks truthfulqa_mc \
316
  --num_fewshot 0 \
317
  --batch_size auto
318
  ```
 
200
  </td>
201
  </tr>
202
  <tr>
203
+ <td>GSM-8K-cot (CoT, 8-shot, strict-match)
204
  </td>
205
  <td>82.03
206
  </td>
 
230
  </td>
231
  </tr>
232
  <tr>
233
+ <td>TruthfulQA (0-shot, mc2)
234
  </td>
235
  <td>54.04
236
  </td>
 
283
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
284
  --tasks gsm8k_cot_llama_3.1_instruct \
285
  --apply_chat_template \
286
+ --fewshot_as_multiturn \
287
  --num_fewshot 8 \
288
  --batch_size auto
289
  ```
 
313
  lm_eval \
314
  --model vllm \
315
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
316
+ --tasks truthfulqa \
317
  --num_fewshot 0 \
318
  --batch_size auto
319
  ```