alexmarques committed on
Commit
893683d
1 Parent(s): b9e995e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -169,7 +169,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
169
  </td>
170
  </tr>
171
  <tr>
172
- <td>GSM-8K-cot (8-shot, strict-match)
173
  </td>
174
  <td>93.18
175
  </td>
@@ -199,7 +199,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
199
  </td>
200
  </tr>
201
  <tr>
202
- <td>TruthfulQA (0-shot)
203
  </td>
204
  <td>59.90
205
  </td>
@@ -252,6 +252,7 @@ lm_eval \
252
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=2 \
253
  --tasks gsm8k_cot_llama_3.1_instruct \
254
  --apply_chat_template \
 
255
  --num_fewshot 8 \
256
  --batch_size auto
257
  ```
@@ -281,7 +282,7 @@ lm_eval \
281
  lm_eval \
282
  --model vllm \
283
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=2 \
284
- --tasks truthfulqa_mc \
285
  --num_fewshot 0 \
286
  --batch_size auto
287
  ```
 
169
  </td>
170
  </tr>
171
  <tr>
172
+ <td>GSM-8K (CoT, 8-shot, strict-match)
173
  </td>
174
  <td>93.18
175
  </td>
 
199
  </td>
200
  </tr>
201
  <tr>
202
+ <td>TruthfulQA (0-shot, mc2)
203
  </td>
204
  <td>59.90
205
  </td>
 
252
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=2 \
253
  --tasks gsm8k_cot_llama_3.1_instruct \
254
  --apply_chat_template \
255
+ --fewshot_as_multiturn \
256
  --num_fewshot 8 \
257
  --batch_size auto
258
  ```
 
282
  lm_eval \
283
  --model vllm \
284
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=2 \
285
+ --tasks truthfulqa \
286
  --num_fewshot 0 \
287
  --batch_size auto
288
  ```