alexmarques committed on
Commit
8d6e926
1 Parent(s): 10b7edc

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -3
README.md CHANGED
@@ -168,7 +168,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
168
  </td>
169
  </tr>
170
  <tr>
171
- <td>GSM-8K-cot (8-shot, strict-match)
172
  </td>
173
  <td>82.03
174
  </td>
@@ -198,7 +198,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
198
  </td>
199
  </tr>
200
  <tr>
201
- <td>TruthfulQA (0-shot)
202
  </td>
203
  <td>54.04
204
  </td>
@@ -251,6 +251,7 @@ lm_eval \
251
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
252
  --tasks gsm8k_cot_llama_3.1_instruct \
253
  --apply_chat_template \
 
254
  --num_fewshot 8 \
255
  --batch_size auto
256
  ```
@@ -280,7 +281,7 @@ lm_eval \
280
  lm_eval \
281
  --model vllm \
282
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
283
- --tasks truthfulqa_mc \
284
  --num_fewshot 0 \
285
  --batch_size auto
286
  ```
 
168
  </td>
169
  </tr>
170
  <tr>
171
+ <td>GSM-8K (CoT, 8-shot, strict-match)
172
  </td>
173
  <td>82.03
174
  </td>
 
198
  </td>
199
  </tr>
200
  <tr>
201
+ <td>TruthfulQA (0-shot, mc2)
202
  </td>
203
  <td>54.04
204
  </td>
 
251
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
252
  --tasks gsm8k_cot_llama_3.1_instruct \
253
  --apply_chat_template \
254
+ --fewshot_as_multiturn \
255
  --num_fewshot 8 \
256
  --batch_size auto
257
  ```
 
281
  lm_eval \
282
  --model vllm \
283
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8-dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
284
+ --tasks truthfulqa \
285
  --num_fewshot 0 \
286
  --batch_size auto
287
  ```