alexmarques committed on
Commit
a7c0994
1 Parent(s): b8dfdce

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -8
README.md CHANGED
@@ -150,7 +150,7 @@ Detailed model outputs are available as HuggingFace datasets for [Arena-Hard](ht
150
  </td>
151
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
152
  </td>
153
- <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w8a8 (this model)</strong>
154
  </td>
155
  <td><strong>Recovery</strong>
156
  </td>
@@ -513,7 +513,7 @@ lm_eval \
513
  ```
514
  lm_eval \
515
  --model vllm \
516
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
517
  --tasks mmlu_pt_llama_3.1_instruct \
518
  --fewshot_as_multiturn \
519
  --apply_chat_template \
@@ -525,7 +525,7 @@ lm_eval \
525
  ```
526
  lm_eval \
527
  --model vllm \
528
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
529
  --tasks mmlu_es_llama_3.1_instruct \
530
  --fewshot_as_multiturn \
531
  --apply_chat_template \
@@ -537,7 +537,7 @@ lm_eval \
537
  ```
538
  lm_eval \
539
  --model vllm \
540
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
541
  --tasks mmlu_it_llama_3.1_instruct \
542
  --fewshot_as_multiturn \
543
  --apply_chat_template \
@@ -549,7 +549,7 @@ lm_eval \
549
  ```
550
  lm_eval \
551
  --model vllm \
552
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
553
  --tasks mmlu_de_llama_3.1_instruct \
554
  --fewshot_as_multiturn \
555
  --apply_chat_template \
@@ -561,7 +561,7 @@ lm_eval \
561
  ```
562
  lm_eval \
563
  --model vllm \
564
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
565
  --tasks mmlu_fr_llama_3.1_instruct \
566
  --fewshot_as_multiturn \
567
  --apply_chat_template \
@@ -573,7 +573,7 @@ lm_eval \
573
  ```
574
  lm_eval \
575
  --model vllm \
576
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
577
  --tasks mmlu_hi_llama_3.1_instruct \
578
  --fewshot_as_multiturn \
579
  --apply_chat_template \
@@ -585,7 +585,7 @@ lm_eval \
585
  ```
586
  lm_eval \
587
  --model vllm \
588
- --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a8",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
589
  --tasks mmlu_th_llama_3.1_instruct \
590
  --fewshot_as_multiturn \
591
  --apply_chat_template \
 
150
  </td>
151
  <td><strong>Meta-Llama-3.1-8B-Instruct </strong>
152
  </td>
153
+ <td><strong>Meta-Llama-3.1-8B-Instruct-quantized.w4a16 (this model)</strong>
154
  </td>
155
  <td><strong>Recovery</strong>
156
  </td>
 
513
  ```
514
  lm_eval \
515
  --model vllm \
516
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
517
  --tasks mmlu_pt_llama_3.1_instruct \
518
  --fewshot_as_multiturn \
519
  --apply_chat_template \
 
525
  ```
526
  lm_eval \
527
  --model vllm \
528
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
529
  --tasks mmlu_es_llama_3.1_instruct \
530
  --fewshot_as_multiturn \
531
  --apply_chat_template \
 
537
  ```
538
  lm_eval \
539
  --model vllm \
540
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
541
  --tasks mmlu_it_llama_3.1_instruct \
542
  --fewshot_as_multiturn \
543
  --apply_chat_template \
 
549
  ```
550
  lm_eval \
551
  --model vllm \
552
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
553
  --tasks mmlu_de_llama_3.1_instruct \
554
  --fewshot_as_multiturn \
555
  --apply_chat_template \
 
561
  ```
562
  lm_eval \
563
  --model vllm \
564
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
565
  --tasks mmlu_fr_llama_3.1_instruct \
566
  --fewshot_as_multiturn \
567
  --apply_chat_template \
 
573
  ```
574
  lm_eval \
575
  --model vllm \
576
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
577
  --tasks mmlu_hi_llama_3.1_instruct \
578
  --fewshot_as_multiturn \
579
  --apply_chat_template \
 
585
  ```
586
  lm_eval \
587
  --model vllm \
588
+ --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
589
  --tasks mmlu_th_llama_3.1_instruct \
590
  --fewshot_as_multiturn \
591
  --apply_chat_template \