mtasic85 committed
Commit
f2a5748
1 Parent(s): 0499733
Files changed (1)
  1. README.md +13 -13
README.md CHANGED
@@ -101,19 +101,19 @@ litgpt evaluate --tasks 'leaderboard' --out_dir 'evaluate-0/' --batch_size 4 --d
 litgpt evaluate --tasks 'hellaswag,gsm8k,truthfulqa_mc2,mmlu,winogrande,arc_challenge' --out_dir 'evaluate-1/' --batch_size 4 --dtype 'bfloat16' out/pretrain/final/
 ```

-| Tasks |Version| Filter |n-shot| Metric | |Value | |Stderr|
-|---------------------------------------|------:|----------------|-----:|-----------|---|-----:|---|-----:|
-|arc_challenge | 1|none | 0|acc |↑ |0.1911|± |0.0115|
-| | |none | 0|acc_norm |↑ |0.2355|± |0.0124|
-|gsm8k | 3|flexible-extract| 5|exact_match|↑ |0.0152|± |0.0034|
-| | |strict-match | 5|exact_match|↑ |0.0000|± |0.0000|
-|hellaswag | 1|none | 0|acc |↑ |0.2661|± |0.0044|
-| | |none | 0|acc_norm |↑ |0.2708|± |0.0044|
-|mmlu | 2|none | |acc |↑ |0.2315|± |0.0036|
-| - humanities | 2|none | |acc |↑ |0.2372|± |0.0062|
-| - formal_logic | 1|none | 0|acc |↑ |0.2937|± |0.0407|
-| - high_school_european_history | 1|none | 0|acc |↑ |0.2424|± |0.0335|
-| - high_school_us_history | 1|none | 0|acc |↑ |0.2451|± |0.0302|
+| Tasks |Version| Filter |n-shot| Metric | |Value | |Stderr|
+|---------------------------------------|------:|----------------|-----:|-----------|---|-----:|---|-----:|
+|arc_challenge | 1|none | 0|acc |↑ |0.1911|± |0.0115|
+| | |none | 0|acc_norm |↑ |0.2355|± |0.0124|
+|gsm8k | 3|flexible-extract| 5|exact_match|↑ |0.0152|± |0.0034|
+| | |strict-match | 5|exact_match|↑ |0.0000|± |0.0000|
+|hellaswag | 1|none | 0|acc |↑ |0.2661|± |0.0044|
+| | |none | 0|acc_norm |↑ |0.2708|± |0.0044|
+|mmlu | 2|none | |acc |↑ |0.2315|± |0.0036|
+| - humanities | 2|none | |acc |↑ |0.2372|± |0.0062|
+| - formal_logic | 1|none | 0|acc |↑ |0.2937|± |0.0407|
+| - high_school_european_history | 1|none | 0|acc |↑ |0.2424|± |0.0335|
+| - high_school_us_history | 1|none | 0|acc |↑ |0.2451|± |0.0302|
 | - high_school_world_history | 1|none | 0|acc |↑ |0.2321|± |0.0275|
 | - international_law | 1|none | 0|acc |↑ |0.1983|± |0.0364|
 | - jurisprudence | 1|none | 0|acc |↑ |0.2315|± |0.0408|
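
These rows follow the markdown layout that lm-evaluation-harness emits, which makes them straightforward to consume programmatically. Below is a minimal sketch, assuming plain stdlib Python, of turning such a table into task-to-score pairs; `TABLE` (abridged from the rows above) and `parse_results` are hypothetical names introduced for illustration, not part of litgpt or lm-eval.

```python
# Minimal sketch: parse an lm-evaluation-harness markdown results table
# (like the one above) into {task/metric: (value, stderr)} pairs.
# TABLE is an abridged copy of the table; parse_results is an illustrative
# helper, not a litgpt or lm-eval API.

TABLE = """\
| Tasks |Version| Filter |n-shot| Metric | |Value | |Stderr|
|---------------------------------------|------:|----------------|-----:|-----------|---|-----:|---|-----:|
|arc_challenge | 1|none | 0|acc |↑ |0.1911|± |0.0115|
| | |none | 0|acc_norm |↑ |0.2355|± |0.0124|
|gsm8k | 3|flexible-extract| 5|exact_match|↑ |0.0152|± |0.0034|
"""

def parse_results(table: str) -> dict[str, tuple[float, float]]:
    results: dict[str, tuple[float, float]] = {}
    last_task = ""
    for line in table.strip().splitlines()[2:]:  # skip header and separator rows
        cells = [c.strip() for c in line.strip("|").split("|")]
        task, _version, filt, _n_shot, metric, _arrow, value, _pm, stderr = cells
        task = task.lstrip(" -") or last_task  # a blank first cell continues the previous task
        last_task = task
        # Include the filter in the key only when it is meaningful (e.g. gsm8k).
        key = f"{task}/{metric}" if filt in ("", "none") else f"{task}/{filt}/{metric}"
        results[key] = (float(value), float(stderr))
    return results

if __name__ == "__main__":
    for key, (value, stderr) in parse_results(TABLE).items():
        print(f"{key}: {value:.4f} ± {stderr:.4f}")
```

Running the sketch prints one line per row, e.g. `arc_challenge/acc: 0.1911 ± 0.0115`.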