Muennighoff committed
Commit 3970485
1 Parent(s): 956e279
Add
This view is limited to 50 files because it contains too many changes. See raw diff
- 3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.csv +120 -0
- 3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.json +427 -0
- 3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-03-12-23-46.csv +10 -0
- 3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-03-12-23-46.json +39 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_117_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_118_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_119_mp_rank_01_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
- 3b977b77b/global_step73814/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt +3 -0
3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.csv
ADDED
@@ -0,0 +1,120 @@
+task,metric,value,err,version
+copa,acc,0.74,0.044084400227680794,0
+hendrycksTest-abstract_algebra,acc,0.22,0.04163331998932268,0
+hendrycksTest-abstract_algebra,acc_norm,0.26,0.04408440022768079,0
+hendrycksTest-anatomy,acc,0.28888888888888886,0.03915450630414251,0
+hendrycksTest-anatomy,acc_norm,0.22962962962962963,0.036333844140734636,0
+hendrycksTest-astronomy,acc,0.21710526315789475,0.03355045304882921,0
+hendrycksTest-astronomy,acc_norm,0.3355263157894737,0.03842498559395271,0
+hendrycksTest-business_ethics,acc,0.34,0.04760952285695235,0
+hendrycksTest-business_ethics,acc_norm,0.31,0.04648231987117316,0
+hendrycksTest-clinical_knowledge,acc,0.24528301886792453,0.026480357179895678,0
+hendrycksTest-clinical_knowledge,acc_norm,0.30943396226415093,0.02845015479411863,0
+hendrycksTest-college_biology,acc,0.2152777777777778,0.034370793441061344,0
+hendrycksTest-college_biology,acc_norm,0.2222222222222222,0.034765901043041336,0
+hendrycksTest-college_chemistry,acc,0.26,0.04408440022768078,0
+hendrycksTest-college_chemistry,acc_norm,0.29,0.045604802157206845,0
+hendrycksTest-college_computer_science,acc,0.29,0.04560480215720684,0
+hendrycksTest-college_computer_science,acc_norm,0.24,0.04292346959909284,0
+hendrycksTest-college_mathematics,acc,0.2,0.04020151261036845,0
+hendrycksTest-college_mathematics,acc_norm,0.3,0.046056618647183814,0
+hendrycksTest-college_medicine,acc,0.2543352601156069,0.0332055644308557,0
+hendrycksTest-college_medicine,acc_norm,0.2543352601156069,0.0332055644308557,0
+hendrycksTest-college_physics,acc,0.2549019607843137,0.043364327079931764,0
+hendrycksTest-college_physics,acc_norm,0.28431372549019607,0.04488482852329017,0
+hendrycksTest-computer_security,acc,0.28,0.04512608598542126,0
+hendrycksTest-computer_security,acc_norm,0.36,0.048241815132442176,0
+hendrycksTest-conceptual_physics,acc,0.2553191489361702,0.028504856470514203,0
+hendrycksTest-conceptual_physics,acc_norm,0.1829787234042553,0.025276041000449966,0
+hendrycksTest-econometrics,acc,0.21929824561403508,0.03892431106518753,0
+hendrycksTest-econometrics,acc_norm,0.21929824561403508,0.03892431106518754,0
+hendrycksTest-electrical_engineering,acc,0.2689655172413793,0.036951833116502325,0
+hendrycksTest-electrical_engineering,acc_norm,0.30344827586206896,0.038312260488503336,0
+hendrycksTest-elementary_mathematics,acc,0.21957671957671956,0.02132001859977036,0
+hendrycksTest-elementary_mathematics,acc_norm,0.25925925925925924,0.022569897074918407,0
+hendrycksTest-formal_logic,acc,0.29365079365079366,0.04073524322147127,0
+hendrycksTest-formal_logic,acc_norm,0.23809523809523808,0.038095238095238126,0
+hendrycksTest-global_facts,acc,0.19,0.039427724440366234,0
+hendrycksTest-global_facts,acc_norm,0.2,0.04020151261036846,0
+hendrycksTest-high_school_biology,acc,0.23548387096774193,0.02413763242933771,0
+hendrycksTest-high_school_biology,acc_norm,0.3032258064516129,0.026148685930671746,0
+hendrycksTest-high_school_chemistry,acc,0.2019704433497537,0.028247350122180277,0
+hendrycksTest-high_school_chemistry,acc_norm,0.270935960591133,0.031270907132976984,0
+hendrycksTest-high_school_computer_science,acc,0.23,0.04229525846816506,0
+hendrycksTest-high_school_computer_science,acc_norm,0.28,0.04512608598542128,0
+hendrycksTest-high_school_european_history,acc,0.24848484848484848,0.03374402644139406,0
+hendrycksTest-high_school_european_history,acc_norm,0.3090909090909091,0.036085410115739666,0
+hendrycksTest-high_school_geography,acc,0.18181818181818182,0.027479603010538787,0
+hendrycksTest-high_school_geography,acc_norm,0.2878787878787879,0.03225883512300993,0
+hendrycksTest-high_school_government_and_politics,acc,0.21243523316062177,0.02951928261681725,0
+hendrycksTest-high_school_government_and_politics,acc_norm,0.2538860103626943,0.03141024780565318,0
+hendrycksTest-high_school_macroeconomics,acc,0.2358974358974359,0.02152596540740873,0
+hendrycksTest-high_school_macroeconomics,acc_norm,0.27692307692307694,0.022688042352424994,0
+hendrycksTest-high_school_mathematics,acc,0.1925925925925926,0.024043075181945192,0
+hendrycksTest-high_school_mathematics,acc_norm,0.21481481481481482,0.025040443877000686,0
+hendrycksTest-high_school_microeconomics,acc,0.24369747899159663,0.027886828078380558,0
+hendrycksTest-high_school_microeconomics,acc_norm,0.29831932773109243,0.02971914287634287,0
+hendrycksTest-high_school_physics,acc,0.19205298013245034,0.032162984205936156,0
+hendrycksTest-high_school_physics,acc_norm,0.25165562913907286,0.03543304234389985,0
+hendrycksTest-high_school_psychology,acc,0.22935779816513763,0.018025349724618684,0
+hendrycksTest-high_school_psychology,acc_norm,0.24036697247706423,0.01832060732096407,0
+hendrycksTest-high_school_statistics,acc,0.23148148148148148,0.028765111718046976,0
+hendrycksTest-high_school_statistics,acc_norm,0.28703703703703703,0.030851992993257017,0
+hendrycksTest-high_school_us_history,acc,0.22058823529411764,0.02910225438967409,0
+hendrycksTest-high_school_us_history,acc_norm,0.2647058823529412,0.0309645179269234,0
+hendrycksTest-high_school_world_history,acc,0.270042194092827,0.028900721906293426,0
+hendrycksTest-high_school_world_history,acc_norm,0.3037974683544304,0.029936696387138605,0
+hendrycksTest-human_aging,acc,0.3094170403587444,0.031024411740572206,0
+hendrycksTest-human_aging,acc_norm,0.22869955156950672,0.028188240046929193,0
+hendrycksTest-human_sexuality,acc,0.42748091603053434,0.04338920305792401,0
+hendrycksTest-human_sexuality,acc_norm,0.31297709923664124,0.04066962905677698,0
+hendrycksTest-international_law,acc,0.23140495867768596,0.03849856098794089,0
+hendrycksTest-international_law,acc_norm,0.4462809917355372,0.0453793517794788,0
+hendrycksTest-jurisprudence,acc,0.3148148148148148,0.04489931073591312,0
+hendrycksTest-jurisprudence,acc_norm,0.42592592592592593,0.0478034362693679,0
+hendrycksTest-logical_fallacies,acc,0.25766871165644173,0.03436150827846917,0
+hendrycksTest-logical_fallacies,acc_norm,0.3067484662576687,0.036230899157241474,0
+hendrycksTest-machine_learning,acc,0.3125,0.043994650575715215,0
+hendrycksTest-machine_learning,acc_norm,0.25892857142857145,0.04157751539865629,0
+hendrycksTest-management,acc,0.27184466019417475,0.044052680241409216,0
+hendrycksTest-management,acc_norm,0.33980582524271846,0.046897659372781335,0
+hendrycksTest-marketing,acc,0.27350427350427353,0.029202540153431163,0
+hendrycksTest-marketing,acc_norm,0.2905982905982906,0.029745048572674054,0
+hendrycksTest-medical_genetics,acc,0.28,0.04512608598542127,0
+hendrycksTest-medical_genetics,acc_norm,0.37,0.04852365870939099,0
+hendrycksTest-miscellaneous,acc,0.26309067688378035,0.015745497169049046,0
+hendrycksTest-miscellaneous,acc_norm,0.2656449553001277,0.01579430248788872,0
+hendrycksTest-moral_disputes,acc,0.2658959537572254,0.02378620325550828,0
+hendrycksTest-moral_disputes,acc_norm,0.3236994219653179,0.025190181327608408,0
+hendrycksTest-moral_scenarios,acc,0.23910614525139665,0.014265554192331144,0
+hendrycksTest-moral_scenarios,acc_norm,0.27262569832402234,0.014893391735249588,0
+hendrycksTest-nutrition,acc,0.2581699346405229,0.025058503316958157,0
+hendrycksTest-nutrition,acc_norm,0.3790849673202614,0.027780141207023334,0
+hendrycksTest-philosophy,acc,0.24115755627009647,0.024296594034763426,0
+hendrycksTest-philosophy,acc_norm,0.3086816720257235,0.026236965881153252,0
+hendrycksTest-prehistory,acc,0.26851851851851855,0.024659685185967287,0
+hendrycksTest-prehistory,acc_norm,0.21296296296296297,0.022779719088733396,0
+hendrycksTest-professional_accounting,acc,0.2198581560283688,0.024706141070705474,0
+hendrycksTest-professional_accounting,acc_norm,0.22695035460992907,0.024987106365642962,0
+hendrycksTest-professional_law,acc,0.27509778357235987,0.011405443620996939,0
+hendrycksTest-professional_law,acc_norm,0.288135593220339,0.011567140661324565,0
+hendrycksTest-professional_medicine,acc,0.1801470588235294,0.023345163616544835,0
+hendrycksTest-professional_medicine,acc_norm,0.2610294117647059,0.026679252270103124,0
+hendrycksTest-professional_psychology,acc,0.24509803921568626,0.01740181671142766,0
+hendrycksTest-professional_psychology,acc_norm,0.2581699346405229,0.017704531653250068,0
+hendrycksTest-public_relations,acc,0.24545454545454545,0.04122066502878285,0
+hendrycksTest-public_relations,acc_norm,0.15454545454545454,0.03462262571262667,0
+hendrycksTest-security_studies,acc,0.32653061224489793,0.030021056238440313,0
+hendrycksTest-security_studies,acc_norm,0.2693877551020408,0.02840125202902294,0
+hendrycksTest-sociology,acc,0.24378109452736318,0.030360490154014645,0
+hendrycksTest-sociology,acc_norm,0.2736318407960199,0.03152439186555402,0
+hendrycksTest-us_foreign_policy,acc,0.34,0.04760952285695235,0
+hendrycksTest-us_foreign_policy,acc_norm,0.36,0.04824181513244218,0
+hendrycksTest-virology,acc,0.3132530120481928,0.036108050180310235,0
+hendrycksTest-virology,acc_norm,0.2710843373493976,0.034605799075530276,0
+hendrycksTest-world_religions,acc,0.3157894736842105,0.03565079670708311,0
+hendrycksTest-world_religions,acc_norm,0.38596491228070173,0.03733756969066164,0
+piqa,acc,0.705658324265506,0.01063331147034749,0
+piqa,acc_norm,0.7121871599564744,0.01056325038305919,0
+rte,acc,0.5379061371841155,0.030009848912529117,0
+winogrande,acc,0.5564325177584846,0.0139626949076204,0
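The CSV above is flat lm-eval output, one row per task/metric pair. A minimal sketch of how a file of this shape could be loaded and summarised; it assumes pandas is installed and that the path is relative to the repository root (neither is part of the commit itself):

```python
# Sketch only: summarise an lm-eval results CSV of the shape added above.
import pandas as pd

path = "3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.csv"
df = pd.read_csv(path)  # columns: task, metric, value, err, version

# Mean accuracy over the hendrycksTest (MMLU) subtasks.
mmlu_acc = df[df["task"].str.startswith("hendrycksTest-") & (df["metric"] == "acc")]
print("MMLU mean acc:", mmlu_acc["value"].mean())

# Single-task lookup, e.g. COPA.
print(df[df["task"] == "copa"][["metric", "value", "err"]])
```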
3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.json
ADDED
@@ -0,0 +1,427 @@
+{
+  "results": {
+    "copa": {
+      "acc": 0.74,
+      "acc_stderr": 0.044084400227680794
+    },
+    "piqa": {
+      "acc": 0.705658324265506,
+      "acc_stderr": 0.01063331147034749,
+      "acc_norm": 0.7121871599564744,
+      "acc_norm_stderr": 0.01056325038305919
+    },
+    "rte": {
+      "acc": 0.5379061371841155,
+      "acc_stderr": 0.030009848912529117
+    },
+    "winogrande": {
+      "acc": 0.5564325177584846,
+      "acc_stderr": 0.0139626949076204
+    },
+    "hendrycksTest-abstract_algebra": {
+      "acc": 0.22,
+      "acc_stderr": 0.04163331998932268,
+      "acc_norm": 0.26,
+      "acc_norm_stderr": 0.04408440022768079
+    },
+    "hendrycksTest-anatomy": {
+      "acc": 0.28888888888888886,
+      "acc_stderr": 0.03915450630414251,
+      "acc_norm": 0.22962962962962963,
+      "acc_norm_stderr": 0.036333844140734636
+    },
+    "hendrycksTest-astronomy": {
+      "acc": 0.21710526315789475,
+      "acc_stderr": 0.03355045304882921,
+      "acc_norm": 0.3355263157894737,
+      "acc_norm_stderr": 0.03842498559395271
+    },
+    "hendrycksTest-business_ethics": {
+      "acc": 0.34,
+      "acc_stderr": 0.04760952285695235,
+      "acc_norm": 0.31,
+      "acc_norm_stderr": 0.04648231987117316
+    },
+    "hendrycksTest-clinical_knowledge": {
+      "acc": 0.24528301886792453,
+      "acc_stderr": 0.026480357179895678,
+      "acc_norm": 0.30943396226415093,
+      "acc_norm_stderr": 0.02845015479411863
+    },
+    "hendrycksTest-college_biology": {
+      "acc": 0.2152777777777778,
+      "acc_stderr": 0.034370793441061344,
+      "acc_norm": 0.2222222222222222,
+      "acc_norm_stderr": 0.034765901043041336
+    },
+    "hendrycksTest-college_chemistry": {
+      "acc": 0.26,
+      "acc_stderr": 0.04408440022768078,
+      "acc_norm": 0.29,
+      "acc_norm_stderr": 0.045604802157206845
+    },
+    "hendrycksTest-college_computer_science": {
+      "acc": 0.29,
+      "acc_stderr": 0.04560480215720684,
+      "acc_norm": 0.24,
+      "acc_norm_stderr": 0.04292346959909284
+    },
+    "hendrycksTest-college_mathematics": {
+      "acc": 0.2,
+      "acc_stderr": 0.04020151261036845,
+      "acc_norm": 0.3,
+      "acc_norm_stderr": 0.046056618647183814
+    },
+    "hendrycksTest-college_medicine": {
+      "acc": 0.2543352601156069,
+      "acc_stderr": 0.0332055644308557,
+      "acc_norm": 0.2543352601156069,
+      "acc_norm_stderr": 0.0332055644308557
+    },
+    "hendrycksTest-college_physics": {
+      "acc": 0.2549019607843137,
+      "acc_stderr": 0.043364327079931764,
+      "acc_norm": 0.28431372549019607,
+      "acc_norm_stderr": 0.04488482852329017
+    },
+    "hendrycksTest-computer_security": {
+      "acc": 0.28,
+      "acc_stderr": 0.04512608598542126,
+      "acc_norm": 0.36,
+      "acc_norm_stderr": 0.048241815132442176
+    },
+    "hendrycksTest-conceptual_physics": {
+      "acc": 0.2553191489361702,
+      "acc_stderr": 0.028504856470514203,
+      "acc_norm": 0.1829787234042553,
+      "acc_norm_stderr": 0.025276041000449966
+    },
+    "hendrycksTest-econometrics": {
+      "acc": 0.21929824561403508,
+      "acc_stderr": 0.03892431106518753,
+      "acc_norm": 0.21929824561403508,
+      "acc_norm_stderr": 0.03892431106518754
+    },
+    "hendrycksTest-electrical_engineering": {
+      "acc": 0.2689655172413793,
+      "acc_stderr": 0.036951833116502325,
+      "acc_norm": 0.30344827586206896,
+      "acc_norm_stderr": 0.038312260488503336
+    },
+    "hendrycksTest-elementary_mathematics": {
+      "acc": 0.21957671957671956,
+      "acc_stderr": 0.02132001859977036,
+      "acc_norm": 0.25925925925925924,
+      "acc_norm_stderr": 0.022569897074918407
+    },
+    "hendrycksTest-formal_logic": {
+      "acc": 0.29365079365079366,
+      "acc_stderr": 0.04073524322147127,
+      "acc_norm": 0.23809523809523808,
+      "acc_norm_stderr": 0.038095238095238126
+    },
+    "hendrycksTest-global_facts": {
+      "acc": 0.19,
+      "acc_stderr": 0.039427724440366234,
+      "acc_norm": 0.2,
+      "acc_norm_stderr": 0.04020151261036846
+    },
+    "hendrycksTest-high_school_biology": {
+      "acc": 0.23548387096774193,
+      "acc_stderr": 0.02413763242933771,
+      "acc_norm": 0.3032258064516129,
+      "acc_norm_stderr": 0.026148685930671746
+    },
+    "hendrycksTest-high_school_chemistry": {
+      "acc": 0.2019704433497537,
+      "acc_stderr": 0.028247350122180277,
+      "acc_norm": 0.270935960591133,
+      "acc_norm_stderr": 0.031270907132976984
+    },
+    "hendrycksTest-high_school_computer_science": {
+      "acc": 0.23,
+      "acc_stderr": 0.04229525846816506,
+      "acc_norm": 0.28,
+      "acc_norm_stderr": 0.04512608598542128
+    },
+    "hendrycksTest-high_school_european_history": {
+      "acc": 0.24848484848484848,
+      "acc_stderr": 0.03374402644139406,
+      "acc_norm": 0.3090909090909091,
+      "acc_norm_stderr": 0.036085410115739666
+    },
+    "hendrycksTest-high_school_geography": {
+      "acc": 0.18181818181818182,
+      "acc_stderr": 0.027479603010538787,
+      "acc_norm": 0.2878787878787879,
+      "acc_norm_stderr": 0.03225883512300993
+    },
+    "hendrycksTest-high_school_government_and_politics": {
+      "acc": 0.21243523316062177,
+      "acc_stderr": 0.02951928261681725,
+      "acc_norm": 0.2538860103626943,
+      "acc_norm_stderr": 0.03141024780565318
+    },
+    "hendrycksTest-high_school_macroeconomics": {
+      "acc": 0.2358974358974359,
+      "acc_stderr": 0.02152596540740873,
+      "acc_norm": 0.27692307692307694,
+      "acc_norm_stderr": 0.022688042352424994
+    },
+    "hendrycksTest-high_school_mathematics": {
+      "acc": 0.1925925925925926,
+      "acc_stderr": 0.024043075181945192,
+      "acc_norm": 0.21481481481481482,
+      "acc_norm_stderr": 0.025040443877000686
+    },
+    "hendrycksTest-high_school_microeconomics": {
+      "acc": 0.24369747899159663,
+      "acc_stderr": 0.027886828078380558,
+      "acc_norm": 0.29831932773109243,
+      "acc_norm_stderr": 0.02971914287634287
+    },
+    "hendrycksTest-high_school_physics": {
+      "acc": 0.19205298013245034,
+      "acc_stderr": 0.032162984205936156,
+      "acc_norm": 0.25165562913907286,
+      "acc_norm_stderr": 0.03543304234389985
+    },
+    "hendrycksTest-high_school_psychology": {
+      "acc": 0.22935779816513763,
+      "acc_stderr": 0.018025349724618684,
+      "acc_norm": 0.24036697247706423,
+      "acc_norm_stderr": 0.01832060732096407
+    },
+    "hendrycksTest-high_school_statistics": {
+      "acc": 0.23148148148148148,
+      "acc_stderr": 0.028765111718046976,
+      "acc_norm": 0.28703703703703703,
+      "acc_norm_stderr": 0.030851992993257017
+    },
+    "hendrycksTest-high_school_us_history": {
+      "acc": 0.22058823529411764,
+      "acc_stderr": 0.02910225438967409,
+      "acc_norm": 0.2647058823529412,
+      "acc_norm_stderr": 0.0309645179269234
+    },
+    "hendrycksTest-high_school_world_history": {
+      "acc": 0.270042194092827,
+      "acc_stderr": 0.028900721906293426,
+      "acc_norm": 0.3037974683544304,
+      "acc_norm_stderr": 0.029936696387138605
+    },
+    "hendrycksTest-human_aging": {
+      "acc": 0.3094170403587444,
+      "acc_stderr": 0.031024411740572206,
+      "acc_norm": 0.22869955156950672,
+      "acc_norm_stderr": 0.028188240046929193
+    },
+    "hendrycksTest-human_sexuality": {
+      "acc": 0.42748091603053434,
+      "acc_stderr": 0.04338920305792401,
+      "acc_norm": 0.31297709923664124,
+      "acc_norm_stderr": 0.04066962905677698
+    },
+    "hendrycksTest-international_law": {
+      "acc": 0.23140495867768596,
+      "acc_stderr": 0.03849856098794089,
+      "acc_norm": 0.4462809917355372,
+      "acc_norm_stderr": 0.0453793517794788
+    },
+    "hendrycksTest-jurisprudence": {
+      "acc": 0.3148148148148148,
+      "acc_stderr": 0.04489931073591312,
+      "acc_norm": 0.42592592592592593,
+      "acc_norm_stderr": 0.0478034362693679
+    },
+    "hendrycksTest-logical_fallacies": {
+      "acc": 0.25766871165644173,
+      "acc_stderr": 0.03436150827846917,
+      "acc_norm": 0.3067484662576687,
+      "acc_norm_stderr": 0.036230899157241474
+    },
+    "hendrycksTest-machine_learning": {
+      "acc": 0.3125,
+      "acc_stderr": 0.043994650575715215,
+      "acc_norm": 0.25892857142857145,
+      "acc_norm_stderr": 0.04157751539865629
+    },
+    "hendrycksTest-management": {
+      "acc": 0.27184466019417475,
+      "acc_stderr": 0.044052680241409216,
+      "acc_norm": 0.33980582524271846,
+      "acc_norm_stderr": 0.046897659372781335
+    },
+    "hendrycksTest-marketing": {
+      "acc": 0.27350427350427353,
+      "acc_stderr": 0.029202540153431163,
+      "acc_norm": 0.2905982905982906,
+      "acc_norm_stderr": 0.029745048572674054
+    },
+    "hendrycksTest-medical_genetics": {
+      "acc": 0.28,
+      "acc_stderr": 0.04512608598542127,
+      "acc_norm": 0.37,
+      "acc_norm_stderr": 0.04852365870939099
+    },
+    "hendrycksTest-miscellaneous": {
+      "acc": 0.26309067688378035,
+      "acc_stderr": 0.015745497169049046,
+      "acc_norm": 0.2656449553001277,
+      "acc_norm_stderr": 0.01579430248788872
+    },
+    "hendrycksTest-moral_disputes": {
+      "acc": 0.2658959537572254,
+      "acc_stderr": 0.02378620325550828,
+      "acc_norm": 0.3236994219653179,
+      "acc_norm_stderr": 0.025190181327608408
+    },
+    "hendrycksTest-moral_scenarios": {
+      "acc": 0.23910614525139665,
+      "acc_stderr": 0.014265554192331144,
+      "acc_norm": 0.27262569832402234,
+      "acc_norm_stderr": 0.014893391735249588
+    },
+    "hendrycksTest-nutrition": {
+      "acc": 0.2581699346405229,
+      "acc_stderr": 0.025058503316958157,
+      "acc_norm": 0.3790849673202614,
+      "acc_norm_stderr": 0.027780141207023334
+    },
+    "hendrycksTest-philosophy": {
+      "acc": 0.24115755627009647,
+      "acc_stderr": 0.024296594034763426,
+      "acc_norm": 0.3086816720257235,
+      "acc_norm_stderr": 0.026236965881153252
+    },
+    "hendrycksTest-prehistory": {
+      "acc": 0.26851851851851855,
+      "acc_stderr": 0.024659685185967287,
+      "acc_norm": 0.21296296296296297,
+      "acc_norm_stderr": 0.022779719088733396
+    },
+    "hendrycksTest-professional_accounting": {
+      "acc": 0.2198581560283688,
+      "acc_stderr": 0.024706141070705474,
+      "acc_norm": 0.22695035460992907,
+      "acc_norm_stderr": 0.024987106365642962
+    },
+    "hendrycksTest-professional_law": {
+      "acc": 0.27509778357235987,
+      "acc_stderr": 0.011405443620996939,
+      "acc_norm": 0.288135593220339,
+      "acc_norm_stderr": 0.011567140661324565
+    },
+    "hendrycksTest-professional_medicine": {
+      "acc": 0.1801470588235294,
+      "acc_stderr": 0.023345163616544835,
+      "acc_norm": 0.2610294117647059,
+      "acc_norm_stderr": 0.026679252270103124
+    },
+    "hendrycksTest-professional_psychology": {
+      "acc": 0.24509803921568626,
+      "acc_stderr": 0.01740181671142766,
+      "acc_norm": 0.2581699346405229,
+      "acc_norm_stderr": 0.017704531653250068
+    },
+    "hendrycksTest-public_relations": {
+      "acc": 0.24545454545454545,
+      "acc_stderr": 0.04122066502878285,
+      "acc_norm": 0.15454545454545454,
+      "acc_norm_stderr": 0.03462262571262667
+    },
+    "hendrycksTest-security_studies": {
+      "acc": 0.32653061224489793,
+      "acc_stderr": 0.030021056238440313,
+      "acc_norm": 0.2693877551020408,
+      "acc_norm_stderr": 0.02840125202902294
+    },
+    "hendrycksTest-sociology": {
+      "acc": 0.24378109452736318,
+      "acc_stderr": 0.030360490154014645,
+      "acc_norm": 0.2736318407960199,
+      "acc_norm_stderr": 0.03152439186555402
+    },
+    "hendrycksTest-us_foreign_policy": {
+      "acc": 0.34,
+      "acc_stderr": 0.04760952285695235,
+      "acc_norm": 0.36,
+      "acc_norm_stderr": 0.04824181513244218
+    },
+    "hendrycksTest-virology": {
+      "acc": 0.3132530120481928,
+      "acc_stderr": 0.036108050180310235,
+      "acc_norm": 0.2710843373493976,
+      "acc_norm_stderr": 0.034605799075530276
+    },
+    "hendrycksTest-world_religions": {
+      "acc": 0.3157894736842105,
+      "acc_stderr": 0.03565079670708311,
+      "acc_norm": 0.38596491228070173,
+      "acc_norm_stderr": 0.03733756969066164
+    }
+  },
+  "versions": {
+    "copa": 0,
+    "piqa": 0,
+    "rte": 0,
+    "winogrande": 0,
+    "hendrycksTest-abstract_algebra": 0,
+    "hendrycksTest-anatomy": 0,
+    "hendrycksTest-astronomy": 0,
+    "hendrycksTest-business_ethics": 0,
+    "hendrycksTest-clinical_knowledge": 0,
+    "hendrycksTest-college_biology": 0,
+    "hendrycksTest-college_chemistry": 0,
+    "hendrycksTest-college_computer_science": 0,
+    "hendrycksTest-college_mathematics": 0,
+    "hendrycksTest-college_medicine": 0,
+    "hendrycksTest-college_physics": 0,
+    "hendrycksTest-computer_security": 0,
+    "hendrycksTest-conceptual_physics": 0,
+    "hendrycksTest-econometrics": 0,
+    "hendrycksTest-electrical_engineering": 0,
+    "hendrycksTest-elementary_mathematics": 0,
+    "hendrycksTest-formal_logic": 0,
+    "hendrycksTest-global_facts": 0,
+    "hendrycksTest-high_school_biology": 0,
+    "hendrycksTest-high_school_chemistry": 0,
+    "hendrycksTest-high_school_computer_science": 0,
+    "hendrycksTest-high_school_european_history": 0,
+    "hendrycksTest-high_school_geography": 0,
+    "hendrycksTest-high_school_government_and_politics": 0,
+    "hendrycksTest-high_school_macroeconomics": 0,
+    "hendrycksTest-high_school_mathematics": 0,
+    "hendrycksTest-high_school_microeconomics": 0,
+    "hendrycksTest-high_school_physics": 0,
+    "hendrycksTest-high_school_psychology": 0,
+    "hendrycksTest-high_school_statistics": 0,
+    "hendrycksTest-high_school_us_history": 0,
+    "hendrycksTest-high_school_world_history": 0,
+    "hendrycksTest-human_aging": 0,
+    "hendrycksTest-human_sexuality": 0,
+    "hendrycksTest-international_law": 0,
+    "hendrycksTest-jurisprudence": 0,
+    "hendrycksTest-logical_fallacies": 0,
+    "hendrycksTest-machine_learning": 0,
+    "hendrycksTest-management": 0,
+    "hendrycksTest-marketing": 0,
+    "hendrycksTest-medical_genetics": 0,
+    "hendrycksTest-miscellaneous": 0,
+    "hendrycksTest-moral_disputes": 0,
+    "hendrycksTest-moral_scenarios": 0,
+    "hendrycksTest-nutrition": 0,
+    "hendrycksTest-philosophy": 0,
+    "hendrycksTest-prehistory": 0,
+    "hendrycksTest-professional_accounting": 0,
+    "hendrycksTest-professional_law": 0,
+    "hendrycksTest-professional_medicine": 0,
+    "hendrycksTest-professional_psychology": 0,
+    "hendrycksTest-public_relations": 0,
+    "hendrycksTest-security_studies": 0,
+    "hendrycksTest-sociology": 0,
+    "hendrycksTest-us_foreign_policy": 0,
+    "hendrycksTest-virology": 0,
+    "hendrycksTest-world_religions": 0
+  }
+}
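The JSON mirrors the CSV: per-task scores live under "results" (acc and acc_stderr, plus acc_norm and acc_norm_stderr where the task defines them) and task versions under "versions". A minimal sketch of reading it with the standard library, assuming the path is relative to the repository root:

```python
# Sketch only: read one of the lm-eval JSON result files added in this commit.
import json

path = "3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-02-14-09-22.json"
with open(path) as f:
    data = json.load(f)

for task, scores in data["results"].items():
    print(f"{task} (v{data['versions'][task]}): "
          f"acc={scores['acc']:.4f} ± {scores['acc_stderr']:.4f}")
```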
3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-03-12-23-46.csv
ADDED
@@ -0,0 +1,10 @@
+task,metric,value,err,version
+arc_challenge,acc,0.26535836177474403,0.012902554762313967,0
+arc_challenge,acc_norm,0.295221843003413,0.013329750293382316,0
+arc_easy,acc,0.5896464646464646,0.010093531255765457,0
+arc_easy,acc_norm,0.5404040404040404,0.010226230740889027,0
+boolq,acc,0.5859327217125382,0.008614932353134947,1
+hellaswag,acc,0.40689105755825533,0.004902502514738602,0
+hellaswag,acc_norm,0.5210117506472814,0.0049853735507751065,0
+sciq,acc,0.851,0.011266140684632175,0
+sciq,acc_norm,0.795,0.012772554096113132,0
3b977b77b/evaluation/lm1-3b9-77b-results_lm-eval_global_step73814_2022-12-03-12-23-46.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "results": {
+    "boolq": {
+      "acc": 0.5859327217125382,
+      "acc_stderr": 0.008614932353134947
+    },
+    "hellaswag": {
+      "acc": 0.40689105755825533,
+      "acc_stderr": 0.004902502514738602,
+      "acc_norm": 0.5210117506472814,
+      "acc_norm_stderr": 0.0049853735507751065
+    },
+    "arc_challenge": {
+      "acc": 0.26535836177474403,
+      "acc_stderr": 0.012902554762313967,
+      "acc_norm": 0.295221843003413,
+      "acc_norm_stderr": 0.013329750293382316
+    },
+    "arc_easy": {
+      "acc": 0.5896464646464646,
+      "acc_stderr": 0.010093531255765457,
+      "acc_norm": 0.5404040404040404,
+      "acc_norm_stderr": 0.010226230740889027
+    },
+    "sciq": {
+      "acc": 0.851,
+      "acc_stderr": 0.011266140684632175,
+      "acc_norm": 0.795,
+      "acc_norm_stderr": 0.012772554096113132
+    }
+  },
+  "versions": {
+    "boolq": 1,
+    "hellaswag": 0,
+    "arc_challenge": 0,
+    "arc_easy": 0,
+    "sciq": 0
+  }
+}
3b977b77b/global_step73814/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a29fbd24b355bea1889e32dd263759d483b32734bb1972d3207e8be072849c9a
+size 95016023
3b977b77b/global_step73814/bf16_zero_pp_rank_0_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b049d85994023c23eba0b9d2404c51171d424aa01245d8cd93fb8b94b861fdd
+size 95016407
3b977b77b/global_step73814/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b54022ec72c9582df13e6e31f1a6ed060b4fce63a986aec7234e756a24ff7ce
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_100_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12f953b34095264a475d8e11d8d5ebaae8600f027ea1d3b4045659a12878b507
+size 95016557
3b977b77b/global_step73814/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8b90f4da07c01695b5c96664367c860f76c1cbbc6da3cf2b8cf9063d3201c22
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_101_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b215f52d2612e6951cfc5b8127679b69ed7c805e9e7c339942d94d9e8b6c853e
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58a16699510b0ab3166810904861d998a15c8790db4c8e2858fc372a74c6b2ef
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_102_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b4849707bed7ea018d0a4c7ceaabe4177f3dfaa874ce77f4c0638724549d4fa
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c015e504acbd2c5e60911087b10ec281a6191786860feeed6c2c85eee9118da0
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_103_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd3871894c131010d3f845f2eac45e7794bc808fc353a2e12b98386026b1724f
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a6f62b9427cbff50821ade3d792b84818a12c3f01f597d94945d79c6719fd1c
+size 95016173
3b977b77b/global_step73814/bf16_zero_pp_rank_104_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc9a83336a48a661294f5cb9cc859a0d66cca4c15eb3361498b405edaa524f84
+size 95016557
3b977b77b/global_step73814/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c696cfbeb527257264d8d6a35dc7692fccb73b1d970735d7811b24ce568b3b8c
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_105_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b8380086d03c69bc6f7d39793cf0c4e999903953a56a3c5f2f2de955e554fc3
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce0d66ac9183255cc40ced499db1222e2db3c89250b55f53f106d713806615e4
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_106_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38a89983e7bb35da7884615a40f785cf49f12e39fc095204e1fe19e67193b1c1
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0599ed0ae07c775b652e5ec04088f9fd1e3435ea502e7cf7924a4e85eeb57a1c
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_107_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c900173603d7dd19faa0fe74995226801013fcf82e3afef12bf34216c0531a7e
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d52946ac110c881029f050c7d1e00fd5f2b680a882e39c14df45da435e73fae
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_108_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ee9b906ea4f0f7bb0460dae8427a0a21eaaae7842fd9ba5ef1447fd567a5d2e
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afc535b58fc224dd1cdc2fb1b5fa4b66c1acbdcaca435884425ddc7b8ec54886
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_109_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0641f5b851f1592f6d16eb5c2f4ec17fdc16c5e237d4eb349262eea73001d5b2
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3523efc349ae5ab1899f6162a70c1870387adb9986abcfe2fa2390065f360a5a
+size 95016034
3b977b77b/global_step73814/bf16_zero_pp_rank_10_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58fba351538a1aa9045b7013d2aa8fdde2b97a769fe8b9ec5d2be6c430e9d6b5
+size 95016418
3b977b77b/global_step73814/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df7370c0908abc119ebad2983e149fec05ea113068fd71689c57ba6a0d82da2
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_110_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8471a8ae65b6bc287ddc06a6ca9e213e002efca6f5e0051b8f4c5a1ebf1d0c59
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55c67a06d5f8ffedbea94f8d7395f36c222683cc3fe6e3051e387ae1b39acbde
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_111_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfbe0a6fac28e62c266236bc0342078cff6717bfaaf5690a43d638d894b647ae
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38b15c8ee479c7173289455af05e99230c2b8659a1eec826a548f12c72a1a009
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_112_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:801a2addd491cbeba1fa75c035e961dd4241dd0ec5e4aeccbc95a0896cc41f67
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb80da486622ac7ccb63093e6034b1799c683f9c85c9af54d56d0c067fc3249
+size 95016173
3b977b77b/global_step73814/bf16_zero_pp_rank_113_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f19838348110bee85b68ce287bd14a230e3887435cf8414f9ee06bf91a737a03
+size 95016557
3b977b77b/global_step73814/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e8a73bfd2885fb5e9c6353e7fe0539a43f2976aa98114316d017b202b8cc817
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_114_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16417fab42c658d2bdf1eefb6dd771b315b9fdb04c29cec9a0bb7fdc0ba117f2
+size 95016493
3b977b77b/global_step73814/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63d4328db902e7bbb9a932a90c0c17380dc4d2cf2ced33a56eda0e9dfd9470e5
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_115_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c79f278d8fb9d8b5f1a57a31ac5b74470890af3e88bb94abb27f41439e66a3e
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46f0f92b6e903cb07bedb26574fb8ae7dda99ed5a46ac7a6783869487edcbf98
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_116_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1846c6c771081ddbd642053a96590e5fdfd642f01ac141d1be43060ae4d29cf
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32c034732b37f75f30e1e67e5c91e22d3e984ad5ccdfb342f1209fc16d52f520
+size 95016045
3b977b77b/global_step73814/bf16_zero_pp_rank_117_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ef434eb7c5a2c0e5a3e89001916acad5a8ef2da4cd7fd9cacb1312fd59e80cd
+size 95016429
3b977b77b/global_step73814/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e93825d773e0c1c1a725f827a9867edc8faf40d46cbf81b40f97ce8ee8f68fe
+size 95016109
3b977b77b/global_step73814/bf16_zero_pp_rank_118_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea553a9f586278dc11a3e2d103820f446129281d2c3875397937b5b3cb753cf2
+size 95016557
3b977b77b/global_step73814/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:262e99597c557c99e102b2e42314d1ea289470adedd7c9a6c537653a3cf52869
+size 95016173
3b977b77b/global_step73814/bf16_zero_pp_rank_119_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c97a25ec429af9e992d7a5b2c0e0ba354a38327699d2b131acdc9d297fc70b8
+size 95016557
3b977b77b/global_step73814/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f325516ed422a64e545f8579303f00c2fd40f506254f0adc28c43c94e9b6e12c
+size 95016034
3b977b77b/global_step73814/bf16_zero_pp_rank_11_mp_rank_01_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91210069813109fdbeefc831d4fba1bcd9905e67a0b724125fc260a754071a8e
+size 95016418
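The optimizer-state files above are Git LFS pointers, not the tensors themselves: each pointer records only the spec version, the sha256 oid of the stored object, and its size in bytes (the actual data is fetched with `git lfs pull`). A minimal sketch of parsing one such pointer into its fields, assuming a local checkout; the helper name is illustrative:

```python
# Sketch only: split a Git LFS pointer file like the ones above into its
# three "key value" fields (version, oid, size).
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "3b977b77b/global_step73814/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt"
)
print(ptr["oid"], int(ptr["size"]))  # e.g. sha256:a29fbd2... 95016023
```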