|
task,metric,value,err,version
|
|
anli_r1,acc,0.347,0.015060472031706624,0
|
|
anli_r2,acc,0.325,0.014818724459095527,0
|
|
anli_r3,acc,0.36083333333333334,0.013869180252444864,0
|
|
arc_challenge,acc,0.257679180887372,0.012780770562768405,0
|
|
arc_challenge,acc_norm,0.28071672354948807,0.013131238126975574,0
|
|
arc_easy,acc,0.5542929292929293,0.01019911818332299,0
|
|
arc_easy,acc_norm,0.5223063973063973,0.010249568404555652,0
|
|
boolq,acc,0.5929663608562691,0.008592562887068871,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.2842465753424657,,1
|
|
copa,acc,0.62,0.04878317312145632,0
|
|
hellaswag,acc,0.3514240191196973,0.0047643939851110305,0
|
|
hellaswag,acc_norm,0.42859988050189207,0.004938643787869535,0
|
|
piqa,acc,0.6871599564744287,0.010817714425701104,0
|
|
piqa,acc_norm,0.6828073993471164,0.01085815545438087,0
|
|
rte,acc,0.4548736462093863,0.029973636495415252,0
|
|
sciq,acc,0.873,0.010534798620855743,0
|
|
sciq,acc_norm,0.854,0.011171786285496497,0
|
|
storycloze_2016,acc,0.6264029930518439,0.011186849693644694,0
|
|
winogrande,acc,0.5122336227308603,0.01404827882040562,0
|
|
|