|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.014955087918653605,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136781,0
|
|
arc_challenge,acc,0.2440273037542662,0.012551447627856257,0
|
|
arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0
|
|
arc_easy,acc,0.5782828282828283,0.010133255284012325,0
|
|
arc_easy,acc_norm,0.5155723905723906,0.010254806331961897,0
|
|
boolq,acc,0.5464831804281346,0.008707182331111646,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.1940928270042194,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.4558852818163712,0.004970322156997941,0
|
|
hellaswag,acc_norm,0.5975901214897431,0.004893814890208305,0
|
|
piqa,acc,0.7540805223068553,0.010047331865625194,0
|
|
piqa,acc_norm,0.7622415669205659,0.009932525779525492,0
|
|
rte,acc,0.51985559566787,0.030072723167317184,0
|
|
sciq,acc,0.818,0.012207580637662153,0
|
|
sciq,acc_norm,0.743,0.013825416526895045,0
|
|
storycloze_2016,acc,0.7113842864778194,0.01047831178564294,0
|
|
winogrande,acc,0.5777426992896606,0.013881582030658554,0
|
|
|