task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653605,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.33416666666666667,0.013622434813136781,0 arc_challenge,acc,0.2440273037542662,0.012551447627856257,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5782828282828283,0.010133255284012325,0 arc_easy,acc_norm,0.5155723905723906,0.010254806331961897,0 boolq,acc,0.5464831804281346,0.008707182331111646,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4558852818163712,0.004970322156997941,0 hellaswag,acc_norm,0.5975901214897431,0.004893814890208305,0 piqa,acc,0.7540805223068553,0.010047331865625194,0 piqa,acc_norm,0.7622415669205659,0.009932525779525492,0 rte,acc,0.51985559566787,0.030072723167317184,0 sciq,acc,0.818,0.012207580637662153,0 sciq,acc_norm,0.743,0.013825416526895045,0 storycloze_2016,acc,0.7113842864778194,0.01047831178564294,0 winogrande,acc,0.5777426992896606,0.013881582030658554,0