task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.327,0.014842213153411242,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.2687713310580205,0.012955065963710691,0 arc_challenge,acc_norm,0.30204778156996587,0.013417519144716417,0 arc_easy,acc,0.6153198653198653,0.009983171707009008,0 arc_easy,acc_norm,0.5980639730639731,0.010060521220920566,0 boolq,acc,0.5908256880733945,0.008599563442397349,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.28503144654088053,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4422425811591316,0.004956378590571537,0 hellaswag,acc_norm,0.5832503485361482,0.004920130733271772,0 piqa,acc,0.7230685527747551,0.010440499969334535,0 piqa,acc_norm,0.733949945593036,0.010310039263352826,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.889,0.009938701010583726,0 sciq,acc_norm,0.862,0.010912152632504394,0 storycloze_2016,acc,0.7220737573490112,0.010359403651225854,0 winogrande,acc,0.5659037095501184,0.01392988255569405,0