task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411245,0 anli_r2,acc,0.338,0.01496596071022449,0 anli_r3,acc,0.3358333333333333,0.01363926119093289,0 arc_challenge,acc,0.25,0.012653835621466646,0 arc_challenge,acc_norm,0.28071672354948807,0.013131238126975576,0 arc_easy,acc,0.5547138047138047,0.010198171137873871,0 arc_easy,acc_norm,0.5568181818181818,0.010193324837773498,0 boolq,acc,0.590519877675841,0.008600549751320923,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.28584474885844746,,1 copa,acc,0.66,0.04760952285695237,0 hellaswag,acc,0.35152360087631945,0.004764703145680279,0 hellaswag,acc_norm,0.42620991834295957,0.004935143791573811,0 piqa,acc,0.690424374319913,0.010786656752183345,0 piqa,acc_norm,0.6849836779107725,0.010838072746240653,0 rte,acc,0.47653429602888087,0.030063300411902652,0 sciq,acc,0.88,0.010281328012747398,0 sciq,acc_norm,0.873,0.010534798620855759,0 storycloze_2016,acc,0.6264029930518439,0.011186849693644694,0 winogrande,acc,0.5138121546961326,0.014047122916440419,0