lm1-2b8-55b-oscartasky / evaluation /rankeval /checkpoints_2b855boscartasky_5.csv
Muennighoff's picture
Add eval
83b5f1e
task,metric,value,err,version
anli_r1,acc,0.356,0.015149042659306623,0
anli_r2,acc,0.355,0.015139491543780532,0
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
arc_challenge,acc,0.257679180887372,0.012780770562768395,0
arc_challenge,acc_norm,0.2781569965870307,0.013094469919538812,0
arc_easy,acc,0.5618686868686869,0.010180937100600073,0
arc_easy,acc_norm,0.5589225589225589,0.01018829322104055,0
boolq,acc,0.5770642201834862,0.008640558744656428,1
cb,acc,0.5535714285714286,0.06703189227942394,1
cb,f1,0.3074074074074074,,1
copa,acc,0.67,0.04725815626252607,0
hellaswag,acc,0.3536148177653854,0.004771143074426132,0
hellaswag,acc_norm,0.4255128460466043,0.00493410077448122,0
piqa,acc,0.6838955386289445,0.010848148455700455,0
piqa,acc_norm,0.6789989118607181,0.010892641574707904,0
rte,acc,0.48014440433212996,0.0300727231673172,0
sciq,acc,0.897,0.0096168333396958,0
sciq,acc_norm,0.893,0.009779910359847169,0
storycloze_2016,acc,0.6264029930518439,0.011186849693644694,0
winogrande,acc,0.500394632991318,0.014052481306049516,0