lm1-2b8-55b-oscartasky/evaluation/rankeval/checkpoints_2b855boscartasky_1.csv
task,metric,value,err,version
anli_r1,acc,0.347,0.015060472031706624,0
anli_r2,acc,0.325,0.014818724459095527,0
anli_r3,acc,0.36083333333333334,0.013869180252444864,0
arc_challenge,acc,0.257679180887372,0.012780770562768405,0
arc_challenge,acc_norm,0.28071672354948807,0.013131238126975574,0
arc_easy,acc,0.5542929292929293,0.01019911818332299,0
arc_easy,acc_norm,0.5223063973063973,0.010249568404555652,0
boolq,acc,0.5929663608562691,0.008592562887068871,1
cb,acc,0.4642857142857143,0.06724777654937658,1
cb,f1,0.2842465753424657,,1
copa,acc,0.62,0.04878317312145632,0
hellaswag,acc,0.3514240191196973,0.0047643939851110305,0
hellaswag,acc_norm,0.42859988050189207,0.004938643787869535,0
piqa,acc,0.6871599564744287,0.010817714425701104,0
piqa,acc_norm,0.6828073993471164,0.01085815545438087,0
rte,acc,0.4548736462093863,0.029973636495415252,0
sciq,acc,0.873,0.010534798620855743,0
sciq,acc_norm,0.854,0.011171786285496497,0
storycloze_2016,acc,0.6264029930518439,0.011186849693644694,0
winogrande,acc,0.5122336227308603,0.01404827882040562,0