Muennighoff's picture
Organize files
b09b20a
raw
history blame
No virus
4.89 kB
dataset,prompt,metric,value
xcopa_id,C1 or C2? premise_idmt,accuracy,0.52
xcopa_id,best_option_idmt,accuracy,0.63
xcopa_id,cause_effect_idmt,accuracy,0.64
xcopa_id,i_am_hesitating_idmt,accuracy,0.66
xcopa_id,plausible_alternatives_idmt,accuracy,0.71
xcopa_id,median,accuracy,0.64
xcopa_sw,C1 or C2? premise_swmt,accuracy,0.6
xcopa_sw,best_option_swmt,accuracy,0.59
xcopa_sw,cause_effect_swmt,accuracy,0.57
xcopa_sw,i_am_hesitating_swmt,accuracy,0.61
xcopa_sw,plausible_alternatives_swmt,accuracy,0.59
xcopa_sw,median,accuracy,0.59
xcopa_ta,C1 or C2? premise_tamt,accuracy,0.6
xcopa_ta,best_option_tamt,accuracy,0.56
xcopa_ta,cause_effect_tamt,accuracy,0.58
xcopa_ta,i_am_hesitating_tamt,accuracy,0.54
xcopa_ta,plausible_alternatives_tamt,accuracy,0.54
xcopa_ta,median,accuracy,0.56
xcopa_vi,C1 or C2? premise_vimt,accuracy,0.63
xcopa_vi,best_option_vimt,accuracy,0.73
xcopa_vi,cause_effect_vimt,accuracy,0.72
xcopa_vi,i_am_hesitating_vimt,accuracy,0.71
xcopa_vi,plausible_alternatives_vimt,accuracy,0.77
xcopa_vi,median,accuracy,0.72
xcopa_zh,C1 or C2? premise_zhmt,accuracy,0.61
xcopa_zh,best_option_zhmt,accuracy,0.69
xcopa_zh,cause_effect_zhmt,accuracy,0.8
xcopa_zh,i_am_hesitating_zhmt,accuracy,0.74
xcopa_zh,plausible_alternatives_zhmt,accuracy,0.76
xcopa_zh,median,accuracy,0.74
xstory_cloze_ar,Answer Given options_armt,accuracy,0.6664460622104567
xstory_cloze_ar,Choose Story Ending_armt,accuracy,0.8385175380542687
xstory_cloze_ar,Generate Ending_armt,accuracy,0.5843812045003309
xstory_cloze_ar,Novel Correct Ending_armt,accuracy,0.827928524156188
xstory_cloze_ar,Story Continuation and Options_armt,accuracy,0.8246194573130378
xstory_cloze_ar,median,accuracy,0.8246194573130378
xstory_cloze_es,Answer Given options_esmt,accuracy,0.8325612177365983
xstory_cloze_es,Choose Story Ending_esmt,accuracy,0.8881535407015222
xstory_cloze_es,Generate Ending_esmt,accuracy,0.6776968894771674
xstory_cloze_es,Novel Correct Ending_esmt,accuracy,0.8656518861681006
xstory_cloze_es,Story Continuation and Options_esmt,accuracy,0.886168100595632
xstory_cloze_es,median,accuracy,0.8656518861681006
xstory_cloze_eu,Answer Given options_eumt,accuracy,0.5678358702845797
xstory_cloze_eu,Choose Story Ending_eumt,accuracy,0.7326273990734613
xstory_cloze_eu,Generate Ending_eumt,accuracy,0.5095962938451357
xstory_cloze_eu,Novel Correct Ending_eumt,accuracy,0.6558570483123759
xstory_cloze_eu,Story Continuation and Options_eumt,accuracy,0.7193911317008603
xstory_cloze_eu,median,accuracy,0.6558570483123759
xstory_cloze_hi,Answer Given options_himt,accuracy,0.7054930509596293
xstory_cloze_hi,Choose Story Ending_himt,accuracy,0.8041032428855063
xstory_cloze_hi,Generate Ending_himt,accuracy,0.614824619457313
xstory_cloze_hi,Novel Correct Ending_himt,accuracy,0.7584381204500331
xstory_cloze_hi,Story Continuation and Options_himt,accuracy,0.7981469225678358
xstory_cloze_hi,median,accuracy,0.7584381204500331
xstory_cloze_id,Answer Given options_idmt,accuracy,0.7326273990734613
xstory_cloze_id,Choose Story Ending_idmt,accuracy,0.8457974851091992
xstory_cloze_id,Generate Ending_idmt,accuracy,0.5678358702845797
xstory_cloze_id,Novel Correct Ending_idmt,accuracy,0.8226340172071476
xstory_cloze_id,Story Continuation and Options_idmt,accuracy,0.8246194573130378
xstory_cloze_id,median,accuracy,0.8226340172071476
xstory_cloze_zh,Answer Given options_zhmt,accuracy,0.7935142289874255
xstory_cloze_zh,Choose Story Ending_zhmt,accuracy,0.8590337524818001
xstory_cloze_zh,Generate Ending_zhmt,accuracy,0.6307081403044341
xstory_cloze_zh,Novel Correct Ending_zhmt,accuracy,0.8590337524818001
xstory_cloze_zh,Story Continuation and Options_zhmt,accuracy,0.8464592984778293
xstory_cloze_zh,median,accuracy,0.8464592984778293
xwinograd_fr,Replace_frmt,accuracy,0.5542168674698795
xwinograd_fr,True or False_frmt,accuracy,0.46987951807228917
xwinograd_fr,does underscore refer to_frmt,accuracy,0.5301204819277109
xwinograd_fr,stand for_frmt,accuracy,0.5662650602409639
xwinograd_fr,underscore refer to_frmt,accuracy,0.5783132530120482
xwinograd_fr,median,accuracy,0.5542168674698795
xwinograd_pt,Replace_ptmt,accuracy,0.5551330798479087
xwinograd_pt,True or False_ptmt,accuracy,0.4600760456273764
xwinograd_pt,does underscore refer to_ptmt,accuracy,0.5513307984790875
xwinograd_pt,stand for_ptmt,accuracy,0.532319391634981
xwinograd_pt,underscore refer to_ptmt,accuracy,0.5361216730038023
xwinograd_pt,median,accuracy,0.5361216730038023
xwinograd_zh,Replace_zhmt,accuracy,0.6130952380952381
xwinograd_zh,True or False_zhmt,accuracy,0.5416666666666666
xwinograd_zh,does underscore refer to_zhmt,accuracy,0.5793650793650794
xwinograd_zh,stand for_zhmt,accuracy,0.5158730158730159
xwinograd_zh,underscore refer to_zhmt,accuracy,0.625
xwinograd_zh,median,accuracy,0.5793650793650794
multiple,average,multiple,0.692383103411949