|
dataset,prompt,metric,value
|
|
xnli_ar,GPT-3 style_arht,accuracy,0.41405622489959837
|
|
xnli_ar,MNLI crowdsource_arht,accuracy,0.3791164658634538
|
|
xnli_ar,can we infer_arht,accuracy,0.3530120481927711
|
|
xnli_ar,guaranteed/possible/impossible_arht,accuracy,0.42289156626506025
|
|
xnli_ar,justified in saying_arht,accuracy,0.3863453815261044
|
|
xnli_ar,median,accuracy,0.3863453815261044
|
|
xnli_es,GPT-3 style_esht,accuracy,0.44417670682730925
|
|
xnli_es,MNLI crowdsource_esht,accuracy,0.3337349397590361
|
|
xnli_es,can we infer_esht,accuracy,0.3333333333333333
|
|
xnli_es,guaranteed/possible/impossible_esht,accuracy,0.5682730923694779
|
|
xnli_es,justified in saying_esht,accuracy,0.3333333333333333
|
|
xnli_es,median,accuracy,0.3337349397590361
|
|
xnli_fr,GPT-3 style_frht,accuracy,0.44457831325301206
|
|
xnli_fr,MNLI crowdsource_frht,accuracy,0.3453815261044177
|
|
xnli_fr,can we infer_frht,accuracy,0.5847389558232932
|
|
xnli_fr,guaranteed/possible/impossible_frht,accuracy,0.4951807228915663
|
|
xnli_fr,justified in saying_frht,accuracy,0.5775100401606426
|
|
xnli_fr,median,accuracy,0.4951807228915663
|
|
xnli_hi,GPT-3 style_hiht,accuracy,0.3481927710843373
|
|
xnli_hi,MNLI crowdsource_hiht,accuracy,0.491566265060241
|
|
xnli_hi,can we infer_hiht,accuracy,0.44538152610441767
|
|
xnli_hi,guaranteed/possible/impossible_hiht,accuracy,0.6020080321285141
|
|
xnli_hi,justified in saying_hiht,accuracy,0.41124497991967873
|
|
xnli_hi,median,accuracy,0.44538152610441767
|
|
xnli_sw,GPT-3 style_swht,accuracy,0.3409638554216867
|
|
xnli_sw,MNLI crowdsource_swht,accuracy,0.3333333333333333
|
|
xnli_sw,can we infer_swht,accuracy,0.37710843373493974
|
|
xnli_sw,guaranteed/possible/impossible_swht,accuracy,0.3646586345381526
|
|
xnli_sw,justified in saying_swht,accuracy,0.3497991967871486
|
|
xnli_sw,median,accuracy,0.3497991967871486
|
|
xnli_ur,GPT-3 style_urht,accuracy,0.41767068273092367
|
|
xnli_ur,MNLI crowdsource_urht,accuracy,0.38433734939759034
|
|
xnli_ur,can we infer_urht,accuracy,0.3389558232931727
|
|
xnli_ur,guaranteed/possible/impossible_urht,accuracy,0.342570281124498
|
|
xnli_ur,justified in saying_urht,accuracy,0.3461847389558233
|
|
xnli_ur,median,accuracy,0.3461847389558233
|
|
xnli_vi,GPT-3 style_viht,accuracy,0.45461847389558235
|
|
xnli_vi,MNLI crowdsource_viht,accuracy,0.37269076305220883
|
|
xnli_vi,can we infer_viht,accuracy,0.39598393574297186
|
|
xnli_vi,guaranteed/possible/impossible_viht,accuracy,0.46987951807228917
|
|
xnli_vi,justified in saying_viht,accuracy,0.37028112449799194
|
|
xnli_vi,median,accuracy,0.39598393574297186
|
|
xnli_zh,GPT-3 style_zhht,accuracy,0.37710843373493974
|
|
xnli_zh,MNLI crowdsource_zhht,accuracy,0.3257028112449799
|
|
xnli_zh,can we infer_zhht,accuracy,0.3369477911646586
|
|
xnli_zh,guaranteed/possible/impossible_zhht,accuracy,0.3333333333333333
|
|
xnli_zh,justified in saying_zhht,accuracy,0.41365461847389556
|
|
xnli_zh,median,accuracy,0.3369477911646586
|
|
multiple,average,multiple,0.3861947791164659
|
|
|