Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA phi-3-mini-128k-instruct,0.3387345679012346,0.4571976280473622,0.039299993295009855,0.281800547806919,0.963768115942029,0.7509527777777777,0.25489166666666674,0.22045000000000003 phi-3-medium-128k-instruct,0.35108024691358025,0.46871557360419164,0.09692037989916814,0.2651981204439735,0.9975845410628019,0.6727694444444445,0.2984500000000001,0.2759472222222221 Mistral-7B-Instruct-v0.1,0.20679012345679013,0.38323622857524176,0.027216280472015988,0.2829498135031582,0.995169082125604,0.500288888888889,0.45314444444444446,0.4191027777777777 Mistral-7B-Instruct-v0.2,0.39814814814814814,0.4692343788574553,0.14417876497818388,0.265188983528973,1.0,0.5787944444444445,0.35010277777777776,0.3171083333333333 Mistral-7B-Instruct-v0.3,0.2824074074074074,0.4168826678339619,0.07960539866974455,0.2742399030139009,0.9975845410628019,0.5231444444444444,0.4214972222222223,0.3914694444444443 Mixtral-8x7B-Instruct-v0.1,0.4930555555555556,0.5307045793457128,0.21473356319081474,0.2624402608740656,1.0,0.6766166666666665,0.25611666666666666,0.24065277777777772 Mixtral-8x22B-Instruct-v0.1,0.2924382716049383,0.41811429894732177,0.1414001940345544,0.2548838005881672,0.9654589371980676,0.45902777777777776,0.4849916666666666,0.4871833333333333 command_r_plus,0.5879629629629629,0.6136142726835458,0.3429686514651868,0.23811982320641845,0.963768115942029,0.7772111111111112,0.17755277777777778,0.17465277777777777 llama_3_8b_instruct,0.5007716049382716,0.5571604188191388,0.24527785038654715,0.245806400289881,0.961352657004831,0.7348277777777779,0.20952222222222228,0.20751944444444437 llama_3_70b_instruct,0.7376543209876543,0.7573878472446817,0.607020698814379,0.18525883672204868,1.0,0.8298166666666668,0.10965277777777771,0.14649722222222217 llama_3.1_8b_instruct,0.5671296296296297,0.6056589663453942,0.4295080949846363,0.22060228669473025,0.9710144927536233,0.6379333333333334,0.3225500000000001,0.3328972222222223 llama_3.1_70b_instruct,0.7739197530864198,0.78874072958529,0.691365862744007,0.1709718847084183,0.9944444444444444,0.8203805555555554,0.14023055555555552,0.17041944444444446 Qwen2-7B-Instruct,0.4529320987654321,0.5256131964101429,0.25108519506513916,0.25776537005719313,0.9855072463768116,0.6248583333333334,0.32358611111111113,0.3028361111111111 Qwen2-72B-Instruct,0.6080246913580247,0.6858608495773215,0.6465993243020925,0.20297742879025626,0.9833333333333333,0.5559722222222221,0.3575638888888889,0.39241388888888884 gpt-3.5-turbo-0125,0.23842592592592593,0.4028828123262879,0.08240359836763214,0.28728574920060357,1.0,0.4998916666666666,0.47583055555555553,0.4404444444444445 gpt-4o-0513,0.7229938271604939,0.707844597747704,0.5122163952167618,0.19201420113771173,1.0,0.7998694444444445,0.14606111111111109,0.1400583333333334 dummy,0.14814814814814814,0.3585809973377891,-0.009004148398032956,0.2928877637010999,1.0,0.5076361111111111,0.4973388888888889,0.4541638888888889