Adding the dummy model and fixing the rank_distance metric bug
Browse files- static/figures/Mistral-7B-Instruct-v0.1/matrix.svg +24 -24
- static/figures/Mistral-7B-Instruct-v0.1/ranks.svg +0 -0
- static/figures/Mistral-7B-Instruct-v0.1/structure.svg +0 -0
- static/figures/Mistral-7B-Instruct-v0.2/matrix.svg +24 -24
- static/figures/Mistral-7B-Instruct-v0.2/ranks.svg +0 -0
- static/figures/Mistral-7B-Instruct-v0.2/structure.svg +0 -0
- static/figures/Mistral-7B-Instruct-v0.3/matrix.svg +24 -24
- static/figures/Mistral-7B-Instruct-v0.3/ranks.svg +0 -0
- static/figures/Mistral-7B-Instruct-v0.3/structure.svg +0 -0
- static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg +24 -24
- static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg +0 -0
- static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg +0 -0
- static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg +24 -24
- static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg +0 -0
- static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg +0 -0
- static/figures/Qwen2-72B-Instruct/matrix.svg +24 -24
- static/figures/Qwen2-72B-Instruct/ranks.svg +0 -0
- static/figures/Qwen2-72B-Instruct/structure.svg +0 -0
- static/figures/Qwen2-7B-Instruct/matrix.svg +136 -136
- static/figures/Qwen2-7B-Instruct/ranks.svg +0 -0
- static/figures/Qwen2-7B-Instruct/structure.svg +0 -0
- static/figures/cardinal.svg +240 -213
- static/figures/command_r_plus/matrix.svg +24 -24
- static/figures/command_r_plus/ranks.svg +0 -0
- static/figures/command_r_plus/structure.svg +0 -0
- static/figures/dummy/matrix.svg +1927 -0
- static/figures/dummy/ranks.svg +0 -0
- static/figures/dummy/structure.svg +0 -0
- static/figures/gpt-3.5-turbo-0125/matrix.svg +24 -24
- static/figures/gpt-3.5-turbo-0125/ranks.svg +0 -0
- static/figures/gpt-3.5-turbo-0125/structure.svg +0 -0
- static/figures/gpt-4o-0513/matrix.svg +24 -24
- static/figures/gpt-4o-0513/ranks.svg +0 -0
- static/figures/gpt-4o-0513/structure.svg +0 -0
- static/figures/llama_3_70b_instruct/matrix.svg +24 -24
- static/figures/llama_3_70b_instruct/ranks.svg +0 -0
- static/figures/llama_3_70b_instruct/structure.svg +0 -0
- static/figures/llama_3_8b_instruct/matrix.svg +24 -24
- static/figures/llama_3_8b_instruct/ranks.svg +0 -0
- static/figures/llama_3_8b_instruct/structure.svg +0 -0
- static/figures/ordinal.svg +212 -185
- static/figures/phi-3-medium-128k-instruct/matrix.svg +24 -24
- static/figures/phi-3-medium-128k-instruct/ranks.svg +0 -0
- static/figures/phi-3-medium-128k-instruct/structure.svg +0 -0
- static/figures/phi-3-mini-128k-instruct/matrix.svg +24 -24
- static/figures/phi-3-mini-128k-instruct/ranks.svg +0 -0
- static/figures/phi-3-mini-128k-instruct/structure.svg +0 -0
- static/leaderboard.csv +15 -14
static/figures/Mistral-7B-Instruct-v0.1/matrix.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.1/ranks.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.1/structure.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.2/matrix.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.2/ranks.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.2/structure.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.3/matrix.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.3/ranks.svg
CHANGED
static/figures/Mistral-7B-Instruct-v0.3/structure.svg
CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg
CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg
CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg
CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg
CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg
CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg
CHANGED
static/figures/Qwen2-72B-Instruct/matrix.svg
CHANGED
static/figures/Qwen2-72B-Instruct/ranks.svg
CHANGED
static/figures/Qwen2-72B-Instruct/structure.svg
CHANGED
static/figures/Qwen2-7B-Instruct/matrix.svg
CHANGED
static/figures/Qwen2-7B-Instruct/ranks.svg
CHANGED
static/figures/Qwen2-7B-Instruct/structure.svg
CHANGED
static/figures/cardinal.svg
CHANGED
static/figures/command_r_plus/matrix.svg
CHANGED
static/figures/command_r_plus/ranks.svg
CHANGED
static/figures/command_r_plus/structure.svg
CHANGED
static/figures/dummy/matrix.svg
ADDED
static/figures/dummy/ranks.svg
ADDED
static/figures/dummy/structure.svg
ADDED
static/figures/gpt-3.5-turbo-0125/matrix.svg
CHANGED
static/figures/gpt-3.5-turbo-0125/ranks.svg
CHANGED
static/figures/gpt-3.5-turbo-0125/structure.svg
CHANGED
static/figures/gpt-4o-0513/matrix.svg
CHANGED
static/figures/gpt-4o-0513/ranks.svg
CHANGED
static/figures/gpt-4o-0513/structure.svg
CHANGED
static/figures/llama_3_70b_instruct/matrix.svg
CHANGED
static/figures/llama_3_70b_instruct/ranks.svg
CHANGED
static/figures/llama_3_70b_instruct/structure.svg
CHANGED
static/figures/llama_3_8b_instruct/matrix.svg
CHANGED
static/figures/llama_3_8b_instruct/ranks.svg
CHANGED
static/figures/llama_3_8b_instruct/structure.svg
CHANGED
static/figures/ordinal.svg
CHANGED
static/figures/phi-3-medium-128k-instruct/matrix.svg
CHANGED
static/figures/phi-3-medium-128k-instruct/ranks.svg
CHANGED
static/figures/phi-3-medium-128k-instruct/structure.svg
CHANGED
static/figures/phi-3-mini-128k-instruct/matrix.svg
CHANGED
static/figures/phi-3-mini-128k-instruct/ranks.svg
CHANGED
static/figures/phi-3-mini-128k-instruct/structure.svg
CHANGED
static/leaderboard.csv
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Rank Distance,CFI,SRMR,RMSEA,Cronbach alpha
|
2 |
-
phi-3-mini-128k-instruct,0.
|
3 |
-
phi-3-medium-128k-instruct,0.
|
4 |
-
Mistral-7B-Instruct-v0.1,0.
|
5 |
-
Mistral-7B-Instruct-v0.2,0.
|
6 |
-
Mistral-7B-Instruct-v0.3,0.
|
7 |
-
Mixtral-8x7B-Instruct-v0.1,0.
|
8 |
-
Mixtral-8x22B-Instruct-v0.1,0.
|
9 |
-
command_r_plus,0.
|
10 |
-
llama_3_8b_instruct,0.
|
11 |
-
llama_3_70b_instruct,0.
|
12 |
-
Qwen2-7B-Instruct,0.
|
13 |
-
Qwen2-72B-Instruct,0.
|
14 |
-
gpt-3.5-turbo-0125,0.
|
15 |
-
gpt-4o-0513,0.
|
|
|
|
1 |
Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Rank Distance,CFI,SRMR,RMSEA,Cronbach alpha
|
2 |
+
phi-3-mini-128k-instruct,0.2945326278659612,0.21977039084066838,0.039299993295009855,0.44444444444444453,0.18377777777777776,0.5171888888888889,0.5129444444444444,0.11153354438597567
|
3 |
+
phi-3-medium-128k-instruct,0.3156966490299824,0.2171376042216005,0.09692037989916814,0.457516339869281,0.13975555555555555,0.7127888888888889,0.7074888888888888,0.30459548048923546
|
4 |
+
Mistral-7B-Instruct-v0.1,0.3271604938271605,0.24519127388059453,0.027216280472015988,0.5326797385620915,0.22268888888888888,0.32623333333333326,0.3056444444444445,0.039724970488267154
|
5 |
+
Mistral-7B-Instruct-v0.2,0.35714285714285715,0.20754075141840592,0.14417876497818388,0.35620915032679745,0.08117777777777778,0.8068555555555555,0.8051,0.1781386309574924
|
6 |
+
Mistral-7B-Instruct-v0.3,0.3262786596119929,0.22762282302236875,0.07960539866974455,0.3758169934640523,0.15823333333333334,0.6148222222222223,0.6058111111111111,0.1684008059863923
|
7 |
+
Mixtral-8x7B-Instruct-v0.1,0.4514991181657849,0.2729751954201687,0.21473356319081474,0.42156862745098034,0.13543333333333335,0.7121888888888889,0.7086555555555556,0.3048222445803502
|
8 |
+
Mixtral-8x22B-Instruct-v0.1,0.345679012345679,0.2332752639334127,0.1414001940345544,0.2614379084967321,0.11905555555555554,0.8129333333333333,0.8111777777777778,0.30037006331478344
|
9 |
+
command_r_plus,0.5864197530864198,0.4122252692122199,0.3429686514651868,0.326797385620915,0.2763777777777778,0.5177555555555555,0.5135777777777777,0.41990575822570303
|
10 |
+
llama_3_8b_instruct,0.45943562610229277,0.31337997117163063,0.24527785038654715,0.4738562091503269,0.21058888888888885,0.6220777777777777,0.6159777777777777,0.34063121481548086
|
11 |
+
llama_3_70b_instruct,0.8156966490299823,0.6573008320468826,0.607020698814379,0.16013071895424835,0.4662222222222222,0.25131111111111115,0.2503333333333333,0.6831776343408991
|
12 |
+
Qwen2-7B-Instruct,0.5299823633156967,0.3698065688176855,0.25108519506513916,0.43790849673202614,0.2508,0.4224444444444444,0.41727777777777786,0.35074905805286155
|
13 |
+
Qwen2-72B-Instruct,0.6296296296296297,0.47375903651432216,0.6465993243020925,0.16993464052287588,0.06031111111111111,0.9069111111111111,0.9069555555555555,0.6009242274989618
|
14 |
+
gpt-3.5-turbo-0125,0.2627865961199294,0.17582321203907045,0.08240359836763214,0.4901960784313725,0.08906666666666668,0.7106777777777777,0.7033,0.06790170442358906
|
15 |
+
gpt-4o-0513,0.7883597883597884,0.595862330808094,0.5122163952167618,0.13725490196078427,0.4056777777777778,0.24250000000000016,0.23266666666666658,0.5206391872554754
|
16 |
+
dummy,0.2522045855379189,0.1790619751374967,0.004314900344882581,0.588235294117647,0.18077777777777776,0.41885555555555554,0.40022222222222215,-0.17916653102441288
|