grg committed on
Commit
bbc1fb3
1 Parent(s): b19d196

Adding the dummy model and fixing the rank_distance metric bug

Browse files
Files changed (48) hide show
  1. static/figures/Mistral-7B-Instruct-v0.1/matrix.svg +24 -24
  2. static/figures/Mistral-7B-Instruct-v0.1/ranks.svg +0 -0
  3. static/figures/Mistral-7B-Instruct-v0.1/structure.svg +0 -0
  4. static/figures/Mistral-7B-Instruct-v0.2/matrix.svg +24 -24
  5. static/figures/Mistral-7B-Instruct-v0.2/ranks.svg +0 -0
  6. static/figures/Mistral-7B-Instruct-v0.2/structure.svg +0 -0
  7. static/figures/Mistral-7B-Instruct-v0.3/matrix.svg +24 -24
  8. static/figures/Mistral-7B-Instruct-v0.3/ranks.svg +0 -0
  9. static/figures/Mistral-7B-Instruct-v0.3/structure.svg +0 -0
  10. static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg +24 -24
  11. static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg +0 -0
  12. static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg +0 -0
  13. static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg +24 -24
  14. static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg +0 -0
  15. static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg +0 -0
  16. static/figures/Qwen2-72B-Instruct/matrix.svg +24 -24
  17. static/figures/Qwen2-72B-Instruct/ranks.svg +0 -0
  18. static/figures/Qwen2-72B-Instruct/structure.svg +0 -0
  19. static/figures/Qwen2-7B-Instruct/matrix.svg +136 -136
  20. static/figures/Qwen2-7B-Instruct/ranks.svg +0 -0
  21. static/figures/Qwen2-7B-Instruct/structure.svg +0 -0
  22. static/figures/cardinal.svg +240 -213
  23. static/figures/command_r_plus/matrix.svg +24 -24
  24. static/figures/command_r_plus/ranks.svg +0 -0
  25. static/figures/command_r_plus/structure.svg +0 -0
  26. static/figures/dummy/matrix.svg +1927 -0
  27. static/figures/dummy/ranks.svg +0 -0
  28. static/figures/dummy/structure.svg +0 -0
  29. static/figures/gpt-3.5-turbo-0125/matrix.svg +24 -24
  30. static/figures/gpt-3.5-turbo-0125/ranks.svg +0 -0
  31. static/figures/gpt-3.5-turbo-0125/structure.svg +0 -0
  32. static/figures/gpt-4o-0513/matrix.svg +24 -24
  33. static/figures/gpt-4o-0513/ranks.svg +0 -0
  34. static/figures/gpt-4o-0513/structure.svg +0 -0
  35. static/figures/llama_3_70b_instruct/matrix.svg +24 -24
  36. static/figures/llama_3_70b_instruct/ranks.svg +0 -0
  37. static/figures/llama_3_70b_instruct/structure.svg +0 -0
  38. static/figures/llama_3_8b_instruct/matrix.svg +24 -24
  39. static/figures/llama_3_8b_instruct/ranks.svg +0 -0
  40. static/figures/llama_3_8b_instruct/structure.svg +0 -0
  41. static/figures/ordinal.svg +212 -185
  42. static/figures/phi-3-medium-128k-instruct/matrix.svg +24 -24
  43. static/figures/phi-3-medium-128k-instruct/ranks.svg +0 -0
  44. static/figures/phi-3-medium-128k-instruct/structure.svg +0 -0
  45. static/figures/phi-3-mini-128k-instruct/matrix.svg +24 -24
  46. static/figures/phi-3-mini-128k-instruct/ranks.svg +0 -0
  47. static/figures/phi-3-mini-128k-instruct/structure.svg +0 -0
  48. static/leaderboard.csv +15 -14
static/figures/Mistral-7B-Instruct-v0.1/matrix.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.1/ranks.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.1/structure.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.2/matrix.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.2/ranks.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.2/structure.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.3/matrix.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.3/ranks.svg CHANGED
static/figures/Mistral-7B-Instruct-v0.3/structure.svg CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/matrix.svg CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/ranks.svg CHANGED
static/figures/Mixtral-8x22B-Instruct-v0.1/structure.svg CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/matrix.svg CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/ranks.svg CHANGED
static/figures/Mixtral-8x7B-Instruct-v0.1/structure.svg CHANGED
static/figures/Qwen2-72B-Instruct/matrix.svg CHANGED
static/figures/Qwen2-72B-Instruct/ranks.svg CHANGED
static/figures/Qwen2-72B-Instruct/structure.svg CHANGED
static/figures/Qwen2-7B-Instruct/matrix.svg CHANGED
static/figures/Qwen2-7B-Instruct/ranks.svg CHANGED
static/figures/Qwen2-7B-Instruct/structure.svg CHANGED
static/figures/cardinal.svg CHANGED
static/figures/command_r_plus/matrix.svg CHANGED
static/figures/command_r_plus/ranks.svg CHANGED
static/figures/command_r_plus/structure.svg CHANGED
static/figures/dummy/matrix.svg ADDED
static/figures/dummy/ranks.svg ADDED
static/figures/dummy/structure.svg ADDED
static/figures/gpt-3.5-turbo-0125/matrix.svg CHANGED
static/figures/gpt-3.5-turbo-0125/ranks.svg CHANGED
static/figures/gpt-3.5-turbo-0125/structure.svg CHANGED
static/figures/gpt-4o-0513/matrix.svg CHANGED
static/figures/gpt-4o-0513/ranks.svg CHANGED
static/figures/gpt-4o-0513/structure.svg CHANGED
static/figures/llama_3_70b_instruct/matrix.svg CHANGED
static/figures/llama_3_70b_instruct/ranks.svg CHANGED
static/figures/llama_3_70b_instruct/structure.svg CHANGED
static/figures/llama_3_8b_instruct/matrix.svg CHANGED
static/figures/llama_3_8b_instruct/ranks.svg CHANGED
static/figures/llama_3_8b_instruct/structure.svg CHANGED
static/figures/ordinal.svg CHANGED
static/figures/phi-3-medium-128k-instruct/matrix.svg CHANGED
static/figures/phi-3-medium-128k-instruct/ranks.svg CHANGED
static/figures/phi-3-medium-128k-instruct/structure.svg CHANGED
static/figures/phi-3-mini-128k-instruct/matrix.svg CHANGED
static/figures/phi-3-mini-128k-instruct/ranks.svg CHANGED
static/figures/phi-3-mini-128k-instruct/structure.svg CHANGED
static/leaderboard.csv CHANGED
@@ -1,15 +1,16 @@
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Rank Distance,CFI,SRMR,RMSEA,Cronbach alpha
2
- phi-3-mini-128k-instruct,0.258309591642925,0.18707102480796897,0.039299993295009855,0.7387387387387387,0.18377777777777776,0.5171888888888889,0.5129444444444444,0.11153354438597567
3
- phi-3-medium-128k-instruct,0.28490028490028496,0.18789267301588508,0.09692037989916814,0.7207207207207207,0.13975555555555555,0.7127888888888889,0.7074888888888888,0.30459548048923546
4
- Mistral-7B-Instruct-v0.1,0.3418803418803419,0.23063750442486428,0.027216280472015988,0.6636636636636636,0.22268888888888888,0.32623333333333326,0.3056444444444445,0.039724970488267154
5
- Mistral-7B-Instruct-v0.2,0.3342830009496676,0.1747138068267554,0.14417876497818388,0.6516516516516517,0.08117777777777778,0.8068555555555555,0.8051,0.1781386309574924
6
- Mistral-7B-Instruct-v0.3,0.3133903133903134,0.20131219867252867,0.07960539866974455,0.6126126126126126,0.15823333333333334,0.6148222222222223,0.6058111111111111,0.1684008059863923
7
- Mixtral-8x7B-Instruct-v0.1,0.43114909781576455,0.2437400779497571,0.21473356319081474,0.6846846846846846,0.13543333333333335,0.7121888888888889,0.7086555555555556,0.3048222445803502
8
- Mixtral-8x22B-Instruct-v0.1,0.29629629629629634,0.18791617935864172,0.1414001940345544,0.6696696696696696,0.11905555555555554,0.8129333333333333,0.8111777777777778,0.30037006331478344
9
- command_r_plus,0.560303893637227,0.3737946817620246,0.3429686514651868,0.6726726726726726,0.2763777777777778,0.5177555555555555,0.5135777777777777,0.41990575822570303
10
- llama_3_8b_instruct,0.4691358024691358,0.28828624999947805,0.24527785038654715,0.6996996996996997,0.21058888888888885,0.6220777777777777,0.6159777777777777,0.34063121481548086
11
- llama_3_70b_instruct,0.7701804368471036,0.5976823900754995,0.607020698814379,0.6966966966966968,0.4662222222222222,0.25131111111111115,0.2503333333333333,0.6831776343408991
12
- Qwen2-7B-Instruct,0.5251661918328584,0.3400513233761655,0.25108519506513916,0.7057057057057057,0.2508,0.4224444444444444,0.41727777777777786,0.35074905805286155
13
- Qwen2-72B-Instruct,0.5906932573599241,0.42123592516768155,0.6465993243020925,0.6426426426426426,0.06031111111111111,0.9069111111111111,0.9069555555555555,0.6009242274989618
14
- gpt-3.5-turbo-0125,0.23741690408357075,0.14920836189480854,0.08240359836763214,0.7297297297297297,0.08906666666666668,0.7106777777777777,0.7033,0.06790170442358906
15
- gpt-4o-0513,0.7340930674264008,0.5383734693976642,0.5122163952167618,0.6546546546546546,0.4056777777777778,0.24250000000000016,0.23266666666666658,0.5206391872554754
 
 
1
  Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Rank Distance,CFI,SRMR,RMSEA,Cronbach alpha
2
+ phi-3-mini-128k-instruct,0.2945326278659612,0.21977039084066838,0.039299993295009855,0.44444444444444453,0.18377777777777776,0.5171888888888889,0.5129444444444444,0.11153354438597567
3
+ phi-3-medium-128k-instruct,0.3156966490299824,0.2171376042216005,0.09692037989916814,0.457516339869281,0.13975555555555555,0.7127888888888889,0.7074888888888888,0.30459548048923546
4
+ Mistral-7B-Instruct-v0.1,0.3271604938271605,0.24519127388059453,0.027216280472015988,0.5326797385620915,0.22268888888888888,0.32623333333333326,0.3056444444444445,0.039724970488267154
5
+ Mistral-7B-Instruct-v0.2,0.35714285714285715,0.20754075141840592,0.14417876497818388,0.35620915032679745,0.08117777777777778,0.8068555555555555,0.8051,0.1781386309574924
6
+ Mistral-7B-Instruct-v0.3,0.3262786596119929,0.22762282302236875,0.07960539866974455,0.3758169934640523,0.15823333333333334,0.6148222222222223,0.6058111111111111,0.1684008059863923
7
+ Mixtral-8x7B-Instruct-v0.1,0.4514991181657849,0.2729751954201687,0.21473356319081474,0.42156862745098034,0.13543333333333335,0.7121888888888889,0.7086555555555556,0.3048222445803502
8
+ Mixtral-8x22B-Instruct-v0.1,0.345679012345679,0.2332752639334127,0.1414001940345544,0.2614379084967321,0.11905555555555554,0.8129333333333333,0.8111777777777778,0.30037006331478344
9
+ command_r_plus,0.5864197530864198,0.4122252692122199,0.3429686514651868,0.326797385620915,0.2763777777777778,0.5177555555555555,0.5135777777777777,0.41990575822570303
10
+ llama_3_8b_instruct,0.45943562610229277,0.31337997117163063,0.24527785038654715,0.4738562091503269,0.21058888888888885,0.6220777777777777,0.6159777777777777,0.34063121481548086
11
+ llama_3_70b_instruct,0.8156966490299823,0.6573008320468826,0.607020698814379,0.16013071895424835,0.4662222222222222,0.25131111111111115,0.2503333333333333,0.6831776343408991
12
+ Qwen2-7B-Instruct,0.5299823633156967,0.3698065688176855,0.25108519506513916,0.43790849673202614,0.2508,0.4224444444444444,0.41727777777777786,0.35074905805286155
13
+ Qwen2-72B-Instruct,0.6296296296296297,0.47375903651432216,0.6465993243020925,0.16993464052287588,0.06031111111111111,0.9069111111111111,0.9069555555555555,0.6009242274989618
14
+ gpt-3.5-turbo-0125,0.2627865961199294,0.17582321203907045,0.08240359836763214,0.4901960784313725,0.08906666666666668,0.7106777777777777,0.7033,0.06790170442358906
15
+ gpt-4o-0513,0.7883597883597884,0.595862330808094,0.5122163952167618,0.13725490196078427,0.4056777777777778,0.24250000000000016,0.23266666666666658,0.5206391872554754
16
+ dummy,0.2522045855379189,0.1790619751374967,0.004314900344882581,0.588235294117647,0.18077777777777776,0.41885555555555554,0.40022222222222215,-0.17916653102441288