DanielePoterti committed on
Commit
bb0b4f1
1 Parent(s): 67dff7d

Upload 2 files

Browse files
Files changed (2) hide show
  1. src/macro_area.csv +6 -0
  2. src/question_format.csv +6 -0
src/macro_area.csv CHANGED
@@ -28,6 +28,10 @@ llama-3-8b-instruct,48.2,53.6,63.6,14.3,34.5,29.2,0.0,31.6,50.0
28
  llama-3.1-405b-instruct,85.2,87.7,84.8,100.0,82.8,83.3,50.0,84.2,100.0
29
  llama-3.1-70b-instruct,83.3,87.2,81.8,100.0,79.3,58.3,25.0,79.0,83.3
30
  llama-3.1-8b-instruct,64.8,62.0,66.7,57.1,37.9,16.7,0.0,26.3,66.7
 
 
 
 
31
  maestrale-chat-v0.4-beta,62.0,61.4,60.6,42.9,44.8,33.3,0.0,15.8,50.0
32
  mistral-7b-instruct:nitro,51.8,59.2,51.5,28.6,37.9,29.2,0.0,31.6,33.3
33
  mistral-large,87.0,89.9,81.8,85.7,93.1,83.3,25.0,84.2,100.0
@@ -36,6 +40,8 @@ mixtral-8x22b-instruct,84.3,85.5,81.8,71.4,58.6,83.3,0.0,68.4,83.3
36
  mixtral-8x7b-instruct,74.1,77.1,69.7,42.9,37.9,50.0,0.0,52.6,50.0
37
  modello-italia-9b,28.7,28.5,30.3,14.3,10.3,16.7,0.0,10.5,50.0
38
  nemotron-4-340b-instruct,75.0,77.1,57.6,71.4,62.1,66.7,25.0,73.7,50.0
 
 
39
  phi-3-medium-128k-instruct,60.2,50.8,51.5,42.9,37.9,45.8,0.0,31.6,50.0
40
  phi-3-mini-128k-instruct,36.1,27.9,39.4,42.9,37.9,37.5,0.0,42.1,66.7
41
  qwen-2-72b-instruct,84.3,79.3,72.7,85.7,79.3,75.0,0.0,79.0,100.0
 
28
  llama-3.1-405b-instruct,85.2,87.7,84.8,100.0,82.8,83.3,50.0,84.2,100.0
29
  llama-3.1-70b-instruct,83.3,87.2,81.8,100.0,79.3,58.3,25.0,79.0,83.3
30
  llama-3.1-8b-instruct,64.8,62.0,66.7,57.1,37.9,16.7,0.0,26.3,66.7
31
+ llama-3.2-11b-vision-instruct,67.6,66.5,72.7,57.1,55.2,37.5,0.0,31.6,66.7
32
+ llama-3.2-1b-instruct,16.7,15.6,6.1,14.3,27.6,8.3,0.0,15.8,33.3
33
+ llama-3.2-3b-instruct,33.3,22.9,24.2,0.0,31.0,12.5,0.0,26.3,33.3
34
+ llama-3.2-90b-vision-instruct,83.3,88.8,81.8,85.7,79.3,54.2,25.0,73.7,100.0
35
  maestrale-chat-v0.4-beta,62.0,61.4,60.6,42.9,44.8,33.3,0.0,15.8,50.0
36
  mistral-7b-instruct:nitro,51.8,59.2,51.5,28.6,37.9,29.2,0.0,31.6,33.3
37
  mistral-large,87.0,89.9,81.8,85.7,93.1,83.3,25.0,84.2,100.0
 
40
  mixtral-8x7b-instruct,74.1,77.1,69.7,42.9,37.9,50.0,0.0,52.6,50.0
41
  modello-italia-9b,28.7,28.5,30.3,14.3,10.3,16.7,0.0,10.5,50.0
42
  nemotron-4-340b-instruct,75.0,77.1,57.6,71.4,62.1,66.7,25.0,73.7,50.0
43
+ o1-mini,78.7,81.0,81.8,85.7,86.2,87.5,50.0,84.2,66.7
44
+ o1-preview,86.1,92.7,87.9,100.0,93.1,95.8,50.0,89.5,100.0
45
  phi-3-medium-128k-instruct,60.2,50.8,51.5,42.9,37.9,45.8,0.0,31.6,50.0
46
  phi-3-mini-128k-instruct,36.1,27.9,39.4,42.9,37.9,37.5,0.0,42.1,66.7
47
  qwen-2-72b-instruct,84.3,79.3,72.7,85.7,79.3,75.0,0.0,79.0,100.0
src/question_format.csv CHANGED
@@ -28,6 +28,10 @@ llama-3-8b-instruct,65.6,0.0,0.0,66.7,0.0,0.0,16.7,57.8,28.6,11.1,42.0,0.0,0.0,0
28
  llama-3.1-405b-instruct,100.0,100.0,100.0,96.7,71.4,0.0,83.3,91.6,100.0,77.8,91.4,100.0,75.0,0.0,87.5,61.5,50.0,81.0,12.5
29
  llama-3.1-70b-instruct,96.9,50.0,0.0,93.3,57.1,0.0,50.0,94.4,71.4,88.9,87.6,100.0,75.0,50.0,77.1,46.2,71.4,78.6,12.5
30
  llama-3.1-8b-instruct,71.9,0.0,0.0,71.7,0.0,0.0,16.7,60.6,42.9,22.2,61.7,0.0,25.0,0.0,66.7,7.7,42.9,61.9,0.0
 
 
 
 
31
  maestrale-chat-v0.4-beta,65.6,0.0,0.0,66.7,14.3,0.0,0.0,62.0,0.0,33.3,60.5,0.0,25.0,0.0,62.5,23.1,35.7,71.4,0.0
32
  mistral-7b-instruct:nitro,71.9,0.0,0.0,66.7,0.0,0.0,16.7,59.2,14.3,33.3,50.6,0.0,25.0,0.0,50.0,23.1,28.6,57.1,0.0
33
  mistral-large,96.9,100.0,100.0,96.7,57.1,100.0,66.7,90.1,100.0,100.0,93.8,100.0,100.0,0.0,87.5,61.5,71.4,83.3,12.5
@@ -36,6 +40,8 @@ mixtral-8x22b-instruct,93.8,50.0,0.0,88.3,57.1,100.0,66.7,85.9,85.7,88.9,91.4,0.
36
  mixtral-8x7b-instruct,96.9,0.0,0.0,76.7,14.3,0.0,16.7,80.3,57.1,55.6,71.6,0.0,75.0,0.0,68.8,30.8,57.1,69.0,0.0
37
  modello-italia-9b,25.0,0.0,0.0,31.7,0.0,0.0,0.0,29.6,0.0,22.2,27.2,0.0,25.0,0.0,35.4,0.0,7.1,33.3,0.0
38
  nemotron-4-340b-instruct,87.5,0.0,100.0,76.7,71.4,0.0,16.7,74.6,28.6,55.6,81.5,100.0,100.0,50.0,75.0,53.8,50.0,73.8,12.5
 
 
39
  phi-3-medium-128k-instruct,62.5,0.0,0.0,58.3,14.3,0.0,0.0,59.2,14.3,33.3,60.5,0.0,50.0,0.0,64.6,15.4,28.6,40.5,0.0
40
  phi-3-mini-128k-instruct,34.4,0.0,0.0,30.0,0.0,0.0,0.0,33.8,14.3,22.2,42.0,0.0,25.0,0.0,35.4,7.7,35.7,54.8,0.0
41
  qwen-2-72b-instruct,87.5,100.0,100.0,90.0,57.1,0.0,50.0,81.7,100.0,66.7,87.6,100.0,75.0,50.0,79.2,61.5,50.0,76.2,12.5
 
28
  llama-3.1-405b-instruct,100.0,100.0,100.0,96.7,71.4,0.0,83.3,91.6,100.0,77.8,91.4,100.0,75.0,0.0,87.5,61.5,50.0,81.0,12.5
29
  llama-3.1-70b-instruct,96.9,50.0,0.0,93.3,57.1,0.0,50.0,94.4,71.4,88.9,87.6,100.0,75.0,50.0,77.1,46.2,71.4,78.6,12.5
30
  llama-3.1-8b-instruct,71.9,0.0,0.0,71.7,0.0,0.0,16.7,60.6,42.9,22.2,61.7,0.0,25.0,0.0,66.7,7.7,42.9,61.9,0.0
31
+ llama-3.2-11b-vision-instruct,78.1,0.0,0.0,80.0,0.0,0.0,33.3,64.8,14.3,66.7,64.2,0.0,75.0,0.0,68.8,23.1,64.3,64.3,0.0
32
+ llama-3.2-1b-instruct,25.0,0.0,0.0,23.3,0.0,0.0,16.7,18.3,0.0,11.1,9.9,0.0,0.0,0.0,10.4,0.0,28.6,23.8,0.0
33
+ llama-3.2-3b-instruct,31.2,0.0,0.0,23.3,0.0,0.0,0.0,18.3,0.0,11.1,29.6,0.0,0.0,0.0,33.3,7.7,35.7,47.6,0.0
34
+ llama-3.2-90b-vision-instruct,96.9,100.0,100.0,93.3,42.9,0.0,33.3,95.8,57.1,88.9,88.9,100.0,75.0,50.0,81.2,53.8,71.4,71.4,12.5
35
  maestrale-chat-v0.4-beta,65.6,0.0,0.0,66.7,14.3,0.0,0.0,62.0,0.0,33.3,60.5,0.0,25.0,0.0,62.5,23.1,35.7,71.4,0.0
36
  mistral-7b-instruct:nitro,71.9,0.0,0.0,66.7,0.0,0.0,16.7,59.2,14.3,33.3,50.6,0.0,25.0,0.0,50.0,23.1,28.6,57.1,0.0
37
  mistral-large,96.9,100.0,100.0,96.7,57.1,100.0,66.7,90.1,100.0,100.0,93.8,100.0,100.0,0.0,87.5,61.5,71.4,83.3,12.5
 
40
  mixtral-8x7b-instruct,96.9,0.0,0.0,76.7,14.3,0.0,16.7,80.3,57.1,55.6,71.6,0.0,75.0,0.0,68.8,30.8,57.1,69.0,0.0
41
  modello-italia-9b,25.0,0.0,0.0,31.7,0.0,0.0,0.0,29.6,0.0,22.2,27.2,0.0,25.0,0.0,35.4,0.0,7.1,33.3,0.0
42
  nemotron-4-340b-instruct,87.5,0.0,100.0,76.7,71.4,0.0,16.7,74.6,28.6,55.6,81.5,100.0,100.0,50.0,75.0,53.8,50.0,73.8,12.5
43
+ o1-mini,93.8,100.0,0.0,91.7,57.1,0.0,83.3,78.9,100.0,66.7,86.4,0.0,100.0,0.0,85.4,69.2,78.6,71.4,12.5
44
+ o1-preview,100.0,100.0,100.0,96.7,85.7,100.0,83.3,95.8,100.0,88.9,96.3,100.0,100.0,50.0,95.8,61.5,71.4,78.6,12.5
45
  phi-3-medium-128k-instruct,62.5,0.0,0.0,58.3,14.3,0.0,0.0,59.2,14.3,33.3,60.5,0.0,50.0,0.0,64.6,15.4,28.6,40.5,0.0
46
  phi-3-mini-128k-instruct,34.4,0.0,0.0,30.0,0.0,0.0,0.0,33.8,14.3,22.2,42.0,0.0,25.0,0.0,35.4,7.7,35.7,54.8,0.0
47
  qwen-2-72b-instruct,87.5,100.0,100.0,90.0,57.1,0.0,50.0,81.7,100.0,66.7,87.6,100.0,75.0,50.0,79.2,61.5,50.0,76.2,12.5