sequelbox committed on
Commit
fbb15e0
1 Parent(s): 050b939

updated evals

Browse files
Files changed (1) hide show
  1. README.md +22 -10
README.md CHANGED
@@ -45,7 +45,7 @@ model-index:
45
  num_few_shot: 5
46
  metrics:
47
  - type: acc
48
- value: 77.35
49
  name: acc
50
  - task:
51
  type: text-generation
@@ -57,7 +57,7 @@ model-index:
57
  num_few_shot: 5
58
  metrics:
59
  - type: acc
60
- value: 76.39
61
  name: acc
62
  - task:
63
  type: text-generation
@@ -69,7 +69,7 @@ model-index:
69
  num_few_shot: 5
70
  metrics:
71
  - type: acc
72
- value: 79.03
73
  name: acc
74
  - task:
75
  type: text-generation
@@ -81,7 +81,7 @@ model-index:
81
  num_few_shot: 5
82
  metrics:
83
  - type: acc
84
- value: 50.0
85
  name: acc
86
  - task:
87
  type: text-generation
@@ -93,7 +93,19 @@ model-index:
93
  num_few_shot: 5
94
  metrics:
95
  - type: acc
96
- value: 53.2
 
 
 
 
 
 
 
 
 
 
 
 
97
  name: acc
98
  - task:
99
  type: text-generation
@@ -105,7 +117,7 @@ model-index:
105
  num_few_shot: 5
106
  metrics:
107
  - type: acc
108
- value: 43.14
109
  name: acc
110
  - task:
111
  type: text-generation
@@ -129,7 +141,7 @@ model-index:
129
  num_few_shot: 5
130
  metrics:
131
  - type: acc
132
- value: 55.0
133
  name: acc
134
  - task:
135
  type: text-generation
@@ -141,19 +153,19 @@ model-index:
141
  num_few_shot: 5
142
  metrics:
143
  - type: acc
144
- value: 66.0
145
  name: acc
146
  - task:
147
  type: text-generation
148
  name: Text Generation
149
  dataset:
150
- name: MMLU STEM (5-Shot)
151
  type: MMLU
152
  args:
153
  num_few_shot: 5
154
  metrics:
155
  - type: acc
156
- value: 55.57
157
  name: acc
158
  - task:
159
  type: text-generation
 
45
  num_few_shot: 5
46
  metrics:
47
  - type: acc
48
+ value: 75.85
49
  name: acc
50
  - task:
51
  type: text-generation
 
57
  num_few_shot: 5
58
  metrics:
59
  - type: acc
60
+ value: 68.75
61
  name: acc
62
  - task:
63
  type: text-generation
 
69
  num_few_shot: 5
70
  metrics:
71
  - type: acc
72
+ value: 73.23
73
  name: acc
74
  - task:
75
  type: text-generation
 
81
  num_few_shot: 5
82
  metrics:
83
  - type: acc
84
+ value: 46.00
85
  name: acc
86
  - task:
87
  type: text-generation
 
93
  num_few_shot: 5
94
  metrics:
95
  - type: acc
96
+ value: 44.33
97
+ name: acc
98
+ - task:
99
+ type: text-generation
100
+ name: Text Generation
101
+ dataset:
102
+ name: MMLU Conceptual Physics (5-Shot)
103
+ type: MMLU
104
+ args:
105
+ num_few_shot: 5
106
+ metrics:
107
+ - type: acc
108
+ value: 53.19
109
  name: acc
110
  - task:
111
  type: text-generation
 
117
  num_few_shot: 5
118
  metrics:
119
  - type: acc
120
+ value: 37.25
121
  name: acc
122
  - task:
123
  type: text-generation
 
141
  num_few_shot: 5
142
  metrics:
143
  - type: acc
144
+ value: 56.00
145
  name: acc
146
  - task:
147
  type: text-generation
 
153
  num_few_shot: 5
154
  metrics:
155
  - type: acc
156
+ value: 63.00
157
  name: acc
158
  - task:
159
  type: text-generation
160
  name: Text Generation
161
  dataset:
162
+ name: MMLU Astronomy (5-Shot)
163
  type: MMLU
164
  args:
165
  num_few_shot: 5
166
  metrics:
167
  - type: acc
168
+ value: 63.16
169
  name: acc
170
  - task:
171
  type: text-generation