mwitiderrick
committed on
Commit
•
dab4546
1
Parent(s):
511f240
Update README.md
Browse files
README.md
CHANGED
@@ -29,15 +29,6 @@ model-index:
|
|
29 |
- name: hellaswag(0-Shot)
|
30 |
type: hellaswag (0-Shot)
|
31 |
value: 0.4882
|
32 |
-
- task:
|
33 |
-
type: text-generation
|
34 |
-
dataset:
|
35 |
-
name: truthfulqa
|
36 |
-
type: truthfulqa
|
37 |
-
metrics:
|
38 |
-
- name: truthfulqa(0-Shot)
|
39 |
-
type: truthfulqa(0-Shot)
|
40 |
-
value: 64.59
|
41 |
- task:
|
42 |
type: text-generation
|
43 |
dataset:
|
@@ -129,4 +120,55 @@ Step 4: Make the gravy
|
|
129 |
3. Add the chicken broth and bring to a boil.
|
130 |
4. Reduce the heat to low and simmer for 10 minutes or until the gravy is
|
131 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
132 |
```
|
|
|
29 |
- name: hellaswag(0-Shot)
|
30 |
type: hellaswag (0-Shot)
|
31 |
value: 0.4882
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
- task:
|
33 |
type: text-generation
|
34 |
dataset:
|
|
|
120 |
3. Add the chicken broth and bring to a boil.
|
121 |
4. Reduce the heat to low and simmer for 10 minutes or until the gravy is
|
122 |
"""
|
123 |
+
```
|
124 |
+
## TruthfulQA metrics
|
125 |
+
```
|
126 |
+
| Tasks |Version|Filter|n-shot| Metric | Value | |Stderr|
|
127 |
+
|-----------------|-------|------|-----:|-----------|-------:|---|-----:|
|
128 |
+
|truthfulqa |N/A |none | 0|acc | 0.3166|± |0.0012|
|
129 |
+
| | |none | 0|bleu_max | 23.7766|± |0.7660|
|
130 |
+
| | |none | 0|bleu_acc | 0.3207|± |0.0163|
|
131 |
+
| | |none | 0|bleu_diff | -7.1853|± |0.7396|
|
132 |
+
| | |none | 0|rouge1_max | 48.6534|± |0.8706|
|
133 |
+
| | |none | 0|rouge1_acc | 0.2766|± |0.0157|
|
134 |
+
| | |none | 0|rouge1_diff| -9.8011|± |0.7883|
|
135 |
+
| | |none | 0|rouge2_max | 31.9289|± |0.9637|
|
136 |
+
| | |none | 0|rouge2_acc | 0.2399|± |0.0149|
|
137 |
+
| | |none | 0|rouge2_diff|-11.3958|± |0.9220|
|
138 |
+
| | |none | 0|rougeL_max | 45.4592|± |0.8754|
|
139 |
+
| | |none | 0|rougeL_acc | 0.2754|± |0.0156|
|
140 |
+
| | |none | 0|rougeL_diff|-10.0740|± |0.7807|
|
141 |
+
| - truthfulqa_gen|Yaml |none | 0|bleu_max | 23.7766|± |0.7660|
|
142 |
+
| | |none | 0|bleu_acc | 0.3207|± |0.0163|
|
143 |
+
| | |none | 0|bleu_diff | -7.1853|± |0.7396|
|
144 |
+
| | |none | 0|rouge1_max | 48.6534|± |0.8706|
|
145 |
+
| | |none | 0|rouge1_acc | 0.2766|± |0.0157|
|
146 |
+
| | |none | 0|rouge1_diff| -9.8011|± |0.7883|
|
147 |
+
| | |none | 0|rouge2_max | 31.9289|± |0.9637|
|
148 |
+
| | |none | 0|rouge2_acc | 0.2399|± |0.0149|
|
149 |
+
| | |none | 0|rouge2_diff|-11.3958|± |0.9220|
|
150 |
+
| | |none | 0|rougeL_max | 45.4592|± |0.8754|
|
151 |
+
| | |none | 0|rougeL_acc | 0.2754|± |0.0156|
|
152 |
+
| | |none | 0|rougeL_diff|-10.0740|± |0.7807|
|
153 |
+
| - truthfulqa_mc1|Yaml |none | 0|acc | 0.2534|± |0.0152|
|
154 |
+
| - truthfulqa_mc2|Yaml |none | 0|acc | 0.3798|± |0.0139|
|
155 |
+
|
156 |
+
| Groups |Version|Filter|n-shot| Metric | Value | |Stderr|
|
157 |
+
|----------|-------|------|-----:|-----------|-------:|---|-----:|
|
158 |
+
|truthfulqa|N/A |none | 0|acc | 0.3166|± |0.0012|
|
159 |
+
| | |none | 0|bleu_max | 23.7766|± |0.7660|
|
160 |
+
| | |none | 0|bleu_acc | 0.3207|± |0.0163|
|
161 |
+
| | |none | 0|bleu_diff | -7.1853|± |0.7396|
|
162 |
+
| | |none | 0|rouge1_max | 48.6534|± |0.8706|
|
163 |
+
| | |none | 0|rouge1_acc | 0.2766|± |0.0157|
|
164 |
+
| | |none | 0|rouge1_diff| -9.8011|± |0.7883|
|
165 |
+
| | |none | 0|rouge2_max | 31.9289|± |0.9637|
|
166 |
+
| | |none | 0|rouge2_acc | 0.2399|± |0.0149|
|
167 |
+
| | |none | 0|rouge2_diff|-11.3958|± |0.9220|
|
168 |
+
| | |none | 0|rougeL_max | 45.4592|± |0.8754|
|
169 |
+
| | |none | 0|rougeL_acc | 0.2754|± |0.0156|
|
170 |
+
| | |none | 0|rougeL_diff|-10.0740|± |0.7807|
|
171 |
+
|
172 |
+
|
173 |
+
|
174 |
```
|