nbroad HF staff committed on
Commit
dcc8e4a
1 Parent(s): 3617f00

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +19 -0
  2. test_results.json +13 -0
  3. train_results.json +8 -0
  4. trainer_state.json +283 -0
all_results.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_samples": 1233,
4
+ "test_false_f1": 0.7841191066997518,
5
+ "test_loss": 0.672432005405426,
6
+ "test_macro_f1": 0.6707393317114294,
7
+ "test_micro_f1": 0.7907542579075426,
8
+ "test_mixture_f1": 0.5116279069767442,
9
+ "test_runtime": 40.6849,
10
+ "test_samples_per_second": 30.306,
11
+ "test_steps_per_second": 0.959,
12
+ "test_true_f1": 0.9061976549413736,
13
+ "test_unproven_f1": 0.48101265822784806,
14
+ "train_loss": 0.5249485609128204,
15
+ "train_runtime": 3291.1672,
16
+ "train_samples": 9804,
17
+ "train_samples_per_second": 8.937,
18
+ "train_steps_per_second": 1.118
19
+ }
test_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_samples": 1233,
3
+ "test_false_f1": 0.7841191066997518,
4
+ "test_loss": 0.672432005405426,
5
+ "test_macro_f1": 0.6707393317114294,
6
+ "test_micro_f1": 0.7907542579075426,
7
+ "test_mixture_f1": 0.5116279069767442,
8
+ "test_runtime": 40.6849,
9
+ "test_samples_per_second": 30.306,
10
+ "test_steps_per_second": 0.959,
11
+ "test_true_f1": 0.9061976549413736,
12
+ "test_unproven_f1": 0.48101265822784806
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.5249485609128204,
4
+ "train_runtime": 3291.1672,
5
+ "train_samples": 9804,
6
+ "train_samples_per_second": 8.937,
7
+ "train_steps_per_second": 1.118
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8179571663920923,
3
+ "best_model_checkpoint": "./bigbird-base-health-fact/checkpoint-2452",
4
+ "epoch": 3.0,
5
+ "global_step": 3678,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 2.6630434782608698e-06,
13
+ "loss": 1.2792,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.16,
18
+ "learning_rate": 5.380434782608695e-06,
19
+ "loss": 1.0546,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.24,
24
+ "learning_rate": 8.097826086956523e-06,
25
+ "loss": 0.8857,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.33,
30
+ "learning_rate": 9.909365558912388e-06,
31
+ "loss": 0.7958,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 0.41,
36
+ "learning_rate": 9.60725075528701e-06,
37
+ "loss": 0.7246,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 0.49,
42
+ "learning_rate": 9.305135951661632e-06,
43
+ "loss": 0.7629,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 0.57,
48
+ "learning_rate": 9.003021148036256e-06,
49
+ "loss": 0.6498,
50
+ "step": 700
51
+ },
52
+ {
53
+ "epoch": 0.65,
54
+ "learning_rate": 8.700906344410877e-06,
55
+ "loss": 0.6201,
56
+ "step": 800
57
+ },
58
+ {
59
+ "epoch": 0.73,
60
+ "learning_rate": 8.398791540785499e-06,
61
+ "loss": 0.5809,
62
+ "step": 900
63
+ },
64
+ {
65
+ "epoch": 0.82,
66
+ "learning_rate": 8.099697885196374e-06,
67
+ "loss": 0.647,
68
+ "step": 1000
69
+ },
70
+ {
71
+ "epoch": 0.9,
72
+ "learning_rate": 7.797583081570997e-06,
73
+ "loss": 0.5814,
74
+ "step": 1100
75
+ },
76
+ {
77
+ "epoch": 0.98,
78
+ "learning_rate": 7.4954682779456205e-06,
79
+ "loss": 0.5563,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 1.0,
84
+ "eval_false_f1": 0.7926023778071335,
85
+ "eval_loss": 0.5019509196281433,
86
+ "eval_macro_f1": 0.6062122975261963,
87
+ "eval_micro_f1": 0.7948929159802306,
88
+ "eval_mixture_f1": 0.4591194968553459,
89
+ "eval_runtime": 39.3928,
90
+ "eval_samples_per_second": 30.818,
91
+ "eval_steps_per_second": 0.965,
92
+ "eval_true_f1": 0.8986175115207373,
93
+ "eval_unproven_f1": 0.2745098039215686,
94
+ "step": 1226
95
+ },
96
+ {
97
+ "epoch": 1.06,
98
+ "learning_rate": 7.193353474320243e-06,
99
+ "loss": 0.5271,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 1.14,
104
+ "learning_rate": 6.891238670694864e-06,
105
+ "loss": 0.4912,
106
+ "step": 1400
107
+ },
108
+ {
109
+ "epoch": 1.22,
110
+ "learning_rate": 6.589123867069487e-06,
111
+ "loss": 0.4844,
112
+ "step": 1500
113
+ },
114
+ {
115
+ "epoch": 1.31,
116
+ "learning_rate": 6.287009063444109e-06,
117
+ "loss": 0.4816,
118
+ "step": 1600
119
+ },
120
+ {
121
+ "epoch": 1.39,
122
+ "learning_rate": 5.984894259818732e-06,
123
+ "loss": 0.462,
124
+ "step": 1700
125
+ },
126
+ {
127
+ "epoch": 1.47,
128
+ "learning_rate": 5.682779456193354e-06,
129
+ "loss": 0.4087,
130
+ "step": 1800
131
+ },
132
+ {
133
+ "epoch": 1.55,
134
+ "learning_rate": 5.380664652567976e-06,
135
+ "loss": 0.5065,
136
+ "step": 1900
137
+ },
138
+ {
139
+ "epoch": 1.63,
140
+ "learning_rate": 5.078549848942599e-06,
141
+ "loss": 0.4313,
142
+ "step": 2000
143
+ },
144
+ {
145
+ "epoch": 1.71,
146
+ "learning_rate": 4.776435045317221e-06,
147
+ "loss": 0.5098,
148
+ "step": 2100
149
+ },
150
+ {
151
+ "epoch": 1.79,
152
+ "learning_rate": 4.4743202416918435e-06,
153
+ "loss": 0.4699,
154
+ "step": 2200
155
+ },
156
+ {
157
+ "epoch": 1.88,
158
+ "learning_rate": 4.172205438066466e-06,
159
+ "loss": 0.4408,
160
+ "step": 2300
161
+ },
162
+ {
163
+ "epoch": 1.96,
164
+ "learning_rate": 3.8700906344410875e-06,
165
+ "loss": 0.5048,
166
+ "step": 2400
167
+ },
168
+ {
169
+ "epoch": 2.0,
170
+ "eval_false_f1": 0.8201811125485123,
171
+ "eval_loss": 0.4968700110912323,
172
+ "eval_macro_f1": 0.684587518040316,
173
+ "eval_micro_f1": 0.8179571663920923,
174
+ "eval_mixture_f1": 0.43416370106761565,
175
+ "eval_runtime": 39.4059,
176
+ "eval_samples_per_second": 30.808,
177
+ "eval_steps_per_second": 0.964,
178
+ "eval_true_f1": 0.9125766871165646,
179
+ "eval_unproven_f1": 0.5714285714285714,
180
+ "step": 2452
181
+ },
182
+ {
183
+ "epoch": 2.04,
184
+ "learning_rate": 3.5679758308157103e-06,
185
+ "loss": 0.378,
186
+ "step": 2500
187
+ },
188
+ {
189
+ "epoch": 2.12,
190
+ "learning_rate": 3.2658610271903322e-06,
191
+ "loss": 0.3631,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 2.2,
196
+ "learning_rate": 2.963746223564955e-06,
197
+ "loss": 0.3475,
198
+ "step": 2700
199
+ },
200
+ {
201
+ "epoch": 2.28,
202
+ "learning_rate": 2.661631419939577e-06,
203
+ "loss": 0.3283,
204
+ "step": 2800
205
+ },
206
+ {
207
+ "epoch": 2.37,
208
+ "learning_rate": 2.3595166163142e-06,
209
+ "loss": 0.317,
210
+ "step": 2900
211
+ },
212
+ {
213
+ "epoch": 2.45,
214
+ "learning_rate": 2.0604229607250755e-06,
215
+ "loss": 0.3541,
216
+ "step": 3000
217
+ },
218
+ {
219
+ "epoch": 2.53,
220
+ "learning_rate": 1.758308157099698e-06,
221
+ "loss": 0.3818,
222
+ "step": 3100
223
+ },
224
+ {
225
+ "epoch": 2.61,
226
+ "learning_rate": 1.4561933534743203e-06,
227
+ "loss": 0.3467,
228
+ "step": 3200
229
+ },
230
+ {
231
+ "epoch": 2.69,
232
+ "learning_rate": 1.1540785498489427e-06,
233
+ "loss": 0.3048,
234
+ "step": 3300
235
+ },
236
+ {
237
+ "epoch": 2.77,
238
+ "learning_rate": 8.51963746223565e-07,
239
+ "loss": 0.3334,
240
+ "step": 3400
241
+ },
242
+ {
243
+ "epoch": 2.85,
244
+ "learning_rate": 5.498489425981874e-07,
245
+ "loss": 0.3855,
246
+ "step": 3500
247
+ },
248
+ {
249
+ "epoch": 2.94,
250
+ "learning_rate": 2.477341389728097e-07,
251
+ "loss": 0.3454,
252
+ "step": 3600
253
+ },
254
+ {
255
+ "epoch": 3.0,
256
+ "eval_false_f1": 0.811443433029909,
257
+ "eval_loss": 0.5863622426986694,
258
+ "eval_macro_f1": 0.6874160790583576,
259
+ "eval_micro_f1": 0.8130148270181219,
260
+ "eval_mixture_f1": 0.4556962025316456,
261
+ "eval_runtime": 39.6082,
262
+ "eval_samples_per_second": 30.65,
263
+ "eval_steps_per_second": 0.959,
264
+ "eval_true_f1": 0.9153605015673981,
265
+ "eval_unproven_f1": 0.5671641791044776,
266
+ "step": 3678
267
+ },
268
+ {
269
+ "epoch": 3.0,
270
+ "step": 3678,
271
+ "total_flos": 2.106512041918464e+16,
272
+ "train_loss": 0.5249485609128204,
273
+ "train_runtime": 3291.1672,
274
+ "train_samples_per_second": 8.937,
275
+ "train_steps_per_second": 1.118
276
+ }
277
+ ],
278
+ "max_steps": 3678,
279
+ "num_train_epochs": 3,
280
+ "total_flos": 2.106512041918464e+16,
281
+ "trial_name": null,
282
+ "trial_params": null
283
+ }