ShengdingHu commited on
Commit
ee896ef
1 Parent(s): d8151ae

Training in progress, step 100

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 80.30581039755351,
4
- "eval_average_metrics": 80.30581039755351,
5
- "eval_loss": 0.19154316186904907,
6
- "eval_runtime": 17.148,
7
- "eval_samples_per_second": 95.346,
8
- "test_accuracy": 79.7553516819572,
9
- "test_average_metrics": 79.7553516819572,
10
- "test_loss": 0.195680171251297,
11
- "test_runtime": 16.947,
12
- "test_samples_per_second": 96.477,
13
- "train_loss": 0.20100380073159427,
14
- "train_runtime": 3071.6757,
15
  "train_samples": 9427,
16
- "train_samples_per_second": 61.38,
17
- "train_steps_per_second": 1.921
18
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 76.75840978593273,
4
+ "eval_average_metrics": 76.75840978593273,
5
+ "eval_loss": 0.20979416370391846,
6
+ "eval_runtime": 6.9844,
7
+ "eval_samples_per_second": 234.094,
8
+ "test_accuracy": 75.77981651376146,
9
+ "test_average_metrics": 75.77981651376146,
10
+ "test_loss": 0.21779079735279083,
11
+ "test_runtime": 7.071,
12
+ "test_samples_per_second": 231.226,
13
+ "train_loss": 0.6385218733448094,
14
+ "train_runtime": 73.7325,
15
  "train_samples": 9427,
16
+ "train_samples_per_second": 127.854,
17
+ "train_steps_per_second": 4.001
18
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 80.30581039755351,
4
- "eval_average_metrics": 80.30581039755351,
5
- "eval_loss": 0.19154316186904907,
6
- "eval_runtime": 17.148,
7
- "eval_samples_per_second": 95.346
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 76.75840978593273,
4
+ "eval_average_metrics": 76.75840978593273,
5
+ "eval_loss": 0.20979416370391846,
6
+ "eval_runtime": 6.9844,
7
+ "eval_samples_per_second": 234.094
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d01703b0e5945e998181595a54904d84f19c30fcde9c48bd6d3311f9cd8babf
3
  size 2602117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bf941da0becc1082c240f9292c525d37e4bf345ae4c3a4ed24e85b953d86370
3
  size 2602117
runs/May13_08-53-35_node1/events.out.tfevents.1652403398.node1.2643622.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b7cd2ff90e199bc10c9439ec248e4178f8bbf99e124fff5289c104c37d70841
3
+ size 684
runs/May13_09-01-04_node1/1652403708.8985012/events.out.tfevents.1652403708.node1.2647054.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b398091cc1ada54c78722afbd7f57ba9e99d70b811605b3054363edc7e3be973
3
+ size 5392
runs/May13_09-01-04_node1/events.out.tfevents.1652403708.node1.2647054.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:467422c02e850a86481450e8b4475f3293c05f36cd2c14c38237c35f6d3dc0a6
3
+ size 4720
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "test_accuracy": 79.7553516819572,
4
- "test_average_metrics": 79.7553516819572,
5
- "test_loss": 0.195680171251297,
6
- "test_runtime": 16.947,
7
- "test_samples_per_second": 96.477
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "test_accuracy": 75.77981651376146,
4
+ "test_average_metrics": 75.77981651376146,
5
+ "test_loss": 0.21779079735279083,
6
+ "test_runtime": 7.071,
7
+ "test_samples_per_second": 231.226
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "train_loss": 0.20100380073159427,
4
- "train_runtime": 3071.6757,
5
  "train_samples": 9427,
6
- "train_samples_per_second": 61.38,
7
- "train_steps_per_second": 1.921
8
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6385218733448094,
4
+ "train_runtime": 73.7325,
5
  "train_samples": 9427,
6
+ "train_samples_per_second": 127.854,
7
+ "train_steps_per_second": 4.001
8
  }
trainer_state.json CHANGED
@@ -1,352 +1,43 @@
1
  {
2
- "best_metric": 80.30581039755351,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-boolq/checkpoint-2600",
4
- "epoch": 20.0,
5
- "global_step": 5900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.68,
12
- "eval_accuracy": 74.6788990825688,
13
- "eval_average_metrics": 74.6788990825688,
14
- "eval_loss": 0.21743574738502502,
15
- "eval_runtime": 16.9831,
16
- "eval_samples_per_second": 96.272,
17
- "step": 200
18
- },
19
- {
20
- "epoch": 1.36,
21
- "eval_accuracy": 75.71865443425077,
22
- "eval_average_metrics": 75.71865443425077,
23
- "eval_loss": 0.21506452560424805,
24
- "eval_runtime": 14.6109,
25
- "eval_samples_per_second": 111.903,
26
- "step": 400
27
- },
28
- {
29
- "epoch": 1.69,
30
- "learning_rate": 0.00027457627118644066,
31
- "loss": 0.374,
32
- "step": 500
33
- },
34
- {
35
- "epoch": 2.03,
36
- "eval_accuracy": 77.18654434250764,
37
- "eval_average_metrics": 77.18654434250764,
38
- "eval_loss": 0.20255930721759796,
39
- "eval_runtime": 17.1645,
40
- "eval_samples_per_second": 95.254,
41
- "step": 600
42
- },
43
- {
44
- "epoch": 2.71,
45
- "eval_accuracy": 76.5137614678899,
46
- "eval_average_metrics": 76.5137614678899,
47
- "eval_loss": 0.220754012465477,
48
- "eval_runtime": 17.1929,
49
- "eval_samples_per_second": 95.097,
50
- "step": 800
51
- },
52
- {
53
- "epoch": 3.39,
54
- "learning_rate": 0.00024915254237288135,
55
- "loss": 0.2114,
56
- "step": 1000
57
- },
58
- {
59
- "epoch": 3.39,
60
- "eval_accuracy": 78.1651376146789,
61
- "eval_average_metrics": 78.1651376146789,
62
- "eval_loss": 0.19822736084461212,
63
- "eval_runtime": 14.5865,
64
- "eval_samples_per_second": 112.09,
65
- "step": 1000
66
- },
67
- {
68
- "epoch": 4.07,
69
- "eval_accuracy": 78.71559633027523,
70
- "eval_average_metrics": 78.71559633027523,
71
- "eval_loss": 0.19769148528575897,
72
- "eval_runtime": 17.1866,
73
- "eval_samples_per_second": 95.132,
74
- "step": 1200
75
- },
76
- {
77
- "epoch": 4.75,
78
- "eval_accuracy": 78.2262996941896,
79
- "eval_average_metrics": 78.2262996941896,
80
- "eval_loss": 0.1974276602268219,
81
- "eval_runtime": 17.1722,
82
- "eval_samples_per_second": 95.212,
83
- "step": 1400
84
- },
85
- {
86
- "epoch": 5.08,
87
- "learning_rate": 0.000223728813559322,
88
- "loss": 0.1997,
89
- "step": 1500
90
- },
91
- {
92
- "epoch": 5.42,
93
- "eval_accuracy": 78.71559633027523,
94
- "eval_average_metrics": 78.71559633027523,
95
- "eval_loss": 0.19954562187194824,
96
- "eval_runtime": 14.5679,
97
- "eval_samples_per_second": 112.233,
98
- "step": 1600
99
- },
100
- {
101
- "epoch": 6.1,
102
- "eval_accuracy": 78.2262996941896,
103
- "eval_average_metrics": 78.2262996941896,
104
- "eval_loss": 0.20299072563648224,
105
- "eval_runtime": 17.2624,
106
- "eval_samples_per_second": 94.714,
107
- "step": 1800
108
- },
109
- {
110
- "epoch": 6.78,
111
- "learning_rate": 0.0001983050847457627,
112
- "loss": 0.1934,
113
- "step": 2000
114
- },
115
- {
116
- "epoch": 6.78,
117
- "eval_accuracy": 79.63302752293579,
118
- "eval_average_metrics": 79.63302752293579,
119
- "eval_loss": 0.19578830897808075,
120
- "eval_runtime": 17.2808,
121
- "eval_samples_per_second": 94.614,
122
- "step": 2000
123
- },
124
- {
125
- "epoch": 7.46,
126
- "eval_accuracy": 79.26605504587157,
127
- "eval_average_metrics": 79.26605504587157,
128
- "eval_loss": 0.1937599629163742,
129
- "eval_runtime": 15.3794,
130
- "eval_samples_per_second": 106.311,
131
- "step": 2200
132
- },
133
- {
134
- "epoch": 8.14,
135
- "eval_accuracy": 77.92048929663609,
136
- "eval_average_metrics": 77.92048929663609,
137
- "eval_loss": 0.2019716054201126,
138
- "eval_runtime": 17.5057,
139
- "eval_samples_per_second": 93.398,
140
- "step": 2400
141
- },
142
- {
143
- "epoch": 8.47,
144
- "learning_rate": 0.0001728813559322034,
145
- "loss": 0.1907,
146
- "step": 2500
147
- },
148
- {
149
- "epoch": 8.81,
150
- "eval_accuracy": 80.30581039755351,
151
- "eval_average_metrics": 80.30581039755351,
152
- "eval_loss": 0.19154316186904907,
153
- "eval_runtime": 17.3782,
154
- "eval_samples_per_second": 94.083,
155
- "step": 2600
156
- },
157
- {
158
- "epoch": 9.49,
159
- "eval_accuracy": 79.93883792048929,
160
- "eval_average_metrics": 79.93883792048929,
161
- "eval_loss": 0.19677455723285675,
162
- "eval_runtime": 15.9661,
163
- "eval_samples_per_second": 102.404,
164
- "step": 2800
165
- },
166
- {
167
- "epoch": 10.17,
168
- "learning_rate": 0.00014745762711864405,
169
- "loss": 0.183,
170
- "step": 3000
171
  },
172
  {
173
- "epoch": 10.17,
174
- "eval_accuracy": 79.44954128440367,
175
- "eval_average_metrics": 79.44954128440367,
176
- "eval_loss": 0.19117017090320587,
177
- "eval_runtime": 17.2749,
178
- "eval_samples_per_second": 94.646,
179
- "step": 3000
180
- },
181
- {
182
- "epoch": 10.85,
183
- "eval_accuracy": 78.10397553516819,
184
- "eval_average_metrics": 78.10397553516819,
185
- "eval_loss": 0.20411182940006256,
186
- "eval_runtime": 17.1181,
187
- "eval_samples_per_second": 95.513,
188
- "step": 3200
189
- },
190
- {
191
- "epoch": 11.53,
192
- "eval_accuracy": 79.02140672782875,
193
- "eval_average_metrics": 79.02140672782875,
194
- "eval_loss": 0.194900244474411,
195
- "eval_runtime": 17.1955,
196
- "eval_samples_per_second": 95.083,
197
- "step": 3400
198
- },
199
- {
200
- "epoch": 11.86,
201
- "learning_rate": 0.00012203389830508474,
202
- "loss": 0.181,
203
- "step": 3500
204
- },
205
- {
206
- "epoch": 12.2,
207
- "eval_accuracy": 79.38837920489297,
208
- "eval_average_metrics": 79.38837920489297,
209
- "eval_loss": 0.20091596245765686,
210
- "eval_runtime": 16.9905,
211
- "eval_samples_per_second": 96.23,
212
- "step": 3600
213
- },
214
- {
215
- "epoch": 12.88,
216
- "eval_accuracy": 79.81651376146789,
217
- "eval_average_metrics": 79.81651376146789,
218
- "eval_loss": 0.18894420564174652,
219
- "eval_runtime": 17.2706,
220
- "eval_samples_per_second": 94.669,
221
- "step": 3800
222
- },
223
- {
224
- "epoch": 13.56,
225
- "learning_rate": 9.661016949152541e-05,
226
- "loss": 0.1786,
227
- "step": 4000
228
- },
229
- {
230
- "epoch": 13.56,
231
- "eval_accuracy": 78.77675840978593,
232
- "eval_average_metrics": 78.77675840978593,
233
- "eval_loss": 0.20160046219825745,
234
- "eval_runtime": 17.0941,
235
- "eval_samples_per_second": 95.647,
236
- "step": 4000
237
- },
238
- {
239
- "epoch": 14.24,
240
- "eval_accuracy": 79.57186544342507,
241
- "eval_average_metrics": 79.57186544342507,
242
- "eval_loss": 0.19864365458488464,
243
- "eval_runtime": 17.0536,
244
- "eval_samples_per_second": 95.874,
245
- "step": 4200
246
- },
247
- {
248
- "epoch": 14.92,
249
- "eval_accuracy": 79.51070336391437,
250
- "eval_average_metrics": 79.51070336391437,
251
- "eval_loss": 0.19150203466415405,
252
- "eval_runtime": 17.2063,
253
- "eval_samples_per_second": 95.024,
254
- "step": 4400
255
- },
256
- {
257
- "epoch": 15.25,
258
- "learning_rate": 7.11864406779661e-05,
259
- "loss": 0.1769,
260
- "step": 4500
261
- },
262
- {
263
- "epoch": 15.59,
264
- "eval_accuracy": 78.77675840978593,
265
- "eval_average_metrics": 78.77675840978593,
266
- "eval_loss": 0.19904659688472748,
267
- "eval_runtime": 17.1806,
268
- "eval_samples_per_second": 95.165,
269
- "step": 4600
270
- },
271
- {
272
- "epoch": 16.27,
273
- "eval_accuracy": 79.20489296636084,
274
- "eval_average_metrics": 79.20489296636084,
275
- "eval_loss": 0.19741013646125793,
276
- "eval_runtime": 17.2538,
277
- "eval_samples_per_second": 94.762,
278
- "step": 4800
279
- },
280
- {
281
- "epoch": 16.95,
282
- "learning_rate": 4.576271186440678e-05,
283
- "loss": 0.1741,
284
- "step": 5000
285
- },
286
- {
287
- "epoch": 16.95,
288
- "eval_accuracy": 79.51070336391437,
289
- "eval_average_metrics": 79.51070336391437,
290
- "eval_loss": 0.19429509341716766,
291
- "eval_runtime": 17.4899,
292
- "eval_samples_per_second": 93.483,
293
- "step": 5000
294
- },
295
- {
296
- "epoch": 17.63,
297
- "eval_accuracy": 78.89908256880734,
298
- "eval_average_metrics": 78.89908256880734,
299
- "eval_loss": 0.20053960382938385,
300
- "eval_runtime": 17.2615,
301
- "eval_samples_per_second": 94.719,
302
- "step": 5200
303
- },
304
- {
305
- "epoch": 18.31,
306
- "eval_accuracy": 79.32721712538226,
307
- "eval_average_metrics": 79.32721712538226,
308
- "eval_loss": 0.1975349634885788,
309
- "eval_runtime": 17.2849,
310
- "eval_samples_per_second": 94.591,
311
- "step": 5400
312
- },
313
- {
314
- "epoch": 18.64,
315
- "learning_rate": 2.0338983050847455e-05,
316
- "loss": 0.1717,
317
- "step": 5500
318
- },
319
- {
320
- "epoch": 18.98,
321
- "eval_accuracy": 78.77675840978593,
322
- "eval_average_metrics": 78.77675840978593,
323
- "eval_loss": 0.20098499953746796,
324
- "eval_runtime": 17.3626,
325
- "eval_samples_per_second": 94.168,
326
- "step": 5600
327
- },
328
- {
329
- "epoch": 19.66,
330
- "eval_accuracy": 79.20489296636084,
331
- "eval_average_metrics": 79.20489296636084,
332
- "eval_loss": 0.19755637645721436,
333
- "eval_runtime": 17.3825,
334
- "eval_samples_per_second": 94.06,
335
- "step": 5800
336
  },
337
  {
338
- "epoch": 20.0,
339
- "step": 5900,
340
- "total_flos": 5.74047486286578e+16,
341
- "train_loss": 0.20100380073159427,
342
- "train_runtime": 3071.6757,
343
- "train_samples_per_second": 61.38,
344
- "train_steps_per_second": 1.921
345
  }
346
  ],
347
- "max_steps": 5900,
348
- "num_train_epochs": 20,
349
- "total_flos": 5.74047486286578e+16,
350
  "trial_name": null,
351
  "trial_params": null
352
  }
 
1
  {
2
+ "best_metric": 76.75840978593273,
3
+ "best_model_checkpoint": "outputs/lora/t5-base/superglue-boolq/checkpoint-200",
4
+ "epoch": 1.0,
5
+ "global_step": 295,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.34,
12
+ "eval_accuracy": 73.27217125382262,
13
+ "eval_average_metrics": 73.27217125382262,
14
+ "eval_loss": 0.22932085394859314,
15
+ "eval_runtime": 6.9808,
16
+ "eval_samples_per_second": 234.214,
17
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  },
19
  {
20
+ "epoch": 0.68,
21
+ "eval_accuracy": 76.75840978593273,
22
+ "eval_average_metrics": 76.75840978593273,
23
+ "eval_loss": 0.20979416370391846,
24
+ "eval_runtime": 7.0533,
25
+ "eval_samples_per_second": 231.807,
26
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
+ "epoch": 1.0,
30
+ "step": 295,
31
+ "total_flos": 2875122627969024.0,
32
+ "train_loss": 0.6385218733448094,
33
+ "train_runtime": 73.7325,
34
+ "train_samples_per_second": 127.854,
35
+ "train_steps_per_second": 4.001
36
  }
37
  ],
38
+ "max_steps": 295,
39
+ "num_train_epochs": 1,
40
+ "total_flos": 2875122627969024.0,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88b31f564a16ff14e33b57eb5d36963b70b4c507029230e10268f329a5bf8ce5
3
  size 3311
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a78b1a8b87a3c5ea8fc6661c07621a881dfa3ca8419a2b5df765b5a7a81b73c7
3
  size 3311