jflotz commited on
Commit
02a5ea7
1 Parent(s): edc5476

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ddf8c73a99cc42b29915ad54282c497fea591218d3df26b14997a77e4eb6d55
3
  size 50044241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c29fbdf49be880cb6493988a94306303a92a77dfa3a5685c194a3ed9741dd7
3
  size 50044241
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc61f9c8be03d76c40deff98eea159770951d05b23400b4c372c8b407086f24
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab920afe4d207be60e60c397c80474cec3b0781a866712c960668a023bb8b59
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8e6070527688748b61c783bb22aaf8ce741c15f561e7b1d5c9a7481b92b181a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4daecf9b867e6b1022a006e977cc17780663921ebe9348f84c4f92fd1098fd3
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:595e6d0c765b4677e7c91d65c2aacefa0d09faec0213d2321b49d411358f597f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fa6f3c4ec253d8129a7481e01148ec46428b7a6eb1631c7fb589fd92c25c12f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.44610992148465384,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -206,11 +206,211 @@
206
  "eval_samples_per_second": 1027.899,
207
  "eval_steps_per_second": 16.11,
208
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  }
210
  ],
211
  "max_steps": 250000,
212
  "num_train_epochs": 12,
213
- "total_flos": 1.6016800286580408e+20,
214
  "trial_name": null,
215
  "trial_params": null
216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8922198429693077,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
206
  "eval_samples_per_second": 1027.899,
207
  "eval_steps_per_second": 16.11,
208
  "step": 10000
209
+ },
210
+ {
211
+ "epoch": 0.47,
212
+ "learning_rate": 0.0005039999999999999,
213
+ "loss": 0.6775,
214
+ "step": 10500
215
+ },
216
+ {
217
+ "epoch": 0.49,
218
+ "learning_rate": 0.0005279999999999999,
219
+ "loss": 0.6775,
220
+ "step": 11000
221
+ },
222
+ {
223
+ "epoch": 0.49,
224
+ "eval_loss": 0.6769479513168335,
225
+ "eval_runtime": 2.177,
226
+ "eval_samples_per_second": 1055.099,
227
+ "eval_steps_per_second": 16.536,
228
+ "step": 11000
229
+ },
230
+ {
231
+ "epoch": 0.51,
232
+ "learning_rate": 0.000552,
233
+ "loss": 0.6773,
234
+ "step": 11500
235
+ },
236
+ {
237
+ "epoch": 0.54,
238
+ "learning_rate": 0.0005759999999999999,
239
+ "loss": 0.6773,
240
+ "step": 12000
241
+ },
242
+ {
243
+ "epoch": 0.54,
244
+ "eval_loss": 0.6773238182067871,
245
+ "eval_runtime": 2.1281,
246
+ "eval_samples_per_second": 1079.366,
247
+ "eval_steps_per_second": 16.916,
248
+ "step": 12000
249
+ },
250
+ {
251
+ "epoch": 0.56,
252
+ "learning_rate": 0.0006,
253
+ "loss": 0.6773,
254
+ "step": 12500
255
+ },
256
+ {
257
+ "epoch": 0.58,
258
+ "learning_rate": 0.0005999935478721662,
259
+ "loss": 0.6774,
260
+ "step": 13000
261
+ },
262
+ {
263
+ "epoch": 0.58,
264
+ "eval_loss": 0.677127480506897,
265
+ "eval_runtime": 2.1773,
266
+ "eval_samples_per_second": 1054.986,
267
+ "eval_steps_per_second": 16.534,
268
+ "step": 13000
269
+ },
270
+ {
271
+ "epoch": 0.6,
272
+ "learning_rate": 0.000599974191770902,
273
+ "loss": 0.6773,
274
+ "step": 13500
275
+ },
276
+ {
277
+ "epoch": 0.62,
278
+ "learning_rate": 0.0005999419325429058,
279
+ "loss": 0.6773,
280
+ "step": 14000
281
+ },
282
+ {
283
+ "epoch": 0.62,
284
+ "eval_loss": 0.6771531105041504,
285
+ "eval_runtime": 2.1173,
286
+ "eval_samples_per_second": 1084.86,
287
+ "eval_steps_per_second": 17.003,
288
+ "step": 14000
289
+ },
290
+ {
291
+ "epoch": 0.65,
292
+ "learning_rate": 0.0005998967715993009,
293
+ "loss": 0.6773,
294
+ "step": 14500
295
+ },
296
+ {
297
+ "epoch": 0.67,
298
+ "learning_rate": 0.0005998387109155732,
299
+ "loss": 0.6773,
300
+ "step": 15000
301
+ },
302
+ {
303
+ "epoch": 0.67,
304
+ "eval_loss": 0.6771678924560547,
305
+ "eval_runtime": 2.1919,
306
+ "eval_samples_per_second": 1047.963,
307
+ "eval_steps_per_second": 16.424,
308
+ "step": 15000
309
+ },
310
+ {
311
+ "epoch": 0.69,
312
+ "learning_rate": 0.000599767753031485,
313
+ "loss": 0.6773,
314
+ "step": 15500
315
+ },
316
+ {
317
+ "epoch": 0.71,
318
+ "learning_rate": 0.0005996839010509641,
319
+ "loss": 0.6772,
320
+ "step": 16000
321
+ },
322
+ {
323
+ "epoch": 0.71,
324
+ "eval_loss": 0.6776318550109863,
325
+ "eval_runtime": 2.199,
326
+ "eval_samples_per_second": 1044.559,
327
+ "eval_steps_per_second": 16.371,
328
+ "step": 16000
329
+ },
330
+ {
331
+ "epoch": 0.74,
332
+ "learning_rate": 0.0005995871586419678,
333
+ "loss": 0.6773,
334
+ "step": 16500
335
+ },
336
+ {
337
+ "epoch": 0.76,
338
+ "learning_rate": 0.0005994775300363225,
339
+ "loss": 0.6773,
340
+ "step": 17000
341
+ },
342
+ {
343
+ "epoch": 0.76,
344
+ "eval_loss": 0.676984429359436,
345
+ "eval_runtime": 2.1946,
346
+ "eval_samples_per_second": 1046.652,
347
+ "eval_steps_per_second": 16.404,
348
+ "step": 17000
349
+ },
350
+ {
351
+ "epoch": 0.78,
352
+ "learning_rate": 0.0005993550200295384,
353
+ "loss": 0.6772,
354
+ "step": 17500
355
+ },
356
+ {
357
+ "epoch": 0.8,
358
+ "learning_rate": 0.0005992196339806002,
359
+ "loss": 0.6772,
360
+ "step": 18000
361
+ },
362
+ {
363
+ "epoch": 0.8,
364
+ "eval_loss": 0.6774880290031433,
365
+ "eval_runtime": 2.1027,
366
+ "eval_samples_per_second": 1092.415,
367
+ "eval_steps_per_second": 17.121,
368
+ "step": 18000
369
+ },
370
+ {
371
+ "epoch": 0.83,
372
+ "learning_rate": 0.0005990713778117324,
373
+ "loss": 0.6773,
374
+ "step": 18500
375
+ },
376
+ {
377
+ "epoch": 0.85,
378
+ "learning_rate": 0.0005989102580081398,
379
+ "loss": 0.6772,
380
+ "step": 19000
381
+ },
382
+ {
383
+ "epoch": 0.85,
384
+ "eval_loss": 0.676984965801239,
385
+ "eval_runtime": 2.1686,
386
+ "eval_samples_per_second": 1059.223,
387
+ "eval_steps_per_second": 16.601,
388
+ "step": 19000
389
+ },
390
+ {
391
+ "epoch": 0.87,
392
+ "learning_rate": 0.0005987362816177249,
393
+ "loss": 0.6773,
394
+ "step": 19500
395
+ },
396
+ {
397
+ "epoch": 0.89,
398
+ "learning_rate": 0.0005985494562507783,
399
+ "loss": 0.6774,
400
+ "step": 20000
401
+ },
402
+ {
403
+ "epoch": 0.89,
404
+ "eval_loss": 0.6769698262214661,
405
+ "eval_runtime": 2.1456,
406
+ "eval_samples_per_second": 1070.586,
407
+ "eval_steps_per_second": 16.779,
408
+ "step": 20000
409
  }
410
  ],
411
  "max_steps": 250000,
412
  "num_train_epochs": 12,
413
+ "total_flos": 3.2033600573160817e+20,
414
  "trial_name": null,
415
  "trial_params": null
416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fdc61f9c8be03d76c40deff98eea159770951d05b23400b4c372c8b407086f24
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab920afe4d207be60e60c397c80474cec3b0781a866712c960668a023bb8b59
3
  size 25761253