MarkelFe commited on
Commit
5c10363
1 Parent(s): e278c9d

Training in progress, step 500000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:005db65eb58f9237e48ebb211a3461d36a8a5572524730ba998bc2393e3a0b49
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5c66e223c9afd97a3b2031f28400ecc3ff5cf48322a645ecb5c3d4ba9e2cc8
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e76e0744d6f0f537056ad5103f02a6d5f5eb5eb960f943e779627f94d0de15e0
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:724fe75b2891e3eb33370e185b7549e8b85ea750d0af5d509737ae3f8927f173
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28300f1d8f6ee8059c5bd58bb510fd233bccfaebc75fff86f7a87e9ea5e4924e
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2934ce8ee53a3f126996335cb7a1d2a9354eb222cdf717375f161332a72ce0b8
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.601477412633772,
5
- "global_step": 490000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6284,11 +6284,139 @@
6284
  "eval_samples_per_second": 166.429,
6285
  "eval_steps_per_second": 20.808,
6286
  "step": 490000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6287
  }
6288
  ],
6289
  "max_steps": 633540,
6290
  "num_train_epochs": 15,
6291
- "total_flos": 1.0338847011072e+17,
6292
  "trial_name": null,
6293
  "trial_params": null
6294
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.838242257789563,
5
+ "global_step": 500000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6284
  "eval_samples_per_second": 166.429,
6285
  "eval_steps_per_second": 20.808,
6286
  "step": 490000
6287
+ },
6288
+ {
6289
+ "epoch": 11.61,
6290
+ "learning_rate": 0.0,
6291
+ "loss": 2.4271,
6292
+ "step": 490500
6293
+ },
6294
+ {
6295
+ "epoch": 11.63,
6296
+ "learning_rate": 0.0,
6297
+ "loss": 2.4343,
6298
+ "step": 491000
6299
+ },
6300
+ {
6301
+ "epoch": 11.64,
6302
+ "learning_rate": 0.0,
6303
+ "loss": 2.4414,
6304
+ "step": 491500
6305
+ },
6306
+ {
6307
+ "epoch": 11.65,
6308
+ "learning_rate": 0.0,
6309
+ "loss": 2.4335,
6310
+ "step": 492000
6311
+ },
6312
+ {
6313
+ "epoch": 11.66,
6314
+ "learning_rate": 0.0,
6315
+ "loss": 2.4353,
6316
+ "step": 492500
6317
+ },
6318
+ {
6319
+ "epoch": 11.67,
6320
+ "learning_rate": 0.0,
6321
+ "loss": 2.4308,
6322
+ "step": 493000
6323
+ },
6324
+ {
6325
+ "epoch": 11.68,
6326
+ "learning_rate": 0.0,
6327
+ "loss": 2.4395,
6328
+ "step": 493500
6329
+ },
6330
+ {
6331
+ "epoch": 11.7,
6332
+ "learning_rate": 0.0,
6333
+ "loss": 2.4239,
6334
+ "step": 494000
6335
+ },
6336
+ {
6337
+ "epoch": 11.71,
6338
+ "learning_rate": 0.0,
6339
+ "loss": 2.4247,
6340
+ "step": 494500
6341
+ },
6342
+ {
6343
+ "epoch": 11.72,
6344
+ "learning_rate": 0.0,
6345
+ "loss": 2.4229,
6346
+ "step": 495000
6347
+ },
6348
+ {
6349
+ "epoch": 11.73,
6350
+ "learning_rate": 0.0,
6351
+ "loss": 2.4253,
6352
+ "step": 495500
6353
+ },
6354
+ {
6355
+ "epoch": 11.74,
6356
+ "learning_rate": 0.0,
6357
+ "loss": 2.4108,
6358
+ "step": 496000
6359
+ },
6360
+ {
6361
+ "epoch": 11.76,
6362
+ "learning_rate": 0.0,
6363
+ "loss": 2.4133,
6364
+ "step": 496500
6365
+ },
6366
+ {
6367
+ "epoch": 11.77,
6368
+ "learning_rate": 0.0,
6369
+ "loss": 2.4318,
6370
+ "step": 497000
6371
+ },
6372
+ {
6373
+ "epoch": 11.78,
6374
+ "learning_rate": 0.0,
6375
+ "loss": 2.4255,
6376
+ "step": 497500
6377
+ },
6378
+ {
6379
+ "epoch": 11.79,
6380
+ "learning_rate": 0.0,
6381
+ "loss": 2.4123,
6382
+ "step": 498000
6383
+ },
6384
+ {
6385
+ "epoch": 11.8,
6386
+ "learning_rate": 0.0,
6387
+ "loss": 2.4323,
6388
+ "step": 498500
6389
+ },
6390
+ {
6391
+ "epoch": 11.81,
6392
+ "learning_rate": 0.0,
6393
+ "loss": 2.4014,
6394
+ "step": 499000
6395
+ },
6396
+ {
6397
+ "epoch": 11.83,
6398
+ "learning_rate": 0.0,
6399
+ "loss": 2.4377,
6400
+ "step": 499500
6401
+ },
6402
+ {
6403
+ "epoch": 11.84,
6404
+ "learning_rate": 0.0,
6405
+ "loss": 2.4497,
6406
+ "step": 500000
6407
+ },
6408
+ {
6409
+ "epoch": 11.84,
6410
+ "eval_loss": 3.1522228717803955,
6411
+ "eval_runtime": 113.0091,
6412
+ "eval_samples_per_second": 166.11,
6413
+ "eval_steps_per_second": 20.768,
6414
+ "step": 500000
6415
  }
6416
  ],
6417
  "max_steps": 633540,
6418
  "num_train_epochs": 15,
6419
+ "total_flos": 1.0549596386304e+17,
6420
  "trial_name": null,
6421
  "trial_params": null
6422
  }