MarkelFe commited on
Commit
1323a5e
1 Parent(s): 5c10363

Training in progress, step 510000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da5c66e223c9afd97a3b2031f28400ecc3ff5cf48322a645ecb5c3d4ba9e2cc8
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68aa498d0e3983775adcecf2a5f78debf331313630e2436769b35a9399923c5c
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:724fe75b2891e3eb33370e185b7549e8b85ea750d0af5d509737ae3f8927f173
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397a701b388141e6d39ca7be9b1469b741594cf39c18781ab0d884cb484ca723
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2934ce8ee53a3f126996335cb7a1d2a9354eb222cdf717375f161332a72ce0b8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa99ca31aedd6429013044fa3d239b87534181d83f6f020dbd10161874bc75c9
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.838242257789563,
5
- "global_step": 500000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6412,11 +6412,139 @@
6412
  "eval_samples_per_second": 166.11,
6413
  "eval_steps_per_second": 20.768,
6414
  "step": 500000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6415
  }
6416
  ],
6417
  "max_steps": 633540,
6418
  "num_train_epochs": 15,
6419
- "total_flos": 1.0549596386304e+17,
6420
  "trial_name": null,
6421
  "trial_params": null
6422
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.075007102945355,
5
+ "global_step": 510000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6412
  "eval_samples_per_second": 166.11,
6413
  "eval_steps_per_second": 20.768,
6414
  "step": 500000
6415
+ },
6416
+ {
6417
+ "epoch": 11.85,
6418
+ "learning_rate": 0.0,
6419
+ "loss": 2.4292,
6420
+ "step": 500500
6421
+ },
6422
+ {
6423
+ "epoch": 11.86,
6424
+ "learning_rate": 0.0,
6425
+ "loss": 2.4061,
6426
+ "step": 501000
6427
+ },
6428
+ {
6429
+ "epoch": 11.87,
6430
+ "learning_rate": 0.0,
6431
+ "loss": 2.4113,
6432
+ "step": 501500
6433
+ },
6434
+ {
6435
+ "epoch": 11.89,
6436
+ "learning_rate": 0.0,
6437
+ "loss": 2.4295,
6438
+ "step": 502000
6439
+ },
6440
+ {
6441
+ "epoch": 11.9,
6442
+ "learning_rate": 0.0,
6443
+ "loss": 2.4258,
6444
+ "step": 502500
6445
+ },
6446
+ {
6447
+ "epoch": 11.91,
6448
+ "learning_rate": 0.0,
6449
+ "loss": 2.4297,
6450
+ "step": 503000
6451
+ },
6452
+ {
6453
+ "epoch": 11.92,
6454
+ "learning_rate": 0.0,
6455
+ "loss": 2.4361,
6456
+ "step": 503500
6457
+ },
6458
+ {
6459
+ "epoch": 11.93,
6460
+ "learning_rate": 0.0,
6461
+ "loss": 2.4306,
6462
+ "step": 504000
6463
+ },
6464
+ {
6465
+ "epoch": 11.94,
6466
+ "learning_rate": 0.0,
6467
+ "loss": 2.4255,
6468
+ "step": 504500
6469
+ },
6470
+ {
6471
+ "epoch": 11.96,
6472
+ "learning_rate": 0.0,
6473
+ "loss": 2.4262,
6474
+ "step": 505000
6475
+ },
6476
+ {
6477
+ "epoch": 11.97,
6478
+ "learning_rate": 0.0,
6479
+ "loss": 2.4273,
6480
+ "step": 505500
6481
+ },
6482
+ {
6483
+ "epoch": 11.98,
6484
+ "learning_rate": 0.0,
6485
+ "loss": 2.4112,
6486
+ "step": 506000
6487
+ },
6488
+ {
6489
+ "epoch": 11.99,
6490
+ "learning_rate": 0.0,
6491
+ "loss": 2.4283,
6492
+ "step": 506500
6493
+ },
6494
+ {
6495
+ "epoch": 12.0,
6496
+ "learning_rate": 0.0,
6497
+ "loss": 2.4236,
6498
+ "step": 507000
6499
+ },
6500
+ {
6501
+ "epoch": 12.02,
6502
+ "learning_rate": 0.0,
6503
+ "loss": 2.4315,
6504
+ "step": 507500
6505
+ },
6506
+ {
6507
+ "epoch": 12.03,
6508
+ "learning_rate": 0.0,
6509
+ "loss": 2.4321,
6510
+ "step": 508000
6511
+ },
6512
+ {
6513
+ "epoch": 12.04,
6514
+ "learning_rate": 0.0,
6515
+ "loss": 2.4189,
6516
+ "step": 508500
6517
+ },
6518
+ {
6519
+ "epoch": 12.05,
6520
+ "learning_rate": 0.0,
6521
+ "loss": 2.422,
6522
+ "step": 509000
6523
+ },
6524
+ {
6525
+ "epoch": 12.06,
6526
+ "learning_rate": 0.0,
6527
+ "loss": 2.4198,
6528
+ "step": 509500
6529
+ },
6530
+ {
6531
+ "epoch": 12.08,
6532
+ "learning_rate": 0.0,
6533
+ "loss": 2.4139,
6534
+ "step": 510000
6535
+ },
6536
+ {
6537
+ "epoch": 12.08,
6538
+ "eval_loss": 3.1522228717803955,
6539
+ "eval_runtime": 112.9202,
6540
+ "eval_samples_per_second": 166.241,
6541
+ "eval_steps_per_second": 20.785,
6542
+ "step": 510000
6543
  }
6544
  ],
6545
  "max_steps": 633540,
6546
  "num_train_epochs": 15,
6547
+ "total_flos": 1.07592688505088e+17,
6548
  "trial_name": null,
6549
  "trial_params": null
6550
  }