jflotz commited on
Commit
1fa2dd6
1 Parent(s): 287ea59

Training in progress, step 180000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:890dc581163da8ac34698455aef8e08af0d03b6c31e289a5752b729c1a6eb8ad
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1b0d96f667a7b16c0c7d2f49737d4497a498e6fce49a93b982d94c8de8309c
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:478382e1e4afce83db55490eec55f2c2cc88645b07b91562f7bf2468273abbf0
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:434932d378cb26c672decfb210d19f8b4a125ed1b9443d7969a7371379940fc8
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8bf8ede2357b1086ec56810178a0bcfed2f59612fe08e69399b2d94840600f1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016c297c140d00f5c3ece68fb6697280ca6918ef74f0bee47ce148ceaa7045be
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1470898ecdc6550560113c5e2cfd1e79edea6b27c0b7d35814645546c1b5bff0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae7e48b658f6388c6c044e6d37239970a21307494d626979f7e10630dfa93207
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.583868665239115,
5
- "global_step": 170000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3406,11 +3406,211 @@
3406
  "eval_samples_per_second": 1033.305,
3407
  "eval_steps_per_second": 16.195,
3408
  "step": 170000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3409
  }
3410
  ],
3411
  "max_steps": 250000,
3412
  "num_train_epochs": 12,
3413
- "total_flos": 2.7228061248434544e+21,
3414
  "trial_name": null,
3415
  "trial_params": null
3416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.029978586723768,
5
+ "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3406
  "eval_samples_per_second": 1033.305,
3407
  "eval_steps_per_second": 16.195,
3408
  "step": 170000
3409
+ },
3410
+ {
3411
+ "epoch": 7.61,
3412
+ "learning_rate": 0.00015862789502737648,
3413
+ "loss": 0.3728,
3414
+ "step": 170500
3415
+ },
3416
+ {
3417
+ "epoch": 7.63,
3418
+ "learning_rate": 0.00015693712647480446,
3419
+ "loss": 0.3731,
3420
+ "step": 171000
3421
+ },
3422
+ {
3423
+ "epoch": 7.63,
3424
+ "eval_loss": 0.3457169234752655,
3425
+ "eval_runtime": 2.2902,
3426
+ "eval_samples_per_second": 1002.962,
3427
+ "eval_steps_per_second": 15.719,
3428
+ "step": 171000
3429
+ },
3430
+ {
3431
+ "epoch": 7.65,
3432
+ "learning_rate": 0.00015525283467197743,
3433
+ "loss": 0.3727,
3434
+ "step": 171500
3435
+ },
3436
+ {
3437
+ "epoch": 7.67,
3438
+ "learning_rate": 0.00015357509329527556,
3439
+ "loss": 0.3726,
3440
+ "step": 172000
3441
+ },
3442
+ {
3443
+ "epoch": 7.67,
3444
+ "eval_loss": 0.3477800190448761,
3445
+ "eval_runtime": 2.185,
3446
+ "eval_samples_per_second": 1051.256,
3447
+ "eval_steps_per_second": 16.476,
3448
+ "step": 172000
3449
+ },
3450
+ {
3451
+ "epoch": 7.7,
3452
+ "learning_rate": 0.00015190397573454158,
3453
+ "loss": 0.3727,
3454
+ "step": 172500
3455
+ },
3456
+ {
3457
+ "epoch": 7.72,
3458
+ "learning_rate": 0.00015023955508987127,
3459
+ "loss": 0.3725,
3460
+ "step": 173000
3461
+ },
3462
+ {
3463
+ "epoch": 7.72,
3464
+ "eval_loss": 0.3447197675704956,
3465
+ "eval_runtime": 2.1651,
3466
+ "eval_samples_per_second": 1060.935,
3467
+ "eval_steps_per_second": 16.628,
3468
+ "step": 173000
3469
+ },
3470
+ {
3471
+ "epoch": 7.74,
3472
+ "learning_rate": 0.00014858190416841565,
3473
+ "loss": 0.3724,
3474
+ "step": 173500
3475
+ },
3476
+ {
3477
+ "epoch": 7.76,
3478
+ "learning_rate": 0.00014693109548119591,
3479
+ "loss": 0.3722,
3480
+ "step": 174000
3481
+ },
3482
+ {
3483
+ "epoch": 7.76,
3484
+ "eval_loss": 0.3459009826183319,
3485
+ "eval_runtime": 2.2671,
3486
+ "eval_samples_per_second": 1013.172,
3487
+ "eval_steps_per_second": 15.879,
3488
+ "step": 174000
3489
+ },
3490
+ {
3491
+ "epoch": 7.78,
3492
+ "learning_rate": 0.00014528720123993226,
3493
+ "loss": 0.3721,
3494
+ "step": 174500
3495
+ },
3496
+ {
3497
+ "epoch": 7.81,
3498
+ "learning_rate": 0.0001436502933538841,
3499
+ "loss": 0.3723,
3500
+ "step": 175000
3501
+ },
3502
+ {
3503
+ "epoch": 7.81,
3504
+ "eval_loss": 0.3462165296077728,
3505
+ "eval_runtime": 2.2195,
3506
+ "eval_samples_per_second": 1034.906,
3507
+ "eval_steps_per_second": 16.22,
3508
+ "step": 175000
3509
+ },
3510
+ {
3511
+ "epoch": 7.83,
3512
+ "learning_rate": 0.00014202044342670508,
3513
+ "loss": 0.372,
3514
+ "step": 175500
3515
+ },
3516
+ {
3517
+ "epoch": 7.85,
3518
+ "learning_rate": 0.00014039772275331125,
3519
+ "loss": 0.3718,
3520
+ "step": 176000
3521
+ },
3522
+ {
3523
+ "epoch": 7.85,
3524
+ "eval_loss": 0.3463585674762726,
3525
+ "eval_runtime": 2.2454,
3526
+ "eval_samples_per_second": 1022.987,
3527
+ "eval_steps_per_second": 16.033,
3528
+ "step": 176000
3529
+ },
3530
+ {
3531
+ "epoch": 7.87,
3532
+ "learning_rate": 0.00013878220231676152,
3533
+ "loss": 0.3716,
3534
+ "step": 176500
3535
+ },
3536
+ {
3537
+ "epoch": 7.9,
3538
+ "learning_rate": 0.00013717395278515355,
3539
+ "loss": 0.3716,
3540
+ "step": 177000
3541
+ },
3542
+ {
3543
+ "epoch": 7.9,
3544
+ "eval_loss": 0.34527209401130676,
3545
+ "eval_runtime": 2.2682,
3546
+ "eval_samples_per_second": 1012.694,
3547
+ "eval_steps_per_second": 15.872,
3548
+ "step": 177000
3549
+ },
3550
+ {
3551
+ "epoch": 7.92,
3552
+ "learning_rate": 0.00013557304450853162,
3553
+ "loss": 0.3714,
3554
+ "step": 177500
3555
+ },
3556
+ {
3557
+ "epoch": 7.94,
3558
+ "learning_rate": 0.00013397954751581014,
3559
+ "loss": 0.3712,
3560
+ "step": 178000
3561
+ },
3562
+ {
3563
+ "epoch": 7.94,
3564
+ "eval_loss": 0.34656643867492676,
3565
+ "eval_runtime": 2.2489,
3566
+ "eval_samples_per_second": 1021.386,
3567
+ "eval_steps_per_second": 16.008,
3568
+ "step": 178000
3569
+ },
3570
+ {
3571
+ "epoch": 7.96,
3572
+ "learning_rate": 0.00013239353151170983,
3573
+ "loss": 0.371,
3574
+ "step": 178500
3575
+ },
3576
+ {
3577
+ "epoch": 7.99,
3578
+ "learning_rate": 0.00013081506587370853,
3579
+ "loss": 0.3712,
3580
+ "step": 179000
3581
+ },
3582
+ {
3583
+ "epoch": 7.99,
3584
+ "eval_loss": 0.34555310010910034,
3585
+ "eval_runtime": 2.1617,
3586
+ "eval_samples_per_second": 1062.607,
3587
+ "eval_steps_per_second": 16.654,
3588
+ "step": 179000
3589
+ },
3590
+ {
3591
+ "epoch": 8.01,
3592
+ "learning_rate": 0.00012924421964900695,
3593
+ "loss": 0.371,
3594
+ "step": 179500
3595
+ },
3596
+ {
3597
+ "epoch": 8.03,
3598
+ "learning_rate": 0.00012768106155150758,
3599
+ "loss": 0.3709,
3600
+ "step": 180000
3601
+ },
3602
+ {
3603
+ "epoch": 8.03,
3604
+ "eval_loss": 0.34523507952690125,
3605
+ "eval_runtime": 2.3269,
3606
+ "eval_samples_per_second": 987.138,
3607
+ "eval_steps_per_second": 15.471,
3608
+ "step": 180000
3609
  }
3610
  ],
3611
  "max_steps": 250000,
3612
  "num_train_epochs": 12,
3613
+ "total_flos": 2.882966995727085e+21,
3614
  "trial_name": null,
3615
  "trial_params": null
3616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:478382e1e4afce83db55490eec55f2c2cc88645b07b91562f7bf2468273abbf0
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:434932d378cb26c672decfb210d19f8b4a125ed1b9443d7969a7371379940fc8
3
  size 25761253