MarkelFe commited on
Commit
25ef1c4
1 Parent(s): 1ed73fd

Training in progress, step 280000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9d65ea00f1a4532d9306cc6f4e8018944c05cd5e826ead50529858951946c8f
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c30841bcd23da5df2accbfd6af7df679a0f9376a5ff351a918222fc37e2df39d
3
  size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab480b419c0f298e92c97801c5e30cd6bf7b2e720ec80893bd6afb26e102af44
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc7a713a7c891eea844f1039f4fde31fa20db49886276b4d9efdfb0adc4f71c
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a15c47cefa9b6894ac6fc265d0b5a2c5f32b4f04f48025868ab92fe22c109d6
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756723c1a0732aaa05624ee70ae528bad508ab4125a6ec49d10c2305797cf89f
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0850d5b39e5e46479ee0d56a5f9b2df4ff7ac65b66947c5427afcf97c554f26
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:864fdc949856c3c644b2231c52465618168b961732a730093122274720c4b9ee
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.392650819206365,
5
- "global_step": 270000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3468,11 +3468,139 @@
3468
  "eval_samples_per_second": 166.39,
3469
  "eval_steps_per_second": 20.803,
3470
  "step": 270000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3471
  }
3472
  ],
3473
  "max_steps": 633540,
3474
  "num_train_epochs": 15,
3475
- "total_flos": 5.7077724099456e+16,
3476
  "trial_name": null,
3477
  "trial_params": null
3478
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.629415664362155,
5
+ "global_step": 280000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3468
  "eval_samples_per_second": 166.39,
3469
  "eval_steps_per_second": 20.803,
3470
  "step": 270000
3471
+ },
3472
+ {
3473
+ "epoch": 6.4,
3474
+ "learning_rate": 4.976860182466775e-06,
3475
+ "loss": 2.4924,
3476
+ "step": 270500
3477
+ },
3478
+ {
3479
+ "epoch": 6.42,
3480
+ "learning_rate": 4.91372289042523e-06,
3481
+ "loss": 2.484,
3482
+ "step": 271000
3483
+ },
3484
+ {
3485
+ "epoch": 6.43,
3486
+ "learning_rate": 4.850585598383686e-06,
3487
+ "loss": 2.5076,
3488
+ "step": 271500
3489
+ },
3490
+ {
3491
+ "epoch": 6.44,
3492
+ "learning_rate": 4.787448306342141e-06,
3493
+ "loss": 2.4958,
3494
+ "step": 272000
3495
+ },
3496
+ {
3497
+ "epoch": 6.45,
3498
+ "learning_rate": 4.724311014300597e-06,
3499
+ "loss": 2.4692,
3500
+ "step": 272500
3501
+ },
3502
+ {
3503
+ "epoch": 6.46,
3504
+ "learning_rate": 4.661173722259053e-06,
3505
+ "loss": 2.4638,
3506
+ "step": 273000
3507
+ },
3508
+ {
3509
+ "epoch": 6.48,
3510
+ "learning_rate": 4.598036430217509e-06,
3511
+ "loss": 2.4956,
3512
+ "step": 273500
3513
+ },
3514
+ {
3515
+ "epoch": 6.49,
3516
+ "learning_rate": 4.534899138175964e-06,
3517
+ "loss": 2.4985,
3518
+ "step": 274000
3519
+ },
3520
+ {
3521
+ "epoch": 6.5,
3522
+ "learning_rate": 4.47176184613442e-06,
3523
+ "loss": 2.5003,
3524
+ "step": 274500
3525
+ },
3526
+ {
3527
+ "epoch": 6.51,
3528
+ "learning_rate": 4.408624554092875e-06,
3529
+ "loss": 2.4914,
3530
+ "step": 275000
3531
+ },
3532
+ {
3533
+ "epoch": 6.52,
3534
+ "learning_rate": 4.345487262051331e-06,
3535
+ "loss": 2.4981,
3536
+ "step": 275500
3537
+ },
3538
+ {
3539
+ "epoch": 6.53,
3540
+ "learning_rate": 4.282349970009787e-06,
3541
+ "loss": 2.5008,
3542
+ "step": 276000
3543
+ },
3544
+ {
3545
+ "epoch": 6.55,
3546
+ "learning_rate": 4.219212677968242e-06,
3547
+ "loss": 2.5063,
3548
+ "step": 276500
3549
+ },
3550
+ {
3551
+ "epoch": 6.56,
3552
+ "learning_rate": 4.156075385926698e-06,
3553
+ "loss": 2.4841,
3554
+ "step": 277000
3555
+ },
3556
+ {
3557
+ "epoch": 6.57,
3558
+ "learning_rate": 4.092938093885154e-06,
3559
+ "loss": 2.4833,
3560
+ "step": 277500
3561
+ },
3562
+ {
3563
+ "epoch": 6.58,
3564
+ "learning_rate": 4.029800801843609e-06,
3565
+ "loss": 2.4839,
3566
+ "step": 278000
3567
+ },
3568
+ {
3569
+ "epoch": 6.59,
3570
+ "learning_rate": 3.966663509802065e-06,
3571
+ "loss": 2.489,
3572
+ "step": 278500
3573
+ },
3574
+ {
3575
+ "epoch": 6.61,
3576
+ "learning_rate": 3.90352621776052e-06,
3577
+ "loss": 2.4897,
3578
+ "step": 279000
3579
+ },
3580
+ {
3581
+ "epoch": 6.62,
3582
+ "learning_rate": 3.840388925718976e-06,
3583
+ "loss": 2.5142,
3584
+ "step": 279500
3585
+ },
3586
+ {
3587
+ "epoch": 6.63,
3588
+ "learning_rate": 3.7772516336774316e-06,
3589
+ "loss": 2.4862,
3590
+ "step": 280000
3591
+ },
3592
+ {
3593
+ "epoch": 6.63,
3594
+ "eval_loss": 3.1480441093444824,
3595
+ "eval_runtime": 112.7656,
3596
+ "eval_samples_per_second": 166.469,
3597
+ "eval_steps_per_second": 20.813,
3598
+ "step": 280000
3599
  }
3600
  ],
3601
  "max_steps": 633540,
3602
  "num_train_epochs": 15,
3603
+ "total_flos": 5.919198694848e+16,
3604
  "trial_name": null,
3605
  "trial_params": null
3606
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab480b419c0f298e92c97801c5e30cd6bf7b2e720ec80893bd6afb26e102af44
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0dc7a713a7c891eea844f1039f4fde31fa20db49886276b4d9efdfb0adc4f71c
3
  size 510398013