MarkelFe commited on
Commit
de7cac6
1 Parent(s): f86303f

Training in progress, step 50000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dafd1a597d541396efb5c638af9a54bbc8ac18c68cd48a20f76d0ca2bc1e01ef
3
  size 995605189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d898d6ea104f17cbbf27e7461b1d6144bf9fe1ae55eea4068e53eb3839da02a
3
  size 995605189
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98a99a09048543fe282b50b60bfea4bb009b9decd582e348ad2d38001d140d18
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3004509848150ee75a25be67dfe66552d50a705fc14ae18f62d4bd5e0795e8c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6359c6c031dbde530d264752810896a8f9d4957f508f18193732bd65efe52d
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91a499652f62153d33f4dd0503b07f247980d8d57628f7715c2fef0cb3d0b038
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b9bb18d023d349a9a0f7837b4603b5fafaa2d673ffb5da28ae0fb78b43c6d7e
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9470593806231651,
5
- "global_step": 40000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -524,11 +524,139 @@
524
  "eval_samples_per_second": 166.017,
525
  "eval_steps_per_second": 20.757,
526
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  }
528
  ],
529
  "max_steps": 633540,
530
  "num_train_epochs": 15,
531
- "total_flos": 8586525680640000.0,
532
  "trial_name": null,
533
  "trial_params": null
534
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1838242257789564,
5
+ "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
524
  "eval_samples_per_second": 166.017,
525
  "eval_steps_per_second": 20.757,
526
  "step": 40000
527
+ },
528
+ {
529
+ "epoch": 0.96,
530
+ "learning_rate": 3.4020014521577174e-05,
531
+ "loss": 3.2013,
532
+ "step": 40500
533
+ },
534
+ {
535
+ "epoch": 0.97,
536
+ "learning_rate": 3.3956877229535626e-05,
537
+ "loss": 3.1909,
538
+ "step": 41000
539
+ },
540
+ {
541
+ "epoch": 0.98,
542
+ "learning_rate": 3.3893739937494084e-05,
543
+ "loss": 3.1857,
544
+ "step": 41500
545
+ },
546
+ {
547
+ "epoch": 0.99,
548
+ "learning_rate": 3.3830602645452536e-05,
549
+ "loss": 3.1904,
550
+ "step": 42000
551
+ },
552
+ {
553
+ "epoch": 1.01,
554
+ "learning_rate": 3.3767465353410995e-05,
555
+ "loss": 3.0815,
556
+ "step": 42500
557
+ },
558
+ {
559
+ "epoch": 1.02,
560
+ "learning_rate": 3.370432806136945e-05,
561
+ "loss": 3.0012,
562
+ "step": 43000
563
+ },
564
+ {
565
+ "epoch": 1.03,
566
+ "learning_rate": 3.3641190769327905e-05,
567
+ "loss": 2.9995,
568
+ "step": 43500
569
+ },
570
+ {
571
+ "epoch": 1.04,
572
+ "learning_rate": 3.357805347728636e-05,
573
+ "loss": 3.0085,
574
+ "step": 44000
575
+ },
576
+ {
577
+ "epoch": 1.05,
578
+ "learning_rate": 3.351491618524482e-05,
579
+ "loss": 3.0499,
580
+ "step": 44500
581
+ },
582
+ {
583
+ "epoch": 1.07,
584
+ "learning_rate": 3.3451778893203274e-05,
585
+ "loss": 3.0286,
586
+ "step": 45000
587
+ },
588
+ {
589
+ "epoch": 1.08,
590
+ "learning_rate": 3.338864160116173e-05,
591
+ "loss": 3.0233,
592
+ "step": 45500
593
+ },
594
+ {
595
+ "epoch": 1.09,
596
+ "learning_rate": 3.3325504309120184e-05,
597
+ "loss": 3.0465,
598
+ "step": 46000
599
+ },
600
+ {
601
+ "epoch": 1.1,
602
+ "learning_rate": 3.3262367017078635e-05,
603
+ "loss": 3.0409,
604
+ "step": 46500
605
+ },
606
+ {
607
+ "epoch": 1.11,
608
+ "learning_rate": 3.3199229725037094e-05,
609
+ "loss": 3.0265,
610
+ "step": 47000
611
+ },
612
+ {
613
+ "epoch": 1.12,
614
+ "learning_rate": 3.313609243299555e-05,
615
+ "loss": 3.0177,
616
+ "step": 47500
617
+ },
618
+ {
619
+ "epoch": 1.14,
620
+ "learning_rate": 3.3072955140954004e-05,
621
+ "loss": 3.0255,
622
+ "step": 48000
623
+ },
624
+ {
625
+ "epoch": 1.15,
626
+ "learning_rate": 3.300981784891246e-05,
627
+ "loss": 3.0417,
628
+ "step": 48500
629
+ },
630
+ {
631
+ "epoch": 1.16,
632
+ "learning_rate": 3.294668055687092e-05,
633
+ "loss": 3.0222,
634
+ "step": 49000
635
+ },
636
+ {
637
+ "epoch": 1.17,
638
+ "learning_rate": 3.288354326482937e-05,
639
+ "loss": 3.0286,
640
+ "step": 49500
641
+ },
642
+ {
643
+ "epoch": 1.18,
644
+ "learning_rate": 3.282040597278783e-05,
645
+ "loss": 3.0283,
646
+ "step": 50000
647
+ },
648
+ {
649
+ "epoch": 1.18,
650
+ "eval_loss": 3.1600818634033203,
651
+ "eval_runtime": 113.1846,
652
+ "eval_samples_per_second": 165.853,
653
+ "eval_steps_per_second": 20.736,
654
+ "step": 50000
655
  }
656
  ],
657
  "max_steps": 633540,
658
  "num_train_epochs": 15,
659
+ "total_flos": 1.0692746654976e+16,
660
  "trial_name": null,
661
  "trial_params": null
662
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529a26013828a3643211b2475be16389f662dc8c3c06d5719b6a70819da68c8a
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98a99a09048543fe282b50b60bfea4bb009b9decd582e348ad2d38001d140d18
3
  size 510398013