jflotz committed on
Commit
9833961
1 Parent(s): 933465f

Training in progress, step 140000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb9dcc4ff2bc74ab69f0263634058bb8fb9cc796ba30099ac6af07a524413216
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcab0d8867b5b850ae68e36d8ef2c70a5dbfa9a4b065a13a6b362e3397cf3fe2
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77770474995765e28cd3f772259c3b9f70956913fcf88d26cfbea63dec9f29f8
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f41bd458cd3584fbcfdfac193494afb86160004f3f15f4d7c8df5b7ded36762
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3017ce4acc395c8911d9ef1e39e206b06d5a44dba6f5be7a0a365fda3aceface
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f52dddb37e26c52f49ab4dce6c56c4f27359125886e16875691cfe50bcc29ef0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa61e63d6ec853afa02e48d5167bab30a383bd9f05f192b20c686fb9a3478097
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed68d92642b5c57649c135331b8243d8047b1dee7f4eb5f6f68f9dc4d2f32821
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.799428979300499,
5
- "global_step": 130000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
2606
  "eval_samples_per_second": 1064.4,
2607
  "eval_steps_per_second": 16.682,
2608
  "step": 130000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "max_steps": 250000,
2612
  "num_train_epochs": 12,
2613
- "total_flos": 2.0821483773445852e+21,
2614
  "trial_name": null,
2615
  "trial_params": null
2616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.245538900785154,
5
+ "global_step": 140000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 1064.4,
2607
  "eval_steps_per_second": 16.682,
2608
  "step": 130000
2609
+ },
2610
+ {
2611
+ "epoch": 5.82,
2612
+ "learning_rate": 0.00030792659356955893,
2613
+ "loss": 0.3853,
2614
+ "step": 130500
2615
+ },
2616
+ {
2617
+ "epoch": 5.84,
2618
+ "learning_rate": 0.0003059755454143586,
2619
+ "loss": 0.3849,
2620
+ "step": 131000
2621
+ },
2622
+ {
2623
+ "epoch": 5.84,
2624
+ "eval_loss": 0.3566107451915741,
2625
+ "eval_runtime": 2.2772,
2626
+ "eval_samples_per_second": 1008.7,
2627
+ "eval_steps_per_second": 15.809,
2628
+ "step": 131000
2629
+ },
2630
+ {
2631
+ "epoch": 5.87,
2632
+ "learning_rate": 0.00030402445458564144,
2633
+ "loss": 0.3845,
2634
+ "step": 131500
2635
+ },
2636
+ {
2637
+ "epoch": 5.89,
2638
+ "learning_rate": 0.0003020734064304411,
2639
+ "loss": 0.3845,
2640
+ "step": 132000
2641
+ },
2642
+ {
2643
+ "epoch": 5.89,
2644
+ "eval_loss": 0.35691574215888977,
2645
+ "eval_runtime": 2.2557,
2646
+ "eval_samples_per_second": 1018.294,
2647
+ "eval_steps_per_second": 15.959,
2648
+ "step": 132000
2649
+ },
2650
+ {
2651
+ "epoch": 5.91,
2652
+ "learning_rate": 0.00030012248629392423,
2653
+ "loss": 0.3842,
2654
+ "step": 132500
2655
+ },
2656
+ {
2657
+ "epoch": 5.93,
2658
+ "learning_rate": 0.00029817177951565793,
2659
+ "loss": 0.384,
2660
+ "step": 133000
2661
+ },
2662
+ {
2663
+ "epoch": 5.93,
2664
+ "eval_loss": 0.35665038228034973,
2665
+ "eval_runtime": 2.2062,
2666
+ "eval_samples_per_second": 1041.164,
2667
+ "eval_steps_per_second": 16.318,
2668
+ "step": 133000
2669
+ },
2670
+ {
2671
+ "epoch": 5.96,
2672
+ "learning_rate": 0.00029622137142587594,
2673
+ "loss": 0.3837,
2674
+ "step": 133500
2675
+ },
2676
+ {
2677
+ "epoch": 5.98,
2678
+ "learning_rate": 0.0002942713473417466,
2679
+ "loss": 0.3921,
2680
+ "step": 134000
2681
+ },
2682
+ {
2683
+ "epoch": 5.98,
2684
+ "eval_loss": 0.3628464341163635,
2685
+ "eval_runtime": 2.2578,
2686
+ "eval_samples_per_second": 1017.356,
2687
+ "eval_steps_per_second": 15.945,
2688
+ "step": 134000
2689
+ },
2690
+ {
2691
+ "epoch": 6.0,
2692
+ "learning_rate": 0.00029232179256364054,
2693
+ "loss": 0.3854,
2694
+ "step": 134500
2695
+ },
2696
+ {
2697
+ "epoch": 6.02,
2698
+ "learning_rate": 0.0002903727923713994,
2699
+ "loss": 0.3844,
2700
+ "step": 135000
2701
+ },
2702
+ {
2703
+ "epoch": 6.02,
2704
+ "eval_loss": 0.3565491735935211,
2705
+ "eval_runtime": 2.2392,
2706
+ "eval_samples_per_second": 1025.813,
2707
+ "eval_steps_per_second": 16.077,
2708
+ "step": 135000
2709
+ },
2710
+ {
2711
+ "epoch": 6.04,
2712
+ "learning_rate": 0.00028842443202060556,
2713
+ "loss": 0.383,
2714
+ "step": 135500
2715
+ },
2716
+ {
2717
+ "epoch": 6.07,
2718
+ "learning_rate": 0.00028647679673885255,
2719
+ "loss": 0.383,
2720
+ "step": 136000
2721
+ },
2722
+ {
2723
+ "epoch": 6.07,
2724
+ "eval_loss": 0.35466697812080383,
2725
+ "eval_runtime": 2.2168,
2726
+ "eval_samples_per_second": 1036.2,
2727
+ "eval_steps_per_second": 16.24,
2728
+ "step": 136000
2729
+ },
2730
+ {
2731
+ "epoch": 6.09,
2732
+ "learning_rate": 0.000284529971722017,
2733
+ "loss": 0.3829,
2734
+ "step": 136500
2735
+ },
2736
+ {
2737
+ "epoch": 6.11,
2738
+ "learning_rate": 0.0002825840421305321,
2739
+ "loss": 0.3828,
2740
+ "step": 137000
2741
+ },
2742
+ {
2743
+ "epoch": 6.11,
2744
+ "eval_loss": 0.3585571050643921,
2745
+ "eval_runtime": 2.2164,
2746
+ "eval_samples_per_second": 1036.381,
2747
+ "eval_steps_per_second": 16.243,
2748
+ "step": 137000
2749
+ },
2750
+ {
2751
+ "epoch": 6.13,
2752
+ "learning_rate": 0.00028063909308566196,
2753
+ "loss": 0.3826,
2754
+ "step": 137500
2755
+ },
2756
+ {
2757
+ "epoch": 6.16,
2758
+ "learning_rate": 0.00027869520966577874,
2759
+ "loss": 0.3824,
2760
+ "step": 138000
2761
+ },
2762
+ {
2763
+ "epoch": 6.16,
2764
+ "eval_loss": 0.35530510544776917,
2765
+ "eval_runtime": 2.2219,
2766
+ "eval_samples_per_second": 1033.801,
2767
+ "eval_steps_per_second": 16.202,
2768
+ "step": 138000
2769
+ },
2770
+ {
2771
+ "epoch": 6.18,
2772
+ "learning_rate": 0.00027675247690264027,
2773
+ "loss": 0.3823,
2774
+ "step": 138500
2775
+ },
2776
+ {
2777
+ "epoch": 6.2,
2778
+ "learning_rate": 0.0002748109797776715,
2779
+ "loss": 0.3825,
2780
+ "step": 139000
2781
+ },
2782
+ {
2783
+ "epoch": 6.2,
2784
+ "eval_loss": 0.3549206256866455,
2785
+ "eval_runtime": 2.1769,
2786
+ "eval_samples_per_second": 1055.166,
2787
+ "eval_steps_per_second": 16.537,
2788
+ "step": 139000
2789
+ },
2790
+ {
2791
+ "epoch": 6.22,
2792
+ "learning_rate": 0.0002728708032182461,
2793
+ "loss": 0.382,
2794
+ "step": 139500
2795
+ },
2796
+ {
2797
+ "epoch": 6.25,
2798
+ "learning_rate": 0.0002709320320939721,
2799
+ "loss": 0.3818,
2800
+ "step": 140000
2801
+ },
2802
+ {
2803
+ "epoch": 6.25,
2804
+ "eval_loss": 0.3537048399448395,
2805
+ "eval_runtime": 2.2266,
2806
+ "eval_samples_per_second": 1031.628,
2807
+ "eval_steps_per_second": 16.168,
2808
+ "step": 140000
2809
  }
2810
  ],
2811
  "max_steps": 250000,
2812
  "num_train_epochs": 12,
2813
+ "total_flos": 2.2423092482282158e+21,
2814
  "trial_name": null,
2815
  "trial_params": null
2816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77770474995765e28cd3f772259c3b9f70956913fcf88d26cfbea63dec9f29f8
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f41bd458cd3584fbcfdfac193494afb86160004f3f15f4d7c8df5b7ded36762
3
  size 25761253