joelniklaus committed
Commit 18666c0
1 Parent(s): f18be93

Training in progress, step 500000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1b43161845c8e6bda7b95036487c6c5183ff73534909a2f952ed1b54df93899
+oid sha256:3c209d7d792409b07dabad05566654bf1047f949bf8a11f25025f639825d32da
 size 2693742553
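The checkpoint payloads are stored via Git LFS, so the repository only tracks small pointer files (version, oid, size); this commit swaps the sha256 oid while the optimizer blob size stays the same. As a minimal sketch (the local paths are illustrative, not part of this commit), a downloaded blob can be checked against such a pointer like this:

```python
import hashlib
import sys

def parse_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into its version/oid/size fields."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path, chunk_size=1 << 20):
    """Check that a downloaded file matches the oid and size recorded in the pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected_oid and size == expected_size

if __name__ == "__main__":
    # Illustrative paths: the un-smudged pointer file and the separately downloaded blob.
    ok = verify_blob("last-checkpoint/optimizer.pt", "downloads/optimizer.pt")
    sys.exit(0 if ok else 1)
```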
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9c8b2494cce6ffb8f8fa2a74583f549bfdaa9d5b42d682399d2b1885ce2b565
+oid sha256:ac187bef9a34d630faee9ec0cd127f331d3e2fe8b57735f9ec2546c182382abb
 size 1346893675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8e01afc5629600fbeb0ae6d44b6a9d9e243c9b03ab21b6e5cd16e2d2d04f793
+oid sha256:5bad853d40d48fdb79c5e32b7c00a4df540c20194e32eb4e912f8065e53140d7
 size 13611
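The eight rng_state_{0..7}.pth files hold one random-number-generator snapshot per training process, which is what lets a resumed run reproduce the same shuffling and dropout pattern. A quick way to peek at one, assuming it is an ordinary torch-serialized dict (the exact keys depend on the Trainer version):

```python
import torch

# Illustrative path; each rank has its own file (rng_state_0.pth ... rng_state_7.pth).
state = torch.load("last-checkpoint/rng_state_0.pth", map_location="cpu")

# Print what was captured; typically entries for the Python, NumPy, and torch RNGs.
for name, value in state.items():
    print(name, type(value))
```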
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78e735efa7e40e0dd22dcac5cb3724b0cbe120563d603ea4b62f22b0f40fc602
+oid sha256:343c6b635858637988d8e8755a05f1cbe7ddf578dd01d595da23e4248a214be2
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.106896,
-  "global_step": 450000,
+  "epoch": 4.022764,
+  "global_step": 500000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2778,11 +2778,319 @@
       "eval_samples_per_second": 273.578,
       "eval_steps_per_second": 4.323,
       "step": 450000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.211392012633932e-05,
+      "loss": 0.5995,
+      "step": 451000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.195343341974899e-05,
+      "loss": 0.5591,
+      "step": 452000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.179281599232591e-05,
+      "loss": 0.4996,
+      "step": 453000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.163206960055651e-05,
+      "loss": 0.6401,
+      "step": 454000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.147119600233758e-05,
+      "loss": 0.6407,
+      "step": 455000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.131019695695702e-05,
+      "loss": 0.6093,
+      "step": 456000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.11490742250746e-05,
+      "loss": 0.5331,
+      "step": 457000
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.0987829568702656e-05,
+      "loss": 0.5195,
+      "step": 458000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.0826464751186994e-05,
+      "loss": 0.5519,
+      "step": 459000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.066498153718735e-05,
+      "loss": 0.6198,
+      "step": 460000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.05033816926583e-05,
+      "loss": 0.6362,
+      "step": 461000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.034166698482984e-05,
+      "loss": 0.6054,
+      "step": 462000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.017983918218812e-05,
+      "loss": 0.5557,
+      "step": 463000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.001790005445607e-05,
+      "loss": 0.4995,
+      "step": 464000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 5.985585137257401e-05,
+      "loss": 0.6346,
+      "step": 465000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 5.969369490868042e-05,
+      "loss": 0.6212,
+      "step": 466000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 5.953143243609235e-05,
+      "loss": 0.6074,
+      "step": 467000
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 5.9369065729286245e-05,
+      "loss": 0.5449,
+      "step": 468000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.9206596563878357e-05,
+      "loss": 0.5182,
+      "step": 469000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.90440267166055e-05,
+      "loss": 0.5475,
+      "step": 470000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.888135796530544e-05,
+      "loss": 0.6102,
+      "step": 471000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.871859208889759e-05,
+      "loss": 0.6328,
+      "step": 472000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.85557308673635e-05,
+      "loss": 0.6164,
+      "step": 473000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.8392776081727385e-05,
+      "loss": 0.5453,
+      "step": 474000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.8229729514036705e-05,
+      "loss": 0.5044,
+      "step": 475000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.8066592947342555e-05,
+      "loss": 0.6267,
+      "step": 476000
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 5.7903368165680327e-05,
+      "loss": 0.6045,
+      "step": 477000
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.7740056954050084e-05,
+      "loss": 0.5922,
+      "step": 478000
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.757666109839702e-05,
+      "loss": 0.6466,
+      "step": 479000
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.74131823855921e-05,
+      "loss": 0.6223,
+      "step": 480000
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.72496226034123e-05,
+      "loss": 0.5029,
+      "step": 481000
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.7085983540521216e-05,
+      "loss": 0.4718,
+      "step": 482000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.692226698644938e-05,
+      "loss": 0.596,
+      "step": 483000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.675847473157485e-05,
+      "loss": 0.6254,
+      "step": 484000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.6594608567103456e-05,
+      "loss": 0.657,
+      "step": 485000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.6430670285049314e-05,
+      "loss": 0.5453,
+      "step": 486000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.6266661678215216e-05,
+      "loss": 0.5089,
+      "step": 487000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.6102584540173006e-05,
+      "loss": 0.5764,
+      "step": 488000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.5938440665244006e-05,
+      "loss": 0.5935,
+      "step": 489000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.577423184847932e-05,
+      "loss": 0.6528,
+      "step": 490000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.560995988564023e-05,
+      "loss": 0.6246,
+      "step": 491000
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.544562657317863e-05,
+      "loss": 0.5041,
+      "step": 492000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.52812337082173e-05,
+      "loss": 0.4668,
+      "step": 493000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.511678308853026e-05,
+      "loss": 0.5947,
+      "step": 494000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.495227651252315e-05,
+      "loss": 0.6195,
+      "step": 495000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.478771577921351e-05,
+      "loss": 0.6533,
+      "step": 496000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.462310268821118e-05,
+      "loss": 0.548,
+      "step": 497000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.445843903969854e-05,
+      "loss": 0.5116,
+      "step": 498000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.4293726634410855e-05,
+      "loss": 0.565,
+      "step": 499000
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.4128967273616625e-05,
+      "loss": 0.5847,
+      "step": 500000
+    },
+    {
+      "epoch": 4.02,
+      "eval_loss": 0.3354858458042145,
+      "eval_runtime": 18.2066,
+      "eval_samples_per_second": 274.625,
+      "eval_steps_per_second": 4.339,
+      "step": 500000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 2.684246915457378e+19,
+  "total_flos": 2.982497152659987e+19,
   "trial_name": null,
   "trial_params": null
 }
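The trainer_state.json diff appends the log entries for steps 451 000 through 500 000, updates epoch and global_step, and bumps total_flos. Since each logged entry carries epoch, learning_rate, loss, and step (with eval_* fields at evaluation steps), the file is easy to turn into a training curve. A minimal sketch, assuming the entries live under the usual log_history key:

```python
import json

with open("last-checkpoint/trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss" instead.
train_log = [e for e in state.get("log_history", []) if "loss" in e]
eval_log = [e for e in state.get("log_history", []) if "eval_loss" in e]

# Print the last few training points and the latest evaluation point.
for entry in train_log[-5:]:
    print(entry["step"], entry["learning_rate"], entry["loss"])
for entry in eval_log[-1:]:
    print(entry["step"], entry["eval_loss"], entry["eval_samples_per_second"])
```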
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9c8b2494cce6ffb8f8fa2a74583f549bfdaa9d5b42d682399d2b1885ce2b565
+oid sha256:ac187bef9a34d630faee9ec0cd127f331d3e2fe8b57735f9ec2546c182382abb
 size 1346893675
runs/Jan25_00-38-33_t1v-n-15e54913-w-0/events.out.tfevents.1674607146.t1v-n-15e54913-w-0.589238.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88cd7e4595e4d61202d20a54a751c1b6fcd33f74c1aca86faa4ec2c43066de6a
-size 53436
+oid sha256:1256053eaffa2aaea4ea73f5aa007f4811fb073d001135e08b955f17f2b23a9c
+size 61712
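The runs/.../events.out.tfevents.* file is the TensorBoard log, which grows with every logging step; that is why both its oid and size change here. It can be read back without launching TensorBoard, for example with the event accumulator (the scalar tag names are not shown in this commit and vary by Trainer version, so treat them as assumptions):

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Illustrative path: point at the run directory (or a single event file) from this repository.
acc = EventAccumulator("runs/Jan25_00-38-33_t1v-n-15e54913-w-0")
acc.Reload()

# List the available scalar tags, then dump the first one as (step, value) pairs.
scalar_tags = acc.Tags()["scalars"]
print(scalar_tags)
if scalar_tags:
    for event in acc.Scalars(scalar_tags[0]):
        print(event.step, event.value)
```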