MarkelFe committed on
Commit
faeb024
1 Parent(s): 11dd3f4

Training in progress, step 240000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c9d709a3c4448fe60ea415f10ee91a070b24f9162901c412c4e21dcfd54ab7b
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6513746a4d26d5fd8e8b1510e4afbe1bf070c07834ed6053beed680b7ca90b2
3
  size 995605445
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc9c398625e51025e3deb582f2b959048d2b72dfb099c42bbf32395fdf91c5a
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefbf39ee5b2d619291bd576add06f62ce7d41ddd117d09ee485a01c6448e0e6
3
  size 510398013
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75d2ca6ab96f3202129b4649f78a5046017c32ffaaa3dc647813c032cd6852a6
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f8148796f898b2b4fcf5c05df1927f2a493f35953ae82aca4269bacc8abba0
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d94ea35adb5eefa4698c691c0c27296b7c47b8b7685eea584b57c3f9813cff98
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff1a502467cfb3dd9e512dddfc344981425abb5014f29f7395635782b358b80b
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.445591438583199,
5
- "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2956,11 +2956,139 @@
2956
  "eval_samples_per_second": 166.094,
2957
  "eval_steps_per_second": 20.766,
2958
  "step": 230000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2959
  }
2960
  ],
2961
  "max_steps": 633540,
2962
  "num_train_epochs": 15,
2963
- "total_flos": 4.8674597867136e+16,
2964
  "trial_name": null,
2965
  "trial_params": null
2966
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.68235628373899,
5
+ "global_step": 240000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2956
  "eval_samples_per_second": 166.094,
2957
  "eval_steps_per_second": 20.766,
2958
  "step": 230000
2959
+ },
2960
+ {
2961
+ "epoch": 5.46,
2962
+ "learning_rate": 1.0027843545790321e-05,
2963
+ "loss": 2.56,
2964
+ "step": 230500
2965
+ },
2966
+ {
2967
+ "epoch": 5.47,
2968
+ "learning_rate": 9.964706253748776e-06,
2969
+ "loss": 2.5605,
2970
+ "step": 231000
2971
+ },
2972
+ {
2973
+ "epoch": 5.48,
2974
+ "learning_rate": 9.901568961707233e-06,
2975
+ "loss": 2.5527,
2976
+ "step": 231500
2977
+ },
2978
+ {
2979
+ "epoch": 5.49,
2980
+ "learning_rate": 9.838431669665688e-06,
2981
+ "loss": 2.5613,
2982
+ "step": 232000
2983
+ },
2984
+ {
2985
+ "epoch": 5.5,
2986
+ "learning_rate": 9.775294377624145e-06,
2987
+ "loss": 2.5698,
2988
+ "step": 232500
2989
+ },
2990
+ {
2991
+ "epoch": 5.52,
2992
+ "learning_rate": 9.7121570855826e-06,
2993
+ "loss": 2.5681,
2994
+ "step": 233000
2995
+ },
2996
+ {
2997
+ "epoch": 5.53,
2998
+ "learning_rate": 9.649019793541055e-06,
2999
+ "loss": 2.5763,
3000
+ "step": 233500
3001
+ },
3002
+ {
3003
+ "epoch": 5.54,
3004
+ "learning_rate": 9.585882501499512e-06,
3005
+ "loss": 2.5602,
3006
+ "step": 234000
3007
+ },
3008
+ {
3009
+ "epoch": 5.55,
3010
+ "learning_rate": 9.522745209457967e-06,
3011
+ "loss": 2.5559,
3012
+ "step": 234500
3013
+ },
3014
+ {
3015
+ "epoch": 5.56,
3016
+ "learning_rate": 9.459607917416422e-06,
3017
+ "loss": 2.5877,
3018
+ "step": 235000
3019
+ },
3020
+ {
3021
+ "epoch": 5.58,
3022
+ "learning_rate": 9.396470625374878e-06,
3023
+ "loss": 2.5779,
3024
+ "step": 235500
3025
+ },
3026
+ {
3027
+ "epoch": 5.59,
3028
+ "learning_rate": 9.333333333333334e-06,
3029
+ "loss": 2.5723,
3030
+ "step": 236000
3031
+ },
3032
+ {
3033
+ "epoch": 5.6,
3034
+ "learning_rate": 9.27019604129179e-06,
3035
+ "loss": 2.5695,
3036
+ "step": 236500
3037
+ },
3038
+ {
3039
+ "epoch": 5.61,
3040
+ "learning_rate": 9.207058749250245e-06,
3041
+ "loss": 2.576,
3042
+ "step": 237000
3043
+ },
3044
+ {
3045
+ "epoch": 5.62,
3046
+ "learning_rate": 9.143921457208701e-06,
3047
+ "loss": 2.5758,
3048
+ "step": 237500
3049
+ },
3050
+ {
3051
+ "epoch": 5.64,
3052
+ "learning_rate": 9.080784165167156e-06,
3053
+ "loss": 2.5674,
3054
+ "step": 238000
3055
+ },
3056
+ {
3057
+ "epoch": 5.65,
3058
+ "learning_rate": 9.017646873125613e-06,
3059
+ "loss": 2.5822,
3060
+ "step": 238500
3061
+ },
3062
+ {
3063
+ "epoch": 5.66,
3064
+ "learning_rate": 8.954509581084068e-06,
3065
+ "loss": 2.5668,
3066
+ "step": 239000
3067
+ },
3068
+ {
3069
+ "epoch": 5.67,
3070
+ "learning_rate": 8.891372289042523e-06,
3071
+ "loss": 2.5873,
3072
+ "step": 239500
3073
+ },
3074
+ {
3075
+ "epoch": 5.68,
3076
+ "learning_rate": 8.82823499700098e-06,
3077
+ "loss": 2.5795,
3078
+ "step": 240000
3079
+ },
3080
+ {
3081
+ "epoch": 5.68,
3082
+ "eval_loss": 3.1380364894866943,
3083
+ "eval_runtime": 112.9332,
3084
+ "eval_samples_per_second": 166.222,
3085
+ "eval_steps_per_second": 20.782,
3086
+ "step": 240000
3087
  }
3088
  ],
3089
  "max_steps": 633540,
3090
  "num_train_epochs": 15,
3091
+ "total_flos": 5.078657441088e+16,
3092
  "trial_name": null,
3093
  "trial_params": null
3094
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbc9c398625e51025e3deb582f2b959048d2b72dfb099c42bbf32395fdf91c5a
3
  size 510398013
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eefbf39ee5b2d619291bd576add06f62ce7d41ddd117d09ee485a01c6448e0e6
3
  size 510398013