jflotz committed on
Commit
6343949
1 Parent(s): 6120dcc

Training in progress, step 160000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23bcf588931894b4b3e5c35722962c0de8f54f9d9596c1f09a2a4e5a7a2d9f29
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46bff3dba845cd7e7f19ae9dbbe463bda3e2fa6806365056b512226d84f8fe42
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80d9dd260c6a1eea214a50fb35a68c126c97cfb31dd7f81eeaee885739a9a21c
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd59d7cfb5049e46570c40bfa98b1ab6951678f21e1e8ee02bf24082eee2565
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:979f2083c322d2f1d58623c944daa2f548e1e3a498ad6f148608a638e1a6db3c
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd053fb6d27885aadc27c02a534d4a05640e7e350d6bb076a775c2584889f6ac
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:684030441e546f328363202be7e7a1e6d60b5494506eab9e81487ca712343e2e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc071f88617eb6afe60581ffbadbc2441a73aeec527e5556ce742e0646660ab9
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.691648822269808,
5
- "global_step": 150000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3006,11 +3006,211 @@
3006
  "eval_samples_per_second": 1061.094,
3007
  "eval_steps_per_second": 16.63,
3008
  "step": 150000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3009
  }
3010
  ],
3011
  "max_steps": 250000,
3012
  "num_train_epochs": 12,
3013
- "total_flos": 2.40247725109402e+21,
3014
  "trial_name": null,
3015
  "trial_params": null
3016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 7.137758743754461,
5
+ "global_step": 160000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3006
  "eval_samples_per_second": 1061.094,
3007
  "eval_steps_per_second": 16.63,
3008
  "step": 150000
3009
+ },
3010
+ {
3011
+ "epoch": 6.71,
3012
+ "learning_rate": 0.00023069198757093631,
3013
+ "loss": 0.379,
3014
+ "step": 150500
3015
+ },
3016
+ {
3017
+ "epoch": 6.74,
3018
+ "learning_rate": 0.00022880544432159663,
3019
+ "loss": 0.3833,
3020
+ "step": 151000
3021
+ },
3022
+ {
3023
+ "epoch": 6.74,
3024
+ "eval_loss": 0.3517898917198181,
3025
+ "eval_runtime": 2.2,
3026
+ "eval_samples_per_second": 1044.076,
3027
+ "eval_steps_per_second": 16.363,
3028
+ "step": 151000
3029
+ },
3030
+ {
3031
+ "epoch": 6.76,
3032
+ "learning_rate": 0.00022692223406895848,
3033
+ "loss": 0.3782,
3034
+ "step": 151500
3035
+ },
3036
+ {
3037
+ "epoch": 6.78,
3038
+ "learning_rate": 0.000225042439190739,
3039
+ "loss": 0.3805,
3040
+ "step": 152000
3041
+ },
3042
+ {
3043
+ "epoch": 6.78,
3044
+ "eval_loss": 0.35134270787239075,
3045
+ "eval_runtime": 2.2005,
3046
+ "eval_samples_per_second": 1043.87,
3047
+ "eval_steps_per_second": 16.36,
3048
+ "step": 152000
3049
+ },
3050
+ {
3051
+ "epoch": 6.8,
3052
+ "learning_rate": 0.00022316614191525587,
3053
+ "loss": 0.3788,
3054
+ "step": 152500
3055
+ },
3056
+ {
3057
+ "epoch": 6.83,
3058
+ "learning_rate": 0.00022129342431783026,
3059
+ "loss": 0.3785,
3060
+ "step": 153000
3061
+ },
3062
+ {
3063
+ "epoch": 6.83,
3064
+ "eval_loss": 0.35215121507644653,
3065
+ "eval_runtime": 2.2421,
3066
+ "eval_samples_per_second": 1024.496,
3067
+ "eval_steps_per_second": 16.057,
3068
+ "step": 153000
3069
+ },
3070
+ {
3071
+ "epoch": 6.85,
3072
+ "learning_rate": 0.00021942436831719677,
3073
+ "loss": 0.3776,
3074
+ "step": 153500
3075
+ },
3076
+ {
3077
+ "epoch": 6.87,
3078
+ "learning_rate": 0.00021755905567191967,
3079
+ "loss": 0.3772,
3080
+ "step": 154000
3081
+ },
3082
+ {
3083
+ "epoch": 6.87,
3084
+ "eval_loss": 0.34930697083473206,
3085
+ "eval_runtime": 2.254,
3086
+ "eval_samples_per_second": 1019.092,
3087
+ "eval_steps_per_second": 15.972,
3088
+ "step": 154000
3089
+ },
3090
+ {
3091
+ "epoch": 6.89,
3092
+ "learning_rate": 0.00021569756797681686,
3093
+ "loss": 0.3775,
3094
+ "step": 154500
3095
+ },
3096
+ {
3097
+ "epoch": 6.91,
3098
+ "learning_rate": 0.00021383998665939054,
3099
+ "loss": 0.3772,
3100
+ "step": 155000
3101
+ },
3102
+ {
3103
+ "epoch": 6.91,
3104
+ "eval_loss": 0.35032403469085693,
3105
+ "eval_runtime": 2.2005,
3106
+ "eval_samples_per_second": 1043.877,
3107
+ "eval_steps_per_second": 16.36,
3108
+ "step": 155000
3109
+ },
3110
+ {
3111
+ "epoch": 6.94,
3112
+ "learning_rate": 0.00021198639297626516,
3113
+ "loss": 0.3769,
3114
+ "step": 155500
3115
+ },
3116
+ {
3117
+ "epoch": 6.96,
3118
+ "learning_rate": 0.0002101368680096334,
3119
+ "loss": 0.3771,
3120
+ "step": 156000
3121
+ },
3122
+ {
3123
+ "epoch": 6.96,
3124
+ "eval_loss": 0.35130995512008667,
3125
+ "eval_runtime": 2.2636,
3126
+ "eval_samples_per_second": 1014.77,
3127
+ "eval_steps_per_second": 15.904,
3128
+ "step": 156000
3129
+ },
3130
+ {
3131
+ "epoch": 6.98,
3132
+ "learning_rate": 0.00020829149266370862,
3133
+ "loss": 0.377,
3134
+ "step": 156500
3135
+ },
3136
+ {
3137
+ "epoch": 7.0,
3138
+ "learning_rate": 0.00020645034766118703,
3139
+ "loss": 0.3769,
3140
+ "step": 157000
3141
+ },
3142
+ {
3143
+ "epoch": 7.0,
3144
+ "eval_loss": 0.35048824548721313,
3145
+ "eval_runtime": 2.2749,
3146
+ "eval_samples_per_second": 1009.702,
3147
+ "eval_steps_per_second": 15.825,
3148
+ "step": 157000
3149
+ },
3150
+ {
3151
+ "epoch": 7.03,
3152
+ "learning_rate": 0.00020461351353971526,
3153
+ "loss": 0.3766,
3154
+ "step": 157500
3155
+ },
3156
+ {
3157
+ "epoch": 7.05,
3158
+ "learning_rate": 0.00020278107064836847,
3159
+ "loss": 0.3766,
3160
+ "step": 158000
3161
+ },
3162
+ {
3163
+ "epoch": 7.05,
3164
+ "eval_loss": 0.3498741090297699,
3165
+ "eval_runtime": 2.2625,
3166
+ "eval_samples_per_second": 1015.239,
3167
+ "eval_steps_per_second": 15.911,
3168
+ "step": 158000
3169
+ },
3170
+ {
3171
+ "epoch": 7.07,
3172
+ "learning_rate": 0.00020095309914413485,
3173
+ "loss": 0.3764,
3174
+ "step": 158500
3175
+ },
3176
+ {
3177
+ "epoch": 7.09,
3178
+ "learning_rate": 0.00019912967898840997,
3179
+ "loss": 0.3762,
3180
+ "step": 159000
3181
+ },
3182
+ {
3183
+ "epoch": 7.09,
3184
+ "eval_loss": 0.34895479679107666,
3185
+ "eval_runtime": 2.1885,
3186
+ "eval_samples_per_second": 1049.579,
3187
+ "eval_steps_per_second": 16.45,
3188
+ "step": 159000
3189
+ },
3190
+ {
3191
+ "epoch": 7.12,
3192
+ "learning_rate": 0.00019731088994349834,
3193
+ "loss": 0.3763,
3194
+ "step": 159500
3195
+ },
3196
+ {
3197
+ "epoch": 7.14,
3198
+ "learning_rate": 0.0001954968115691248,
3199
+ "loss": 0.376,
3200
+ "step": 160000
3201
+ },
3202
+ {
3203
+ "epoch": 7.14,
3204
+ "eval_loss": 0.3464561402797699,
3205
+ "eval_runtime": 2.2955,
3206
+ "eval_samples_per_second": 1000.637,
3207
+ "eval_steps_per_second": 15.683,
3208
+ "step": 160000
3209
  }
3210
  ],
3211
  "max_steps": 250000,
3212
  "num_train_epochs": 12,
3213
+ "total_flos": 2.5626381219776503e+21,
3214
  "trial_name": null,
3215
  "trial_params": null
3216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80d9dd260c6a1eea214a50fb35a68c126c97cfb31dd7f81eeaee885739a9a21c
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd59d7cfb5049e46570c40bfa98b1ab6951678f21e1e8ee02bf24082eee2565
3
  size 25761253