joelniklaus committed
Commit 10eaa83
1 Parent(s): 5449a45

Training in progress, step 550000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a62bfa8d277ab7ac59a8cc3a5e212fcf4e9bfaae3122967d2a6fc96a8b4c6af
+oid sha256:664e8bca0dbf713a2564be91841c5c4723d07b2319c3beb8290d8ccc3addabe7
 size 1475917081
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fbc9a1b6ca9b1946a07e482e5b1ab192e6e4c3ccbddb52ac6d681ffe0482303
+oid sha256:e9c8933e89b36b6243a69421363ed0e3f7593acb8f0d9962f799658502da2cb7
 size 737971755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48341d6b6e9b29741925f8b920a5a2e5299c8897009a4bb183581d01cbe522c8
+oid sha256:2247858ed0cbf9c0f40d07f640b745978b29094f2bfb6dae02f9ade590bc8e74
 size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:343c6b635858637988d8e8755a05f1cbe7ddf578dd01d595da23e4248a214be2
+oid sha256:2b390312ddc1614538c7fd82ca2c4639dfed127a83cb04c40dedde6f67b4e460
 size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.5,
-  "global_step": 500000,
+  "epoch": 0.55,
+  "global_step": 550000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3086,11 +3086,319 @@
       "eval_samples_per_second": 315.647,
       "eval_steps_per_second": 2.525,
       "step": 500000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 5.396416275909779e-05,
+      "loss": 0.8201,
+      "step": 501000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 5.379931489313016e-05,
+      "loss": 0.8238,
+      "step": 502000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 5.363442547846356e-05,
+      "loss": 0.8011,
+      "step": 503000
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 5.3469496318302204e-05,
+      "loss": 0.7981,
+      "step": 504000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.330452921628497e-05,
+      "loss": 0.8106,
+      "step": 505000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.313952597646568e-05,
+      "loss": 0.7583,
+      "step": 506000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.297448840329329e-05,
+      "loss": 0.7629,
+      "step": 507000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.280941830159227e-05,
+      "loss": 0.7515,
+      "step": 508000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.264431747654284e-05,
+      "loss": 0.7617,
+      "step": 509000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.247918773366112e-05,
+      "loss": 0.7944,
+      "step": 510000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.231403087877955e-05,
+      "loss": 0.8162,
+      "step": 511000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.214884871802703e-05,
+      "loss": 0.749,
+      "step": 512000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.198364305780922e-05,
+      "loss": 0.751,
+      "step": 513000
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 5.1818415704788725e-05,
+      "loss": 0.8097,
+      "step": 514000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.165316846586541e-05,
+      "loss": 0.8309,
+      "step": 515000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.148790314815663e-05,
+      "loss": 0.816,
+      "step": 516000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.132262155897739e-05,
+      "loss": 0.8112,
+      "step": 517000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.1157325505820694e-05,
+      "loss": 0.7831,
+      "step": 518000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.0992016796337686e-05,
+      "loss": 0.8395,
+      "step": 519000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.0826697238317935e-05,
+      "loss": 0.8362,
+      "step": 520000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.066136863966963e-05,
+      "loss": 0.8187,
+      "step": 521000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.0496032808399815e-05,
+      "loss": 0.796,
+      "step": 522000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.033069155259471e-05,
+      "loss": 0.7194,
+      "step": 523000
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 5.016534668039976e-05,
+      "loss": 0.8022,
+      "step": 524000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 5e-05,
+      "loss": 0.8106,
+      "step": 525000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.9834653319600246e-05,
+      "loss": 0.8439,
+      "step": 526000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.96693084474053e-05,
+      "loss": 0.8022,
+      "step": 527000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.950396719160018e-05,
+      "loss": 0.7898,
+      "step": 528000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.93386313603304e-05,
+      "loss": 0.83,
+      "step": 529000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.917330276168208e-05,
+      "loss": 0.8825,
+      "step": 530000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.9007983203662326e-05,
+      "loss": 0.8647,
+      "step": 531000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.884267449417931e-05,
+      "loss": 0.8069,
+      "step": 532000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.867737844102261e-05,
+      "loss": 0.7821,
+      "step": 533000
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 4.851209685184338e-05,
+      "loss": 0.8202,
+      "step": 534000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.834683153413459e-05,
+      "loss": 0.847,
+      "step": 535000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.818158429521129e-05,
+      "loss": 0.8438,
+      "step": 536000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.801635694219079e-05,
+      "loss": 0.8496,
+      "step": 537000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.785115128197298e-05,
+      "loss": 0.8661,
+      "step": 538000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.7685969121220456e-05,
+      "loss": 0.8841,
+      "step": 539000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.7520812266338885e-05,
+      "loss": 0.8682,
+      "step": 540000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.735568252345718e-05,
+      "loss": 0.8794,
+      "step": 541000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.7190581698407725e-05,
+      "loss": 0.8304,
+      "step": 542000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.702551159670672e-05,
+      "loss": 0.7666,
+      "step": 543000
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 4.6860474023534335e-05,
+      "loss": 0.7485,
+      "step": 544000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.669547078371504e-05,
+      "loss": 0.7481,
+      "step": 545000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.65305036816978e-05,
+      "loss": 0.7797,
+      "step": 546000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.6365574521536445e-05,
+      "loss": 0.7444,
+      "step": 547000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.620068510686985e-05,
+      "loss": 0.7772,
+      "step": 548000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.60358372409022e-05,
+      "loss": 0.7954,
+      "step": 549000
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.5871032726383386e-05,
+      "loss": 0.8096,
+      "step": 550000
+    },
+    {
+      "epoch": 0.55,
+      "eval_loss": 0.6220114231109619,
+      "eval_runtime": 17.5668,
+      "eval_samples_per_second": 284.628,
+      "eval_steps_per_second": 2.277,
+      "step": 550000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 8.432136486912e+18,
+  "total_flos": 9.2753501356032e+18,
   "trial_name": null,
   "trial_params": null
 }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5fbc9a1b6ca9b1946a07e482e5b1ab192e6e4c3ccbddb52ac6d681ffe0482303
+oid sha256:e9c8933e89b36b6243a69421363ed0e3f7593acb8f0d9962f799658502da2cb7
 size 737971755
runs/Dec28_00-25-33_t1v-n-07cfb9e3-w-0/events.out.tfevents.1672187175.t1v-n-07cfb9e3-w-0.13817.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f5bd9ddecadba1e373e203ce73a773557de15252d97e7ae002faffd980eaae7
-size 86484
+oid sha256:8f0060ff0ced6fd9e52bafc8acce0e7d78c67a648a33965e65230ea78a3469ea
+size 94760