MarkelFe commited on
Commit
93546d2
1 Parent(s): f6b6805

Training in progress, step 420000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c55164556dd25d6a582fef22bc2c651808a168730ec26b5c6008062fd0f7cc5d
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d8ec6028dead3d065f5e19f39c225d85816b62dfc54fdff7b0a67b5e1553ac
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d13f3b70adc2f936ec547de1ec36f77495ca229446b2ed5ee40a8227e58a819b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:177da09f25973ce374c55c97d3fcb0a54d0e03455809b93e6dc65d55606d23b1
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc51395fb5526d062f2906017676468fbf5f119fecfd463d76e3f9fc8a940a31
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0a0d52267e40d2e8071377ef892e2dec84343c6b0d715ab78d160ed1be31b19
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.707358651387443,
5
- "global_step": 410000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5260,11 +5260,139 @@
5260
  "eval_samples_per_second": 166.314,
5261
  "eval_steps_per_second": 20.794,
5262
  "step": 410000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5263
  }
5264
  ],
5265
  "max_steps": 633540,
5266
  "num_train_epochs": 15,
5267
- "total_flos": 8.6548090457088e+16,
5268
  "trial_name": null,
5269
  "trial_params": null
5270
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.944123496543233,
5
+ "global_step": 420000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5260
  "eval_samples_per_second": 166.314,
5261
  "eval_steps_per_second": 20.794,
5262
  "step": 410000
5263
+ },
5264
+ {
5265
+ "epoch": 9.72,
5266
+ "learning_rate": 0.0,
5267
+ "loss": 2.4291,
5268
+ "step": 410500
5269
+ },
5270
+ {
5271
+ "epoch": 9.73,
5272
+ "learning_rate": 0.0,
5273
+ "loss": 2.4174,
5274
+ "step": 411000
5275
+ },
5276
+ {
5277
+ "epoch": 9.74,
5278
+ "learning_rate": 0.0,
5279
+ "loss": 2.4336,
5280
+ "step": 411500
5281
+ },
5282
+ {
5283
+ "epoch": 9.75,
5284
+ "learning_rate": 0.0,
5285
+ "loss": 2.4315,
5286
+ "step": 412000
5287
+ },
5288
+ {
5289
+ "epoch": 9.77,
5290
+ "learning_rate": 0.0,
5291
+ "loss": 2.4305,
5292
+ "step": 412500
5293
+ },
5294
+ {
5295
+ "epoch": 9.78,
5296
+ "learning_rate": 0.0,
5297
+ "loss": 2.4303,
5298
+ "step": 413000
5299
+ },
5300
+ {
5301
+ "epoch": 9.79,
5302
+ "learning_rate": 0.0,
5303
+ "loss": 2.4342,
5304
+ "step": 413500
5305
+ },
5306
+ {
5307
+ "epoch": 9.8,
5308
+ "learning_rate": 0.0,
5309
+ "loss": 2.4348,
5310
+ "step": 414000
5311
+ },
5312
+ {
5313
+ "epoch": 9.81,
5314
+ "learning_rate": 0.0,
5315
+ "loss": 2.4026,
5316
+ "step": 414500
5317
+ },
5318
+ {
5319
+ "epoch": 9.83,
5320
+ "learning_rate": 0.0,
5321
+ "loss": 2.4215,
5322
+ "step": 415000
5323
+ },
5324
+ {
5325
+ "epoch": 9.84,
5326
+ "learning_rate": 0.0,
5327
+ "loss": 2.4197,
5328
+ "step": 415500
5329
+ },
5330
+ {
5331
+ "epoch": 9.85,
5332
+ "learning_rate": 0.0,
5333
+ "loss": 2.4167,
5334
+ "step": 416000
5335
+ },
5336
+ {
5337
+ "epoch": 9.86,
5338
+ "learning_rate": 0.0,
5339
+ "loss": 2.4285,
5340
+ "step": 416500
5341
+ },
5342
+ {
5343
+ "epoch": 9.87,
5344
+ "learning_rate": 0.0,
5345
+ "loss": 2.4292,
5346
+ "step": 417000
5347
+ },
5348
+ {
5349
+ "epoch": 9.88,
5350
+ "learning_rate": 0.0,
5351
+ "loss": 2.4294,
5352
+ "step": 417500
5353
+ },
5354
+ {
5355
+ "epoch": 9.9,
5356
+ "learning_rate": 0.0,
5357
+ "loss": 2.418,
5358
+ "step": 418000
5359
+ },
5360
+ {
5361
+ "epoch": 9.91,
5362
+ "learning_rate": 0.0,
5363
+ "loss": 2.437,
5364
+ "step": 418500
5365
+ },
5366
+ {
5367
+ "epoch": 9.92,
5368
+ "learning_rate": 0.0,
5369
+ "loss": 2.4259,
5370
+ "step": 419000
5371
+ },
5372
+ {
5373
+ "epoch": 9.93,
5374
+ "learning_rate": 0.0,
5375
+ "loss": 2.4406,
5376
+ "step": 419500
5377
+ },
5378
+ {
5379
+ "epoch": 9.94,
5380
+ "learning_rate": 0.0,
5381
+ "loss": 2.4385,
5382
+ "step": 420000
5383
+ },
5384
+ {
5385
+ "epoch": 9.94,
5386
+ "eval_loss": 3.1522228717803955,
5387
+ "eval_runtime": 112.8353,
5388
+ "eval_samples_per_second": 166.366,
5389
+ "eval_steps_per_second": 20.8,
5390
+ "step": 420000
5391
  }
5392
  ],
5393
  "max_steps": 633540,
5394
  "num_train_epochs": 15,
5395
+ "total_flos": 8.8658966716416e+16,
5396
  "trial_name": null,
5397
  "trial_params": null
5398
  }