MarkelFe commited on
Commit
e2ad0aa
1 Parent(s): 69d7590

Training in progress, step 340000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3af69497cd7909d18e142fdb28af685d5fd4b6d1152a10069e6f9547b752a3d5
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40fe0403207cad2dc3e39d3c817503e5448a77df290f57cccfb2e64288c41acd
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:926d62aa8c0e8eb933d167c471771b77e84d87c1ca180d71a4153e846dcaa5bc
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08a500181a834c33953e56c8723ea880cc02afa481dcf62ae8634373aca3933
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dac0a7c6aeb1853f8f712f2c9f2553cbe98523717c3c148ee76debf888894532
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a4e01eb4a07664003103c8d71136270f0f89ab5c55108a56ff916a5b2e06d40
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.813239890141112,
5
- "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4236,11 +4236,139 @@
4236
  "eval_samples_per_second": 166.406,
4237
  "eval_steps_per_second": 20.805,
4238
  "step": 330000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4239
  }
4240
  ],
4241
  "max_steps": 633540,
4242
  "num_train_epochs": 15,
4243
- "total_flos": 6.971360467392e+16,
4244
  "trial_name": null,
4245
  "trial_params": null
4246
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.050004735296904,
5
+ "global_step": 340000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4236
  "eval_samples_per_second": 166.406,
4237
  "eval_steps_per_second": 20.805,
4238
  "step": 330000
4239
+ },
4240
+ {
4241
+ "epoch": 7.83,
4242
+ "learning_rate": 0.0,
4243
+ "loss": 2.4532,
4244
+ "step": 330500
4245
+ },
4246
+ {
4247
+ "epoch": 7.84,
4248
+ "learning_rate": 0.0,
4249
+ "loss": 2.4236,
4250
+ "step": 331000
4251
+ },
4252
+ {
4253
+ "epoch": 7.85,
4254
+ "learning_rate": 0.0,
4255
+ "loss": 2.4411,
4256
+ "step": 331500
4257
+ },
4258
+ {
4259
+ "epoch": 7.86,
4260
+ "learning_rate": 0.0,
4261
+ "loss": 2.4269,
4262
+ "step": 332000
4263
+ },
4264
+ {
4265
+ "epoch": 7.87,
4266
+ "learning_rate": 0.0,
4267
+ "loss": 2.4323,
4268
+ "step": 332500
4269
+ },
4270
+ {
4271
+ "epoch": 7.88,
4272
+ "learning_rate": 0.0,
4273
+ "loss": 2.4343,
4274
+ "step": 333000
4275
+ },
4276
+ {
4277
+ "epoch": 7.9,
4278
+ "learning_rate": 0.0,
4279
+ "loss": 2.4647,
4280
+ "step": 333500
4281
+ },
4282
+ {
4283
+ "epoch": 7.91,
4284
+ "learning_rate": 0.0,
4285
+ "loss": 2.4331,
4286
+ "step": 334000
4287
+ },
4288
+ {
4289
+ "epoch": 7.92,
4290
+ "learning_rate": 0.0,
4291
+ "loss": 2.439,
4292
+ "step": 334500
4293
+ },
4294
+ {
4295
+ "epoch": 7.93,
4296
+ "learning_rate": 0.0,
4297
+ "loss": 2.4262,
4298
+ "step": 335000
4299
+ },
4300
+ {
4301
+ "epoch": 7.94,
4302
+ "learning_rate": 0.0,
4303
+ "loss": 2.4369,
4304
+ "step": 335500
4305
+ },
4306
+ {
4307
+ "epoch": 7.96,
4308
+ "learning_rate": 0.0,
4309
+ "loss": 2.4286,
4310
+ "step": 336000
4311
+ },
4312
+ {
4313
+ "epoch": 7.97,
4314
+ "learning_rate": 0.0,
4315
+ "loss": 2.4323,
4316
+ "step": 336500
4317
+ },
4318
+ {
4319
+ "epoch": 7.98,
4320
+ "learning_rate": 0.0,
4321
+ "loss": 2.4318,
4322
+ "step": 337000
4323
+ },
4324
+ {
4325
+ "epoch": 7.99,
4326
+ "learning_rate": 0.0,
4327
+ "loss": 2.4301,
4328
+ "step": 337500
4329
+ },
4330
+ {
4331
+ "epoch": 8.0,
4332
+ "learning_rate": 0.0,
4333
+ "loss": 2.436,
4334
+ "step": 338000
4335
+ },
4336
+ {
4337
+ "epoch": 8.01,
4338
+ "learning_rate": 0.0,
4339
+ "loss": 2.4323,
4340
+ "step": 338500
4341
+ },
4342
+ {
4343
+ "epoch": 8.03,
4344
+ "learning_rate": 0.0,
4345
+ "loss": 2.4118,
4346
+ "step": 339000
4347
+ },
4348
+ {
4349
+ "epoch": 8.04,
4350
+ "learning_rate": 0.0,
4351
+ "loss": 2.4001,
4352
+ "step": 339500
4353
+ },
4354
+ {
4355
+ "epoch": 8.05,
4356
+ "learning_rate": 0.0,
4357
+ "loss": 2.4181,
4358
+ "step": 340000
4359
+ },
4360
+ {
4361
+ "epoch": 8.05,
4362
+ "eval_loss": 3.1522228717803955,
4363
+ "eval_runtime": 112.9345,
4364
+ "eval_samples_per_second": 166.22,
4365
+ "eval_steps_per_second": 20.782,
4366
+ "step": 340000
4367
  }
4368
  ],
4369
  "max_steps": 633540,
4370
  "num_train_epochs": 15,
4371
+ "total_flos": 7.1815677126912e+16,
4372
  "trial_name": null,
4373
  "trial_params": null
4374
  }