MarkelFe commited on
Commit
a3b0e88
1 Parent(s): e2ad0aa

Training in progress, step 350000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40fe0403207cad2dc3e39d3c817503e5448a77df290f57cccfb2e64288c41acd
3
  size 995605445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c89ba467d9f15e7532272ecaf0ccadb771ab579cd44d7321fc1ad8b066ae1525
3
  size 995605445
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e08a500181a834c33953e56c8723ea880cc02afa481dcf62ae8634373aca3933
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d51593bf5f22eb880085627e2794b144f3f333b26c86dcfc3ce4c512cf0ec47e
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a4e01eb4a07664003103c8d71136270f0f89ab5c55108a56ff916a5b2e06d40
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e8328bc686039137ad7f1d537dc750264b54ac42c8dd2a22d6dbf651312dee
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.050004735296904,
5
- "global_step": 340000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4364,11 +4364,139 @@
4364
  "eval_samples_per_second": 166.22,
4365
  "eval_steps_per_second": 20.782,
4366
  "step": 340000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4367
  }
4368
  ],
4369
  "max_steps": 633540,
4370
  "num_train_epochs": 15,
4371
- "total_flos": 7.1815677126912e+16,
4372
  "trial_name": null,
4373
  "trial_params": null
4374
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.286769580452695,
5
+ "global_step": 350000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4364
  "eval_samples_per_second": 166.22,
4365
  "eval_steps_per_second": 20.782,
4366
  "step": 340000
4367
+ },
4368
+ {
4369
+ "epoch": 8.06,
4370
+ "learning_rate": 0.0,
4371
+ "loss": 2.4242,
4372
+ "step": 340500
4373
+ },
4374
+ {
4375
+ "epoch": 8.07,
4376
+ "learning_rate": 0.0,
4377
+ "loss": 2.4302,
4378
+ "step": 341000
4379
+ },
4380
+ {
4381
+ "epoch": 8.09,
4382
+ "learning_rate": 0.0,
4383
+ "loss": 2.4193,
4384
+ "step": 341500
4385
+ },
4386
+ {
4387
+ "epoch": 8.1,
4388
+ "learning_rate": 0.0,
4389
+ "loss": 2.4164,
4390
+ "step": 342000
4391
+ },
4392
+ {
4393
+ "epoch": 8.11,
4394
+ "learning_rate": 0.0,
4395
+ "loss": 2.4209,
4396
+ "step": 342500
4397
+ },
4398
+ {
4399
+ "epoch": 8.12,
4400
+ "learning_rate": 0.0,
4401
+ "loss": 2.4416,
4402
+ "step": 343000
4403
+ },
4404
+ {
4405
+ "epoch": 8.13,
4406
+ "learning_rate": 0.0,
4407
+ "loss": 2.4181,
4408
+ "step": 343500
4409
+ },
4410
+ {
4411
+ "epoch": 8.14,
4412
+ "learning_rate": 0.0,
4413
+ "loss": 2.4254,
4414
+ "step": 344000
4415
+ },
4416
+ {
4417
+ "epoch": 8.16,
4418
+ "learning_rate": 0.0,
4419
+ "loss": 2.4127,
4420
+ "step": 344500
4421
+ },
4422
+ {
4423
+ "epoch": 8.17,
4424
+ "learning_rate": 0.0,
4425
+ "loss": 2.4414,
4426
+ "step": 345000
4427
+ },
4428
+ {
4429
+ "epoch": 8.18,
4430
+ "learning_rate": 0.0,
4431
+ "loss": 2.4366,
4432
+ "step": 345500
4433
+ },
4434
+ {
4435
+ "epoch": 8.19,
4436
+ "learning_rate": 0.0,
4437
+ "loss": 2.433,
4438
+ "step": 346000
4439
+ },
4440
+ {
4441
+ "epoch": 8.2,
4442
+ "learning_rate": 0.0,
4443
+ "loss": 2.4206,
4444
+ "step": 346500
4445
+ },
4446
+ {
4447
+ "epoch": 8.22,
4448
+ "learning_rate": 0.0,
4449
+ "loss": 2.4424,
4450
+ "step": 347000
4451
+ },
4452
+ {
4453
+ "epoch": 8.23,
4454
+ "learning_rate": 0.0,
4455
+ "loss": 2.4264,
4456
+ "step": 347500
4457
+ },
4458
+ {
4459
+ "epoch": 8.24,
4460
+ "learning_rate": 0.0,
4461
+ "loss": 2.433,
4462
+ "step": 348000
4463
+ },
4464
+ {
4465
+ "epoch": 8.25,
4466
+ "learning_rate": 0.0,
4467
+ "loss": 2.4261,
4468
+ "step": 348500
4469
+ },
4470
+ {
4471
+ "epoch": 8.26,
4472
+ "learning_rate": 0.0,
4473
+ "loss": 2.4225,
4474
+ "step": 349000
4475
+ },
4476
+ {
4477
+ "epoch": 8.27,
4478
+ "learning_rate": 0.0,
4479
+ "loss": 2.4319,
4480
+ "step": 349500
4481
+ },
4482
+ {
4483
+ "epoch": 8.29,
4484
+ "learning_rate": 0.0,
4485
+ "loss": 2.4333,
4486
+ "step": 350000
4487
+ },
4488
+ {
4489
+ "epoch": 8.29,
4490
+ "eval_loss": 3.1522228717803955,
4491
+ "eval_runtime": 113.0119,
4492
+ "eval_samples_per_second": 166.106,
4493
+ "eval_steps_per_second": 20.768,
4494
+ "step": 350000
4495
  }
4496
  ],
4497
  "max_steps": 633540,
4498
  "num_train_epochs": 15,
4499
+ "total_flos": 7.3929090776832e+16,
4500
  "trial_name": null,
4501
  "trial_params": null
4502
  }