jflotz committed on
Commit
87e5678
1 Parent(s): 43c740a

Training in progress, step 230000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f433a536827dfe71e22cc9ae1b5c51f05bbb1bc2aaf2cf12bf5dc6dc12de9e5
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a86561053adee90a2878214be6885b067bd76d574ba4d67a50de8ddc88d4e78d
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03379cedc2b74fd0a67255e5dc1c2b3e73605ea40a87df2cb44842e9ffa68b98
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd756189a715f251243356fce3a9e9698eaa1c9541391c5ada1cd1bb927f2ca
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:314d5ecbfbb389ee2a386d01176df549ad45d90c274986b7501f19ad88f8a435
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ae95e0a0d1a9ec67488894572c036dc35bd82e51ffb0fbd0fcc0a47e5aff06
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c292ab14a993919881f01d5b74688df2db2f4ea7c017c2175d0fea64e57565
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde8387b01007811ac1b94d9590ed8f2f119f8e7b49d5ce42fb838c1939b856b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.814418272662383,
5
- "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4406,11 +4406,211 @@
4406
  "eval_samples_per_second": 1040.301,
4407
  "eval_steps_per_second": 16.304,
4408
  "step": 220000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4409
  }
4410
  ],
4411
  "max_steps": 250000,
4412
  "num_train_epochs": 12,
4413
- "total_flos": 3.5236318752081277e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.260528194147037,
5
+ "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4406
  "eval_samples_per_second": 1040.301,
4407
  "eval_steps_per_second": 16.304,
4408
  "step": 220000
4409
+ },
4410
+ {
4411
+ "epoch": 9.84,
4412
+ "learning_rate": 3.2176382151888054e-05,
4413
+ "loss": 0.365,
4414
+ "step": 220500
4415
+ },
4416
+ {
4417
+ "epoch": 9.86,
4418
+ "learning_rate": 3.1440176210975204e-05,
4419
+ "loss": 0.3649,
4420
+ "step": 221000
4421
+ },
4422
+ {
4423
+ "epoch": 9.86,
4424
+ "eval_loss": 0.3377821445465088,
4425
+ "eval_runtime": 2.2679,
4426
+ "eval_samples_per_second": 1012.829,
4427
+ "eval_steps_per_second": 15.874,
4428
+ "step": 221000
4429
+ },
4430
+ {
4431
+ "epoch": 9.88,
4432
+ "learning_rate": 3.071593666296585e-05,
4433
+ "loss": 0.3648,
4434
+ "step": 221500
4435
+ },
4436
+ {
4437
+ "epoch": 9.9,
4438
+ "learning_rate": 3.000369518844396e-05,
4439
+ "loss": 0.3649,
4440
+ "step": 222000
4441
+ },
4442
+ {
4443
+ "epoch": 9.9,
4444
+ "eval_loss": 0.33791235089302063,
4445
+ "eval_runtime": 2.2027,
4446
+ "eval_samples_per_second": 1042.832,
4447
+ "eval_steps_per_second": 16.344,
4448
+ "step": 222000
4449
+ },
4450
+ {
4451
+ "epoch": 9.93,
4452
+ "learning_rate": 2.9303482943159077e-05,
4453
+ "loss": 0.3648,
4454
+ "step": 222500
4455
+ },
4456
+ {
4457
+ "epoch": 9.95,
4458
+ "learning_rate": 2.861533055666306e-05,
4459
+ "loss": 0.3646,
4460
+ "step": 223000
4461
+ },
4462
+ {
4463
+ "epoch": 9.95,
4464
+ "eval_loss": 0.33819380402565,
4465
+ "eval_runtime": 2.1945,
4466
+ "eval_samples_per_second": 1046.691,
4467
+ "eval_steps_per_second": 16.404,
4468
+ "step": 223000
4469
+ },
4470
+ {
4471
+ "epoch": 9.97,
4472
+ "learning_rate": 2.793926813097066e-05,
4473
+ "loss": 0.3645,
4474
+ "step": 223500
4475
+ },
4476
+ {
4477
+ "epoch": 9.99,
4478
+ "learning_rate": 2.7275325239242546e-05,
4479
+ "loss": 0.3647,
4480
+ "step": 224000
4481
+ },
4482
+ {
4483
+ "epoch": 9.99,
4484
+ "eval_loss": 0.33774814009666443,
4485
+ "eval_runtime": 2.2427,
4486
+ "eval_samples_per_second": 1024.194,
4487
+ "eval_steps_per_second": 16.052,
4488
+ "step": 224000
4489
+ },
4490
+ {
4491
+ "epoch": 10.02,
4492
+ "learning_rate": 2.6623530924491626e-05,
4493
+ "loss": 0.3645,
4494
+ "step": 224500
4495
+ },
4496
+ {
4497
+ "epoch": 10.04,
4498
+ "learning_rate": 2.5983913698312782e-05,
4499
+ "loss": 0.3644,
4500
+ "step": 225000
4501
+ },
4502
+ {
4503
+ "epoch": 10.04,
4504
+ "eval_loss": 0.33505749702453613,
4505
+ "eval_runtime": 2.2238,
4506
+ "eval_samples_per_second": 1032.916,
4507
+ "eval_steps_per_second": 16.188,
4508
+ "step": 225000
4509
+ },
4510
+ {
4511
+ "epoch": 10.06,
4512
+ "learning_rate": 2.5356501539635512e-05,
4513
+ "loss": 0.3644,
4514
+ "step": 225500
4515
+ },
4516
+ {
4517
+ "epoch": 10.08,
4518
+ "learning_rate": 2.4741321893500244e-05,
4519
+ "loss": 0.3644,
4520
+ "step": 226000
4521
+ },
4522
+ {
4523
+ "epoch": 10.08,
4524
+ "eval_loss": 0.337401807308197,
4525
+ "eval_runtime": 2.2527,
4526
+ "eval_samples_per_second": 1019.654,
4527
+ "eval_steps_per_second": 15.981,
4528
+ "step": 226000
4529
+ },
4530
+ {
4531
+ "epoch": 10.1,
4532
+ "learning_rate": 2.4138401669857587e-05,
4533
+ "loss": 0.3644,
4534
+ "step": 226500
4535
+ },
4536
+ {
4537
+ "epoch": 10.13,
4538
+ "learning_rate": 2.3547767242391212e-05,
4539
+ "loss": 0.3644,
4540
+ "step": 227000
4541
+ },
4542
+ {
4543
+ "epoch": 10.13,
4544
+ "eval_loss": 0.33791208267211914,
4545
+ "eval_runtime": 2.1965,
4546
+ "eval_samples_per_second": 1045.741,
4547
+ "eval_steps_per_second": 16.389,
4548
+ "step": 227000
4549
+ },
4550
+ {
4551
+ "epoch": 10.15,
4552
+ "learning_rate": 2.2969444447364498e-05,
4553
+ "loss": 0.3642,
4554
+ "step": 227500
4555
+ },
4556
+ {
4557
+ "epoch": 10.17,
4558
+ "learning_rate": 2.240345858248992e-05,
4559
+ "loss": 0.3651,
4560
+ "step": 228000
4561
+ },
4562
+ {
4563
+ "epoch": 10.17,
4564
+ "eval_loss": 0.33648931980133057,
4565
+ "eval_runtime": 2.2235,
4566
+ "eval_samples_per_second": 1033.033,
4567
+ "eval_steps_per_second": 16.19,
4568
+ "step": 228000
4569
+ },
4570
+ {
4571
+ "epoch": 10.19,
4572
+ "learning_rate": 2.184983440582284e-05,
4573
+ "loss": 0.3644,
4574
+ "step": 228500
4575
+ },
4576
+ {
4577
+ "epoch": 10.22,
4578
+ "learning_rate": 2.1308596134678134e-05,
4579
+ "loss": 0.3643,
4580
+ "step": 229000
4581
+ },
4582
+ {
4583
+ "epoch": 10.22,
4584
+ "eval_loss": 0.3360297977924347,
4585
+ "eval_runtime": 2.2193,
4586
+ "eval_samples_per_second": 1035.0,
4587
+ "eval_steps_per_second": 16.221,
4588
+ "step": 229000
4589
+ },
4590
+ {
4591
+ "epoch": 10.24,
4592
+ "learning_rate": 2.0779767444571236e-05,
4593
+ "loss": 0.3643,
4594
+ "step": 229500
4595
+ },
4596
+ {
4597
+ "epoch": 10.26,
4598
+ "learning_rate": 2.0263371468182175e-05,
4599
+ "loss": 0.3642,
4600
+ "step": 230000
4601
+ },
4602
+ {
4603
+ "epoch": 10.26,
4604
+ "eval_loss": 0.33706384897232056,
4605
+ "eval_runtime": 2.2761,
4606
+ "eval_samples_per_second": 1009.19,
4607
+ "eval_steps_per_second": 15.817,
4608
+ "step": 230000
4609
  }
4610
  ],
4611
  "max_steps": 250000,
4612
  "num_train_epochs": 12,
4613
+ "total_flos": 3.683792746091758e+21,
4614
  "trial_name": null,
4615
  "trial_params": null
4616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03379cedc2b74fd0a67255e5dc1c2b3e73605ea40a87df2cb44842e9ffa68b98
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bd756189a715f251243356fce3a9e9698eaa1c9541391c5ada1cd1bb927f2ca
3
  size 25761253