plip commited on
Commit
93574ed
1 Parent(s): 49f0230

Training in progress, step 330000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feec96829044f69c5632c270eedd009034bbe0cf717e555992fa10aeea97b864
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54534335b10706006d453ec56a979a5d5c461f57d0f68d1a6ad0a77c57a0aecf
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d15795c67ef8a474ec8269e65cb5cae595586b0dd6a18f0656a533551a730789
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea75c11d2eaacb0e2bbd235a00be3c1d10c576c2381b03ccce251e6969bea54
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:001dab7e0bd0507b251f8661de959d0c23f7918aaaae6122c9ffe6e68407c81b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0f29d45171c4938599319a84794d73d3d2336609c8cb89948f6f9bcd906a6a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5673377a057c7734bd1a0ee14d972f6f3bfc67bb8208ac49ae618347d18d616b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:636cb28fce30ad56f68aface20193360fd815697da4c2ec39f5ca647b5e6b45b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.15535960038738,
5
- "global_step": 320000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6406,11 +6406,211 @@
6406
  "eval_samples_per_second": 802.848,
6407
  "eval_steps_per_second": 12.846,
6408
  "step": 320000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6409
  }
6410
  ],
6411
  "max_steps": 500000,
6412
  "num_train_epochs": 13,
6413
- "total_flos": 1.022348060947964e+22,
6414
  "trial_name": null,
6415
  "trial_params": null
6416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.410214587899485,
5
+ "global_step": 330000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6406
  "eval_samples_per_second": 802.848,
6407
  "eval_steps_per_second": 12.846,
6408
  "step": 320000
6409
+ },
6410
+ {
6411
+ "epoch": 8.17,
6412
+ "learning_rate": 0.00010073144655927253,
6413
+ "loss": 0.2809,
6414
+ "step": 320500
6415
+ },
6416
+ {
6417
+ "epoch": 8.18,
6418
+ "learning_rate": 0.0001002870876838929,
6419
+ "loss": 0.28,
6420
+ "step": 321000
6421
+ },
6422
+ {
6423
+ "epoch": 8.18,
6424
+ "eval_loss": 0.8134902119636536,
6425
+ "eval_runtime": 1.3191,
6426
+ "eval_samples_per_second": 758.118,
6427
+ "eval_steps_per_second": 12.13,
6428
+ "step": 321000
6429
+ },
6430
+ {
6431
+ "epoch": 8.19,
6432
+ "learning_rate": 9.984332714015662e-05,
6433
+ "loss": 0.2799,
6434
+ "step": 321500
6435
+ },
6436
+ {
6437
+ "epoch": 8.21,
6438
+ "learning_rate": 9.94001697809578e-05,
6439
+ "loss": 0.2796,
6440
+ "step": 322000
6441
+ },
6442
+ {
6443
+ "epoch": 8.21,
6444
+ "eval_loss": 0.8164393305778503,
6445
+ "eval_runtime": 1.2535,
6446
+ "eval_samples_per_second": 797.791,
6447
+ "eval_steps_per_second": 12.765,
6448
+ "step": 322000
6449
+ },
6450
+ {
6451
+ "epoch": 8.22,
6452
+ "learning_rate": 9.895762045259445e-05,
6453
+ "loss": 0.2797,
6454
+ "step": 322500
6455
+ },
6456
+ {
6457
+ "epoch": 8.23,
6458
+ "learning_rate": 9.851568399471498e-05,
6459
+ "loss": 0.2793,
6460
+ "step": 323000
6461
+ },
6462
+ {
6463
+ "epoch": 8.23,
6464
+ "eval_loss": 0.8119146823883057,
6465
+ "eval_runtime": 1.2514,
6466
+ "eval_samples_per_second": 799.097,
6467
+ "eval_steps_per_second": 12.786,
6468
+ "step": 323000
6469
+ },
6470
+ {
6471
+ "epoch": 8.24,
6472
+ "learning_rate": 9.807436524026574e-05,
6473
+ "loss": 0.2797,
6474
+ "step": 323500
6475
+ },
6476
+ {
6477
+ "epoch": 8.26,
6478
+ "learning_rate": 9.763366901543801e-05,
6479
+ "loss": 0.2791,
6480
+ "step": 324000
6481
+ },
6482
+ {
6483
+ "epoch": 8.26,
6484
+ "eval_loss": 0.8064904808998108,
6485
+ "eval_runtime": 1.2268,
6486
+ "eval_samples_per_second": 815.12,
6487
+ "eval_steps_per_second": 13.042,
6488
+ "step": 324000
6489
+ },
6490
+ {
6491
+ "epoch": 8.27,
6492
+ "learning_rate": 9.719360013961495e-05,
6493
+ "loss": 0.2793,
6494
+ "step": 324500
6495
+ },
6496
+ {
6497
+ "epoch": 8.28,
6498
+ "learning_rate": 9.675416342531944e-05,
6499
+ "loss": 0.2793,
6500
+ "step": 325000
6501
+ },
6502
+ {
6503
+ "epoch": 8.28,
6504
+ "eval_loss": 0.8141771554946899,
6505
+ "eval_runtime": 1.2692,
6506
+ "eval_samples_per_second": 787.921,
6507
+ "eval_steps_per_second": 12.607,
6508
+ "step": 325000
6509
+ },
6510
+ {
6511
+ "epoch": 8.3,
6512
+ "learning_rate": 9.631536367816086e-05,
6513
+ "loss": 0.2798,
6514
+ "step": 325500
6515
+ },
6516
+ {
6517
+ "epoch": 8.31,
6518
+ "learning_rate": 9.587720569678299e-05,
6519
+ "loss": 0.2794,
6520
+ "step": 326000
6521
+ },
6522
+ {
6523
+ "epoch": 8.31,
6524
+ "eval_loss": 0.803835391998291,
6525
+ "eval_runtime": 1.24,
6526
+ "eval_samples_per_second": 806.471,
6527
+ "eval_steps_per_second": 12.904,
6528
+ "step": 326000
6529
+ },
6530
+ {
6531
+ "epoch": 8.32,
6532
+ "learning_rate": 9.543969427281131e-05,
6533
+ "loss": 0.2791,
6534
+ "step": 326500
6535
+ },
6536
+ {
6537
+ "epoch": 8.33,
6538
+ "learning_rate": 9.500283419080062e-05,
6539
+ "loss": 0.2792,
6540
+ "step": 327000
6541
+ },
6542
+ {
6543
+ "epoch": 8.33,
6544
+ "eval_loss": 0.81174635887146,
6545
+ "eval_runtime": 1.301,
6546
+ "eval_samples_per_second": 768.659,
6547
+ "eval_steps_per_second": 12.299,
6548
+ "step": 327000
6549
+ },
6550
+ {
6551
+ "epoch": 8.35,
6552
+ "learning_rate": 9.45666302281829e-05,
6553
+ "loss": 0.2787,
6554
+ "step": 327500
6555
+ },
6556
+ {
6557
+ "epoch": 8.36,
6558
+ "learning_rate": 9.413108715521467e-05,
6559
+ "loss": 0.2789,
6560
+ "step": 328000
6561
+ },
6562
+ {
6563
+ "epoch": 8.36,
6564
+ "eval_loss": 0.8118357062339783,
6565
+ "eval_runtime": 1.2706,
6566
+ "eval_samples_per_second": 787.037,
6567
+ "eval_steps_per_second": 12.593,
6568
+ "step": 328000
6569
+ },
6570
+ {
6571
+ "epoch": 8.37,
6572
+ "learning_rate": 9.369620973492525e-05,
6573
+ "loss": 0.2794,
6574
+ "step": 328500
6575
+ },
6576
+ {
6577
+ "epoch": 8.38,
6578
+ "learning_rate": 9.326200272306445e-05,
6579
+ "loss": 0.2793,
6580
+ "step": 329000
6581
+ },
6582
+ {
6583
+ "epoch": 8.38,
6584
+ "eval_loss": 0.809190034866333,
6585
+ "eval_runtime": 1.2623,
6586
+ "eval_samples_per_second": 792.184,
6587
+ "eval_steps_per_second": 12.675,
6588
+ "step": 329000
6589
+ },
6590
+ {
6591
+ "epoch": 8.4,
6592
+ "learning_rate": 9.282847086805059e-05,
6593
+ "loss": 0.2788,
6594
+ "step": 329500
6595
+ },
6596
+ {
6597
+ "epoch": 8.41,
6598
+ "learning_rate": 9.239561891091853e-05,
6599
+ "loss": 0.279,
6600
+ "step": 330000
6601
+ },
6602
+ {
6603
+ "epoch": 8.41,
6604
+ "eval_loss": 0.8081182837486267,
6605
+ "eval_runtime": 1.2445,
6606
+ "eval_samples_per_second": 803.539,
6607
+ "eval_steps_per_second": 12.857,
6608
+ "step": 330000
6609
  }
6610
  ],
6611
  "max_steps": 500000,
6612
  "num_train_epochs": 13,
6613
+ "total_flos": 1.054296774825414e+22,
6614
  "trial_name": null,
6615
  "trial_params": null
6616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d15795c67ef8a474ec8269e65cb5cae595586b0dd6a18f0656a533551a730789
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea75c11d2eaacb0e2bbd235a00be3c1d10c576c2381b03ccce251e6969bea54
3
  size 102501541