Training in progress, step 290000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f66cb466fc715e72c4ec81e60ba44dcf7a28e75138eb49dc636ee2010b29cf2d
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5cd5f151c22a36aa02e644aa26cc06f8e87218fd6115b2ca7b3a1e141911eae
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47154c3a981c14b1d0cedd6ba126beed80c62fc2e1c96712050a23feb40f3503
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39bc196d8aea9810b9698ff8cd04e2aeef8774f706fbd61ae0f0055bbacd0eaf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 7.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -5606,11 +5606,211 @@
|
|
5606 |
"eval_samples_per_second": 788.963,
|
5607 |
"eval_steps_per_second": 12.623,
|
5608 |
"step": 280000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5609 |
}
|
5610 |
],
|
5611 |
"max_steps": 500000,
|
5612 |
"num_train_epochs": 13,
|
5613 |
-
"total_flos":
|
5614 |
"trial_name": null,
|
5615 |
"trial_params": null
|
5616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 7.390794637851062,
|
5 |
+
"global_step": 290000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
5606 |
"eval_samples_per_second": 788.963,
|
5607 |
"eval_steps_per_second": 12.623,
|
5608 |
"step": 280000
|
5609 |
+
},
|
5610 |
+
{
|
5611 |
+
"epoch": 7.15,
|
5612 |
+
"learning_rate": 0.00013777852207079235,
|
5613 |
+
"loss": 0.2848,
|
5614 |
+
"step": 280500
|
5615 |
+
},
|
5616 |
+
{
|
5617 |
+
"epoch": 7.16,
|
5618 |
+
"learning_rate": 0.00013730250483349825,
|
5619 |
+
"loss": 0.2836,
|
5620 |
+
"step": 281000
|
5621 |
+
},
|
5622 |
+
{
|
5623 |
+
"epoch": 7.16,
|
5624 |
+
"eval_loss": 0.8174564242362976,
|
5625 |
+
"eval_runtime": 1.3231,
|
5626 |
+
"eval_samples_per_second": 755.81,
|
5627 |
+
"eval_steps_per_second": 12.093,
|
5628 |
+
"step": 281000
|
5629 |
+
},
|
5630 |
+
{
|
5631 |
+
"epoch": 7.17,
|
5632 |
+
"learning_rate": 0.00013682668113317584,
|
5633 |
+
"loss": 0.2841,
|
5634 |
+
"step": 281500
|
5635 |
+
},
|
5636 |
+
{
|
5637 |
+
"epoch": 7.19,
|
5638 |
+
"learning_rate": 0.00013635105617335703,
|
5639 |
+
"loss": 0.2839,
|
5640 |
+
"step": 282000
|
5641 |
+
},
|
5642 |
+
{
|
5643 |
+
"epoch": 7.19,
|
5644 |
+
"eval_loss": 0.8130354881286621,
|
5645 |
+
"eval_runtime": 1.2454,
|
5646 |
+
"eval_samples_per_second": 802.941,
|
5647 |
+
"eval_steps_per_second": 12.847,
|
5648 |
+
"step": 282000
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 7.2,
|
5652 |
+
"learning_rate": 0.00013587563515539996,
|
5653 |
+
"loss": 0.284,
|
5654 |
+
"step": 282500
|
5655 |
+
},
|
5656 |
+
{
|
5657 |
+
"epoch": 7.21,
|
5658 |
+
"learning_rate": 0.00013540042327843296,
|
5659 |
+
"loss": 0.284,
|
5660 |
+
"step": 283000
|
5661 |
+
},
|
5662 |
+
{
|
5663 |
+
"epoch": 7.21,
|
5664 |
+
"eval_loss": 0.8058344721794128,
|
5665 |
+
"eval_runtime": 1.2695,
|
5666 |
+
"eval_samples_per_second": 787.692,
|
5667 |
+
"eval_steps_per_second": 12.603,
|
5668 |
+
"step": 283000
|
5669 |
+
},
|
5670 |
+
{
|
5671 |
+
"epoch": 7.23,
|
5672 |
+
"learning_rate": 0.00013492542573929678,
|
5673 |
+
"loss": 0.284,
|
5674 |
+
"step": 283500
|
5675 |
+
},
|
5676 |
+
{
|
5677 |
+
"epoch": 7.24,
|
5678 |
+
"learning_rate": 0.00013445064773248846,
|
5679 |
+
"loss": 0.2839,
|
5680 |
+
"step": 284000
|
5681 |
+
},
|
5682 |
+
{
|
5683 |
+
"epoch": 7.24,
|
5684 |
+
"eval_loss": 0.8160645961761475,
|
5685 |
+
"eval_runtime": 1.2481,
|
5686 |
+
"eval_samples_per_second": 801.208,
|
5687 |
+
"eval_steps_per_second": 12.819,
|
5688 |
+
"step": 284000
|
5689 |
+
},
|
5690 |
+
{
|
5691 |
+
"epoch": 7.25,
|
5692 |
+
"learning_rate": 0.00013397609445010432,
|
5693 |
+
"loss": 0.2843,
|
5694 |
+
"step": 284500
|
5695 |
+
},
|
5696 |
+
{
|
5697 |
+
"epoch": 7.26,
|
5698 |
+
"learning_rate": 0.00013350177108178288,
|
5699 |
+
"loss": 0.2842,
|
5700 |
+
"step": 285000
|
5701 |
+
},
|
5702 |
+
{
|
5703 |
+
"epoch": 7.26,
|
5704 |
+
"eval_loss": 0.8231968879699707,
|
5705 |
+
"eval_runtime": 1.3325,
|
5706 |
+
"eval_samples_per_second": 750.455,
|
5707 |
+
"eval_steps_per_second": 12.007,
|
5708 |
+
"step": 285000
|
5709 |
+
},
|
5710 |
+
{
|
5711 |
+
"epoch": 7.28,
|
5712 |
+
"learning_rate": 0.00013302768281464863,
|
5713 |
+
"loss": 0.2839,
|
5714 |
+
"step": 285500
|
5715 |
+
},
|
5716 |
+
{
|
5717 |
+
"epoch": 7.29,
|
5718 |
+
"learning_rate": 0.0001325538348332548,
|
5719 |
+
"loss": 0.2835,
|
5720 |
+
"step": 286000
|
5721 |
+
},
|
5722 |
+
{
|
5723 |
+
"epoch": 7.29,
|
5724 |
+
"eval_loss": 0.8185608386993408,
|
5725 |
+
"eval_runtime": 1.2436,
|
5726 |
+
"eval_samples_per_second": 804.112,
|
5727 |
+
"eval_steps_per_second": 12.866,
|
5728 |
+
"step": 286000
|
5729 |
+
},
|
5730 |
+
{
|
5731 |
+
"epoch": 7.3,
|
5732 |
+
"learning_rate": 0.00013208023231952706,
|
5733 |
+
"loss": 0.2839,
|
5734 |
+
"step": 286500
|
5735 |
+
},
|
5736 |
+
{
|
5737 |
+
"epoch": 7.31,
|
5738 |
+
"learning_rate": 0.0001316068804527066,
|
5739 |
+
"loss": 0.2837,
|
5740 |
+
"step": 287000
|
5741 |
+
},
|
5742 |
+
{
|
5743 |
+
"epoch": 7.31,
|
5744 |
+
"eval_loss": 0.8180215954780579,
|
5745 |
+
"eval_runtime": 1.2676,
|
5746 |
+
"eval_samples_per_second": 788.901,
|
5747 |
+
"eval_steps_per_second": 12.622,
|
5748 |
+
"step": 287000
|
5749 |
+
},
|
5750 |
+
{
|
5751 |
+
"epoch": 7.33,
|
5752 |
+
"learning_rate": 0.00013113378440929353,
|
5753 |
+
"loss": 0.2841,
|
5754 |
+
"step": 287500
|
5755 |
+
},
|
5756 |
+
{
|
5757 |
+
"epoch": 7.34,
|
5758 |
+
"learning_rate": 0.00013066094936299056,
|
5759 |
+
"loss": 0.2835,
|
5760 |
+
"step": 288000
|
5761 |
+
},
|
5762 |
+
{
|
5763 |
+
"epoch": 7.34,
|
5764 |
+
"eval_loss": 0.8164823651313782,
|
5765 |
+
"eval_runtime": 1.3849,
|
5766 |
+
"eval_samples_per_second": 722.072,
|
5767 |
+
"eval_steps_per_second": 11.553,
|
5768 |
+
"step": 288000
|
5769 |
+
},
|
5770 |
+
{
|
5771 |
+
"epoch": 7.35,
|
5772 |
+
"learning_rate": 0.00013018838048464582,
|
5773 |
+
"loss": 0.2834,
|
5774 |
+
"step": 288500
|
5775 |
+
},
|
5776 |
+
{
|
5777 |
+
"epoch": 7.37,
|
5778 |
+
"learning_rate": 0.00012971608294219702,
|
5779 |
+
"loss": 0.2835,
|
5780 |
+
"step": 289000
|
5781 |
+
},
|
5782 |
+
{
|
5783 |
+
"epoch": 7.37,
|
5784 |
+
"eval_loss": 0.8121501207351685,
|
5785 |
+
"eval_runtime": 1.2558,
|
5786 |
+
"eval_samples_per_second": 796.286,
|
5787 |
+
"eval_steps_per_second": 12.741,
|
5788 |
+
"step": 289000
|
5789 |
+
},
|
5790 |
+
{
|
5791 |
+
"epoch": 7.38,
|
5792 |
+
"learning_rate": 0.00012924406190061423,
|
5793 |
+
"loss": 0.2835,
|
5794 |
+
"step": 289500
|
5795 |
+
},
|
5796 |
+
{
|
5797 |
+
"epoch": 7.39,
|
5798 |
+
"learning_rate": 0.0001287723225218441,
|
5799 |
+
"loss": 0.2832,
|
5800 |
+
"step": 290000
|
5801 |
+
},
|
5802 |
+
{
|
5803 |
+
"epoch": 7.39,
|
5804 |
+
"eval_loss": 0.8192352056503296,
|
5805 |
+
"eval_runtime": 1.3002,
|
5806 |
+
"eval_samples_per_second": 769.098,
|
5807 |
+
"eval_steps_per_second": 12.306,
|
5808 |
+
"step": 290000
|
5809 |
}
|
5810 |
],
|
5811 |
"max_steps": 500000,
|
5812 |
"num_train_epochs": 13,
|
5813 |
+
"total_flos": 9.265032672069185e+21,
|
5814 |
"trial_name": null,
|
5815 |
"trial_params": null
|
5816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5cd5f151c22a36aa02e644aa26cc06f8e87218fd6115b2ca7b3a1e141911eae
|
3 |
size 102501541
|