Training in progress, step 390000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:016da91c7614d6a57ffcc5938a0b68675bc967aee110a7fe186334a10cf0dffd
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da03f29f9d43fcaa0d12888b54defcd8a6a1be294c2c1e7d74429358a9688082
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c4ee1f5ea05fd5b5af4e685b13d34537bf6d8bfafcf82bbf12ab3c078f9c700
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:864647684ab3694f7aa2a258c1806e10c4abf99f67ed5e54443050e485ac9436
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 9.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7606,11 +7606,211 @@
|
|
7606 |
"eval_samples_per_second": 758.149,
|
7607 |
"eval_steps_per_second": 12.13,
|
7608 |
"step": 380000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7609 |
}
|
7610 |
],
|
7611 |
"max_steps": 500000,
|
7612 |
"num_train_epochs": 13,
|
7613 |
-
"total_flos": 1.
|
7614 |
"trial_name": null,
|
7615 |
"trial_params": null
|
7616 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.939344512972118,
|
5 |
+
"global_step": 390000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7606 |
"eval_samples_per_second": 758.149,
|
7607 |
"eval_steps_per_second": 12.13,
|
7608 |
"step": 380000
|
7609 |
+
},
|
7610 |
+
{
|
7611 |
+
"epoch": 9.7,
|
7612 |
+
"learning_rate": 5.2979369233306834e-05,
|
7613 |
+
"loss": 0.2732,
|
7614 |
+
"step": 380500
|
7615 |
+
},
|
7616 |
+
{
|
7617 |
+
"epoch": 9.71,
|
7618 |
+
"learning_rate": 5.26391878673975e-05,
|
7619 |
+
"loss": 0.2736,
|
7620 |
+
"step": 381000
|
7621 |
+
},
|
7622 |
+
{
|
7623 |
+
"epoch": 9.71,
|
7624 |
+
"eval_loss": 0.8043585419654846,
|
7625 |
+
"eval_runtime": 1.3216,
|
7626 |
+
"eval_samples_per_second": 756.661,
|
7627 |
+
"eval_steps_per_second": 12.107,
|
7628 |
+
"step": 381000
|
7629 |
+
},
|
7630 |
+
{
|
7631 |
+
"epoch": 9.72,
|
7632 |
+
"learning_rate": 5.230012590292987e-05,
|
7633 |
+
"loss": 0.2736,
|
7634 |
+
"step": 381500
|
7635 |
+
},
|
7636 |
+
{
|
7637 |
+
"epoch": 9.74,
|
7638 |
+
"learning_rate": 5.1962187047831517e-05,
|
7639 |
+
"loss": 0.2739,
|
7640 |
+
"step": 382000
|
7641 |
+
},
|
7642 |
+
{
|
7643 |
+
"epoch": 9.74,
|
7644 |
+
"eval_loss": 0.8078375458717346,
|
7645 |
+
"eval_runtime": 1.2548,
|
7646 |
+
"eval_samples_per_second": 796.917,
|
7647 |
+
"eval_steps_per_second": 12.751,
|
7648 |
+
"step": 382000
|
7649 |
+
},
|
7650 |
+
{
|
7651 |
+
"epoch": 9.75,
|
7652 |
+
"learning_rate": 5.162537499774743e-05,
|
7653 |
+
"loss": 0.2736,
|
7654 |
+
"step": 382500
|
7655 |
+
},
|
7656 |
+
{
|
7657 |
+
"epoch": 9.76,
|
7658 |
+
"learning_rate": 5.128969343600032e-05,
|
7659 |
+
"loss": 0.2729,
|
7660 |
+
"step": 383000
|
7661 |
+
},
|
7662 |
+
{
|
7663 |
+
"epoch": 9.76,
|
7664 |
+
"eval_loss": 0.8074722290039062,
|
7665 |
+
"eval_runtime": 1.3011,
|
7666 |
+
"eval_samples_per_second": 768.601,
|
7667 |
+
"eval_steps_per_second": 12.298,
|
7668 |
+
"step": 383000
|
7669 |
+
},
|
7670 |
+
{
|
7671 |
+
"epoch": 9.77,
|
7672 |
+
"learning_rate": 5.09551460335499e-05,
|
7673 |
+
"loss": 0.2732,
|
7674 |
+
"step": 383500
|
7675 |
+
},
|
7676 |
+
{
|
7677 |
+
"epoch": 9.79,
|
7678 |
+
"learning_rate": 5.062173644895296e-05,
|
7679 |
+
"loss": 0.2735,
|
7680 |
+
"step": 384000
|
7681 |
+
},
|
7682 |
+
{
|
7683 |
+
"epoch": 9.79,
|
7684 |
+
"eval_loss": 0.8107377290725708,
|
7685 |
+
"eval_runtime": 1.3439,
|
7686 |
+
"eval_samples_per_second": 744.113,
|
7687 |
+
"eval_steps_per_second": 11.906,
|
7688 |
+
"step": 384000
|
7689 |
+
},
|
7690 |
+
{
|
7691 |
+
"epoch": 9.8,
|
7692 |
+
"learning_rate": 5.0289468328323434e-05,
|
7693 |
+
"loss": 0.2728,
|
7694 |
+
"step": 384500
|
7695 |
+
},
|
7696 |
+
{
|
7697 |
+
"epoch": 9.81,
|
7698 |
+
"learning_rate": 4.995834530529208e-05,
|
7699 |
+
"loss": 0.2729,
|
7700 |
+
"step": 385000
|
7701 |
+
},
|
7702 |
+
{
|
7703 |
+
"epoch": 9.81,
|
7704 |
+
"eval_loss": 0.8120101690292358,
|
7705 |
+
"eval_runtime": 1.2896,
|
7706 |
+
"eval_samples_per_second": 775.434,
|
7707 |
+
"eval_steps_per_second": 12.407,
|
7708 |
+
"step": 385000
|
7709 |
+
},
|
7710 |
+
{
|
7711 |
+
"epoch": 9.82,
|
7712 |
+
"learning_rate": 4.9628371000967394e-05,
|
7713 |
+
"loss": 0.2731,
|
7714 |
+
"step": 385500
|
7715 |
+
},
|
7716 |
+
{
|
7717 |
+
"epoch": 9.84,
|
7718 |
+
"learning_rate": 4.929954902389534e-05,
|
7719 |
+
"loss": 0.2731,
|
7720 |
+
"step": 386000
|
7721 |
+
},
|
7722 |
+
{
|
7723 |
+
"epoch": 9.84,
|
7724 |
+
"eval_loss": 0.8058800101280212,
|
7725 |
+
"eval_runtime": 1.3835,
|
7726 |
+
"eval_samples_per_second": 722.807,
|
7727 |
+
"eval_steps_per_second": 11.565,
|
7728 |
+
"step": 386000
|
7729 |
+
},
|
7730 |
+
{
|
7731 |
+
"epoch": 9.85,
|
7732 |
+
"learning_rate": 4.897188297002046e-05,
|
7733 |
+
"loss": 0.2732,
|
7734 |
+
"step": 386500
|
7735 |
+
},
|
7736 |
+
{
|
7737 |
+
"epoch": 9.86,
|
7738 |
+
"learning_rate": 4.8645376422646226e-05,
|
7739 |
+
"loss": 0.2727,
|
7740 |
+
"step": 387000
|
7741 |
+
},
|
7742 |
+
{
|
7743 |
+
"epoch": 9.86,
|
7744 |
+
"eval_loss": 0.8082349896430969,
|
7745 |
+
"eval_runtime": 1.3707,
|
7746 |
+
"eval_samples_per_second": 729.563,
|
7747 |
+
"eval_steps_per_second": 11.673,
|
7748 |
+
"step": 387000
|
7749 |
+
},
|
7750 |
+
{
|
7751 |
+
"epoch": 9.88,
|
7752 |
+
"learning_rate": 4.832003295239591e-05,
|
7753 |
+
"loss": 0.2728,
|
7754 |
+
"step": 387500
|
7755 |
+
},
|
7756 |
+
{
|
7757 |
+
"epoch": 9.89,
|
7758 |
+
"learning_rate": 4.7995856117173624e-05,
|
7759 |
+
"loss": 0.2726,
|
7760 |
+
"step": 388000
|
7761 |
+
},
|
7762 |
+
{
|
7763 |
+
"epoch": 9.89,
|
7764 |
+
"eval_loss": 0.8089985847473145,
|
7765 |
+
"eval_runtime": 1.3037,
|
7766 |
+
"eval_samples_per_second": 767.033,
|
7767 |
+
"eval_steps_per_second": 12.273,
|
7768 |
+
"step": 388000
|
7769 |
+
},
|
7770 |
+
{
|
7771 |
+
"epoch": 9.9,
|
7772 |
+
"learning_rate": 4.767284946212521e-05,
|
7773 |
+
"loss": 0.2727,
|
7774 |
+
"step": 388500
|
7775 |
+
},
|
7776 |
+
{
|
7777 |
+
"epoch": 9.91,
|
7778 |
+
"learning_rate": 4.735101651959977e-05,
|
7779 |
+
"loss": 0.2727,
|
7780 |
+
"step": 389000
|
7781 |
+
},
|
7782 |
+
{
|
7783 |
+
"epoch": 9.91,
|
7784 |
+
"eval_loss": 0.8019598126411438,
|
7785 |
+
"eval_runtime": 1.3529,
|
7786 |
+
"eval_samples_per_second": 739.159,
|
7787 |
+
"eval_steps_per_second": 11.827,
|
7788 |
+
"step": 389000
|
7789 |
+
},
|
7790 |
+
{
|
7791 |
+
"epoch": 9.93,
|
7792 |
+
"learning_rate": 4.7030360809110754e-05,
|
7793 |
+
"loss": 0.2726,
|
7794 |
+
"step": 389500
|
7795 |
+
},
|
7796 |
+
{
|
7797 |
+
"epoch": 9.94,
|
7798 |
+
"learning_rate": 4.6710885837297726e-05,
|
7799 |
+
"loss": 0.273,
|
7800 |
+
"step": 390000
|
7801 |
+
},
|
7802 |
+
{
|
7803 |
+
"epoch": 9.94,
|
7804 |
+
"eval_loss": 0.8114839792251587,
|
7805 |
+
"eval_runtime": 1.3138,
|
7806 |
+
"eval_samples_per_second": 761.14,
|
7807 |
+
"eval_steps_per_second": 12.178,
|
7808 |
+
"step": 390000
|
7809 |
}
|
7810 |
],
|
7811 |
"max_steps": 500000,
|
7812 |
"num_train_epochs": 13,
|
7813 |
+
"total_flos": 1.2459877101988095e+22,
|
7814 |
"trial_name": null,
|
7815 |
"trial_params": null
|
7816 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da03f29f9d43fcaa0d12888b54defcd8a6a1be294c2c1e7d74429358a9688082
|
3 |
size 102501541
|