Training in progress, step 1000000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3934f519240d590552d43746648c081056a7995bf6c44310ab67246f6ef8ad67
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:619263ccd39f733619bbbf55e178f9282f2d9680aa9481a120d8cd9e41fe0f1b
|
3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f12e8f0b7966c04954bff8e89ed067117d335fd21dca824245f60b5603214287
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d806e9f9f09813043b95cbeda18b18cdfb60c100fbde3239bf79ee81c659dc36
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 11.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -19806,11 +19806,211 @@
|
|
19806 |
"eval_samples_per_second": 878.327,
|
19807 |
"eval_steps_per_second": 13.766,
|
19808 |
"step": 990000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19809 |
}
|
19810 |
],
|
19811 |
"max_steps": 1000000,
|
19812 |
"num_train_epochs": 12,
|
19813 |
-
"total_flos":
|
19814 |
"trial_name": null,
|
19815 |
"trial_params": null
|
19816 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 11.152872422292361,
|
5 |
+
"global_step": 1000000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
19806 |
"eval_samples_per_second": 878.327,
|
19807 |
"eval_steps_per_second": 13.766,
|
19808 |
"step": 990000
|
19809 |
+
},
|
19810 |
+
{
|
19811 |
+
"epoch": 11.05,
|
19812 |
+
"learning_rate": 1.003454077439879e-05,
|
19813 |
+
"loss": 0.1795,
|
19814 |
+
"step": 990500
|
19815 |
+
},
|
19816 |
+
{
|
19817 |
+
"epoch": 11.05,
|
19818 |
+
"learning_rate": 1.0031000845556304e-05,
|
19819 |
+
"loss": 0.1792,
|
19820 |
+
"step": 991000
|
19821 |
+
},
|
19822 |
+
{
|
19823 |
+
"epoch": 11.05,
|
19824 |
+
"eval_loss": 0.17132483422756195,
|
19825 |
+
"eval_runtime": 2.6196,
|
19826 |
+
"eval_samples_per_second": 876.851,
|
19827 |
+
"eval_steps_per_second": 13.743,
|
19828 |
+
"step": 991000
|
19829 |
+
},
|
19830 |
+
{
|
19831 |
+
"epoch": 11.06,
|
19832 |
+
"learning_rate": 1.0027652209285743e-05,
|
19833 |
+
"loss": 0.1795,
|
19834 |
+
"step": 991500
|
19835 |
+
},
|
19836 |
+
{
|
19837 |
+
"epoch": 11.06,
|
19838 |
+
"learning_rate": 1.0024494874742152e-05,
|
19839 |
+
"loss": 0.1794,
|
19840 |
+
"step": 992000
|
19841 |
+
},
|
19842 |
+
{
|
19843 |
+
"epoch": 11.06,
|
19844 |
+
"eval_loss": 0.1712769716978073,
|
19845 |
+
"eval_runtime": 2.602,
|
19846 |
+
"eval_samples_per_second": 882.772,
|
19847 |
+
"eval_steps_per_second": 13.835,
|
19848 |
+
"step": 992000
|
19849 |
+
},
|
19850 |
+
{
|
19851 |
+
"epoch": 11.07,
|
19852 |
+
"learning_rate": 1.0021528850557572e-05,
|
19853 |
+
"loss": 0.1793,
|
19854 |
+
"step": 992500
|
19855 |
+
},
|
19856 |
+
{
|
19857 |
+
"epoch": 11.07,
|
19858 |
+
"learning_rate": 1.0018754144840986e-05,
|
19859 |
+
"loss": 0.1794,
|
19860 |
+
"step": 993000
|
19861 |
+
},
|
19862 |
+
{
|
19863 |
+
"epoch": 11.07,
|
19864 |
+
"eval_loss": 0.17019130289554596,
|
19865 |
+
"eval_runtime": 2.6352,
|
19866 |
+
"eval_samples_per_second": 871.66,
|
19867 |
+
"eval_steps_per_second": 13.661,
|
19868 |
+
"step": 993000
|
19869 |
+
},
|
19870 |
+
{
|
19871 |
+
"epoch": 11.08,
|
19872 |
+
"learning_rate": 1.0016170765178345e-05,
|
19873 |
+
"loss": 0.1796,
|
19874 |
+
"step": 993500
|
19875 |
+
},
|
19876 |
+
{
|
19877 |
+
"epoch": 11.09,
|
19878 |
+
"learning_rate": 1.0013778718632507e-05,
|
19879 |
+
"loss": 0.1795,
|
19880 |
+
"step": 994000
|
19881 |
+
},
|
19882 |
+
{
|
19883 |
+
"epoch": 11.09,
|
19884 |
+
"eval_loss": 0.16902120411396027,
|
19885 |
+
"eval_runtime": 2.6744,
|
19886 |
+
"eval_samples_per_second": 858.899,
|
19887 |
+
"eval_steps_per_second": 13.461,
|
19888 |
+
"step": 994000
|
19889 |
+
},
|
19890 |
+
{
|
19891 |
+
"epoch": 11.09,
|
19892 |
+
"learning_rate": 1.0011578011743233e-05,
|
19893 |
+
"loss": 0.1794,
|
19894 |
+
"step": 994500
|
19895 |
+
},
|
19896 |
+
{
|
19897 |
+
"epoch": 11.1,
|
19898 |
+
"learning_rate": 1.000956865052717e-05,
|
19899 |
+
"loss": 0.1795,
|
19900 |
+
"step": 995000
|
19901 |
+
},
|
19902 |
+
{
|
19903 |
+
"epoch": 11.1,
|
19904 |
+
"eval_loss": 0.17112106084823608,
|
19905 |
+
"eval_runtime": 2.6298,
|
19906 |
+
"eval_samples_per_second": 873.44,
|
19907 |
+
"eval_steps_per_second": 13.689,
|
19908 |
+
"step": 995000
|
19909 |
+
},
|
19910 |
+
{
|
19911 |
+
"epoch": 11.1,
|
19912 |
+
"learning_rate": 1.0007750640477843e-05,
|
19913 |
+
"loss": 0.1797,
|
19914 |
+
"step": 995500
|
19915 |
+
},
|
19916 |
+
{
|
19917 |
+
"epoch": 11.11,
|
19918 |
+
"learning_rate": 1.0006123986565623e-05,
|
19919 |
+
"loss": 0.1797,
|
19920 |
+
"step": 996000
|
19921 |
+
},
|
19922 |
+
{
|
19923 |
+
"epoch": 11.11,
|
19924 |
+
"eval_loss": 0.17197231948375702,
|
19925 |
+
"eval_runtime": 2.6674,
|
19926 |
+
"eval_samples_per_second": 861.138,
|
19927 |
+
"eval_steps_per_second": 13.496,
|
19928 |
+
"step": 996000
|
19929 |
+
},
|
19930 |
+
{
|
19931 |
+
"epoch": 11.11,
|
19932 |
+
"learning_rate": 1.0004688693237708e-05,
|
19933 |
+
"loss": 0.179,
|
19934 |
+
"step": 996500
|
19935 |
+
},
|
19936 |
+
{
|
19937 |
+
"epoch": 11.12,
|
19938 |
+
"learning_rate": 1.0003444764418138e-05,
|
19939 |
+
"loss": 0.1795,
|
19940 |
+
"step": 997000
|
19941 |
+
},
|
19942 |
+
{
|
19943 |
+
"epoch": 11.12,
|
19944 |
+
"eval_loss": 0.16935667395591736,
|
19945 |
+
"eval_runtime": 2.6744,
|
19946 |
+
"eval_samples_per_second": 858.882,
|
19947 |
+
"eval_steps_per_second": 13.461,
|
19948 |
+
"step": 997000
|
19949 |
+
},
|
19950 |
+
{
|
19951 |
+
"epoch": 11.12,
|
19952 |
+
"learning_rate": 1.0002392203507781e-05,
|
19953 |
+
"loss": 0.1796,
|
19954 |
+
"step": 997500
|
19955 |
+
},
|
19956 |
+
{
|
19957 |
+
"epoch": 11.13,
|
19958 |
+
"learning_rate": 1.000153101338428e-05,
|
19959 |
+
"loss": 0.1794,
|
19960 |
+
"step": 998000
|
19961 |
+
},
|
19962 |
+
{
|
19963 |
+
"epoch": 11.13,
|
19964 |
+
"eval_loss": 0.16944564878940582,
|
19965 |
+
"eval_runtime": 2.6058,
|
19966 |
+
"eval_samples_per_second": 881.508,
|
19967 |
+
"eval_steps_per_second": 13.816,
|
19968 |
+
"step": 998000
|
19969 |
+
},
|
19970 |
+
{
|
19971 |
+
"epoch": 11.14,
|
19972 |
+
"learning_rate": 1.00008611964021e-05,
|
19973 |
+
"loss": 0.1795,
|
19974 |
+
"step": 998500
|
19975 |
+
},
|
19976 |
+
{
|
19977 |
+
"epoch": 11.14,
|
19978 |
+
"learning_rate": 1.00003827543925e-05,
|
19979 |
+
"loss": 0.1797,
|
19980 |
+
"step": 999000
|
19981 |
+
},
|
19982 |
+
{
|
19983 |
+
"epoch": 11.14,
|
19984 |
+
"eval_loss": 0.1695910096168518,
|
19985 |
+
"eval_runtime": 2.6979,
|
19986 |
+
"eval_samples_per_second": 851.388,
|
19987 |
+
"eval_steps_per_second": 13.343,
|
19988 |
+
"step": 999000
|
19989 |
+
},
|
19990 |
+
{
|
19991 |
+
"epoch": 11.15,
|
19992 |
+
"learning_rate": 1.0000095688663532e-05,
|
19993 |
+
"loss": 0.1796,
|
19994 |
+
"step": 999500
|
19995 |
+
},
|
19996 |
+
{
|
19997 |
+
"epoch": 11.15,
|
19998 |
+
"learning_rate": 1e-05,
|
19999 |
+
"loss": 0.1796,
|
20000 |
+
"step": 1000000
|
20001 |
+
},
|
20002 |
+
{
|
20003 |
+
"epoch": 11.15,
|
20004 |
+
"eval_loss": 0.16828955709934235,
|
20005 |
+
"eval_runtime": 2.6549,
|
20006 |
+
"eval_samples_per_second": 865.189,
|
20007 |
+
"eval_steps_per_second": 13.56,
|
20008 |
+
"step": 1000000
|
20009 |
}
|
20010 |
],
|
20011 |
"max_steps": 1000000,
|
20012 |
"num_train_epochs": 12,
|
20013 |
+
"total_flos": 7.009965862112043e+22,
|
20014 |
"trial_name": null,
|
20015 |
"trial_params": null
|
20016 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:619263ccd39f733619bbbf55e178f9282f2d9680aa9481a120d8cd9e41fe0f1b
|
3 |
size 449471589
|