Upload folder using huggingface_hub

- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -1403
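
A commit with this title is what the `upload_folder` helper in `huggingface_hub` produces. As a minimal sketch of the call (the local path and repo id below are placeholders, not values recorded in this commit):

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login`
api.upload_folder(
    folder_path="./checkpoint-248000",   # hypothetical local checkpoint folder
    repo_id="your-username/your-model",  # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)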
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:671c007da97093393af79ca4122b30dbe0eacca718f9789ff8971d6c2e56056c
 size 1603041146
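The body of each binary file here is a Git LFS pointer: a version line, a sha256 object id, and a byte size. Only the pointer lives in git; the payload is stored out of band. A sketch, assuming a locally downloaded copy, of checking a file against its pointer:

import hashlib
import os

def verify_lfs_pointer(path: str, expected_oid: str, expected_size: int) -> bool:
    # The pointer pins both the byte size and the sha256 digest of the payload.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# For the new optimizer.pt above:
# verify_lfs_pointer("optimizer.pt",
#                    "671c007da97093393af79ca4122b30dbe0eacca718f9789ff8971d6c2e56056c",
#                    1603041146)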
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:201951c3307c5287d99dba3b404524cc00f37c2c3e376adb491bf561525119f2
 size 1253695538
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:51a8927aad5b8fa5acfc364555592057ea9824d9a07853b39462940bdb3c167b
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:955040d0ab4a98e858e89bc7b9f85ba88a35b45c1ae13d3d89b36942f10d8f5f
 size 1064
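Together with trainer_state.json below, these four files are the usual contents of a transformers Trainer checkpoint: optimizer state, model weights, RNG state, and LR scheduler state. To pull any one of them from the Hub, `hf_hub_download` resolves the LFS pointer to the actual payload; the repo id below is a placeholder:

from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="your-username/your-model",  # hypothetical repo containing this commit
    filename="scheduler.pt",
)
print(local_path)  # cached local copy of the 1,064-byte scheduler state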
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.010031634190783,
+  "epoch": 1.0019513811172567,
   "eval_steps": 500,
-  "global_step": 250000,
+  "global_step": 248000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -173615,1406 +173615,6 @@
       "learning_rate": 7.797699192912955e-09,
       "loss": 98.4398,
       "step": 248000
-    },
-    {
-      "epoch": 1.0019917823826243,
-      "grad_norm": 375.5564270019531,
-      "learning_rate": 7.719922147779723e-09,
-      "loss": 62.2112,
-      "step": 248010
-    },
[... 1,386 further deleted lines: the log entries for steps 248020 through 249990, each recording epoch, grad_norm, learning_rate, loss, and step ...]
-    {
-      "epoch": 1.010031634190783,
-      "grad_norm": 511.8416442871094,
-      "learning_rate": 0.0,
-      "loss": 39.5704,
-      "step": 250000
     }
   ],
   "logging_steps": 10,
@@ -175029,7 +173629,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
+        "should_training_stop": false
       },
       "attributes": {}
     }
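Net effect of the trainer_state.json change: log_history is truncated back to step 248000 (the old file ran to step 250000, where the schedule had decayed to a learning rate of 0.0), the top-level epoch and global_step are rewound to match, and should_training_stop is reset to false, so training can resume from this checkpoint. A small sketch for sanity-checking such a state file (the path is an assumption; the keys are the ones Trainer writes):

import json

with open("checkpoint-248000/trainer_state.json") as f:  # hypothetical path
    state = json.load(f)

last = state["log_history"][-1]
# After the rollback, the top-level counter should mirror the final log entry.
assert state["global_step"] == last["step"]
print(state["epoch"], last.get("loss"), last.get("learning_rate"))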