{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.976312925460403,
  "global_step": 1870000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "learning_rate": 4.973325366509464e-05,
      "loss": 5.9346,
      "step": 10000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.946650733018929e-05,
      "loss": 4.8171,
      "step": 20000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.919976099528393e-05,
      "loss": 4.378,
      "step": 30000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.893301466037857e-05,
      "loss": 4.0841,
      "step": 40000
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.866626832547321e-05,
      "loss": 3.8807,
      "step": 50000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.839952199056785e-05,
      "loss": 3.7164,
      "step": 60000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.8132775655662495e-05,
      "loss": 3.584,
      "step": 70000
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.786602932075713e-05,
      "loss": 3.4784,
      "step": 80000
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.759928298585178e-05,
      "loss": 3.3842,
      "step": 90000
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.733253665094642e-05,
      "loss": 3.3004,
      "step": 100000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.7065790316041056e-05,
      "loss": 3.2337,
      "step": 110000
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.6799043981135706e-05,
      "loss": 3.1757,
      "step": 120000
    },
    {
      "epoch": 0.69,
      "learning_rate": 4.653229764623034e-05,
      "loss": 3.1198,
      "step": 130000
    },
    {
      "epoch": 0.75,
      "learning_rate": 4.626555131132499e-05,
      "loss": 3.0722,
      "step": 140000
    },
    {
      "epoch": 0.8,
      "learning_rate": 4.5998804976419624e-05,
      "loss": 3.0281,
      "step": 150000
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.573205864151427e-05,
      "loss": 2.9954,
      "step": 160000
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.546531230660891e-05,
      "loss": 2.9605,
      "step": 170000
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.519856597170355e-05,
      "loss": 2.9286,
      "step": 180000
    },
    {
      "epoch": 1.01,
      "learning_rate": 4.493181963679819e-05,
      "loss": 2.8946,
      "step": 190000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.4665073301892835e-05,
      "loss": 2.8688,
      "step": 200000
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.439832696698747e-05,
      "loss": 2.8424,
      "step": 210000
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.4131580632082116e-05,
      "loss": 2.8178,
      "step": 220000
    },
    {
      "epoch": 1.23,
      "learning_rate": 4.386483429717676e-05,
      "loss": 2.7994,
      "step": 230000
    },
    {
      "epoch": 1.28,
      "learning_rate": 4.35980879622714e-05,
      "loss": 2.784,
      "step": 240000
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.333134162736604e-05,
      "loss": 2.7612,
      "step": 250000
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.3064595292460684e-05,
      "loss": 2.7435,
      "step": 260000
    },
    {
      "epoch": 1.44,
      "learning_rate": 4.279784895755533e-05,
      "loss": 2.7275,
      "step": 270000
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.2531102622649964e-05,
      "loss": 2.7117,
      "step": 280000
    },
    {
      "epoch": 1.55,
      "learning_rate": 4.226435628774461e-05,
      "loss": 2.6908,
      "step": 290000
    },
    {
      "epoch": 1.6,
      "learning_rate": 4.199760995283925e-05,
      "loss": 2.6787,
      "step": 300000
    },
    {
      "epoch": 1.65,
      "learning_rate": 4.173086361793389e-05,
      "loss": 2.6626,
      "step": 310000
    },
    {
      "epoch": 1.71,
      "learning_rate": 4.146411728302853e-05,
      "loss": 2.6431,
      "step": 320000
    },
    {
      "epoch": 1.76,
      "learning_rate": 4.1197370948123176e-05,
      "loss": 2.6329,
      "step": 330000
    },
    {
      "epoch": 1.81,
      "learning_rate": 4.093062461321781e-05,
      "loss": 2.6195,
      "step": 340000
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.066387827831246e-05,
      "loss": 2.6042,
      "step": 350000
    },
    {
      "epoch": 1.92,
      "learning_rate": 4.03971319434071e-05,
      "loss": 2.5967,
      "step": 360000
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.0130385608501744e-05,
      "loss": 2.5843,
      "step": 370000
    },
    {
      "epoch": 2.03,
      "learning_rate": 3.986363927359638e-05,
      "loss": 2.5683,
      "step": 380000
    },
    {
      "epoch": 2.08,
      "learning_rate": 3.9596892938691025e-05,
      "loss": 2.5605,
      "step": 390000
    },
    {
      "epoch": 2.13,
      "learning_rate": 3.933014660378567e-05,
      "loss": 2.5515,
      "step": 400000
    },
    {
      "epoch": 2.19,
      "learning_rate": 3.9063400268880305e-05,
      "loss": 2.5406,
      "step": 410000
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.879665393397495e-05,
      "loss": 2.5315,
      "step": 420000
    },
    {
      "epoch": 2.29,
      "learning_rate": 3.852990759906959e-05,
      "loss": 2.5272,
      "step": 430000
    },
    {
      "epoch": 2.35,
      "learning_rate": 3.826316126416423e-05,
      "loss": 2.5199,
      "step": 440000
    },
    {
      "epoch": 2.4,
      "learning_rate": 3.799641492925887e-05,
      "loss": 2.5061,
      "step": 450000
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.772966859435352e-05,
      "loss": 2.5007,
      "step": 460000
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.746292225944816e-05,
      "loss": 2.4908,
      "step": 470000
    },
    {
      "epoch": 2.56,
      "learning_rate": 3.71961759245428e-05,
      "loss": 2.4817,
      "step": 480000
    },
    {
      "epoch": 2.61,
      "learning_rate": 3.6929429589637434e-05,
      "loss": 2.4731,
      "step": 490000
    },
    {
      "epoch": 2.67,
      "learning_rate": 3.6662683254732085e-05,
      "loss": 2.4659,
      "step": 500000
    },
    {
      "epoch": 2.72,
      "learning_rate": 3.639593691982672e-05,
      "loss": 2.4596,
      "step": 510000
    },
    {
      "epoch": 2.77,
      "learning_rate": 3.6129190584921365e-05,
      "loss": 2.4506,
      "step": 520000
    },
    {
      "epoch": 2.83,
      "learning_rate": 3.586244425001601e-05,
      "loss": 2.4425,
      "step": 530000
    },
    {
      "epoch": 2.88,
      "learning_rate": 3.5595697915110646e-05,
      "loss": 2.4378,
      "step": 540000
    },
    {
      "epoch": 2.93,
      "learning_rate": 3.532895158020529e-05,
      "loss": 2.4301,
      "step": 550000
    },
    {
      "epoch": 2.99,
      "learning_rate": 3.506220524529993e-05,
      "loss": 2.4195,
      "step": 560000
    },
    {
      "epoch": 3.04,
      "learning_rate": 3.479545891039458e-05,
      "loss": 2.4108,
      "step": 570000
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.4528712575489214e-05,
      "loss": 2.4003,
      "step": 580000
    },
    {
      "epoch": 3.15,
      "learning_rate": 3.426196624058385e-05,
      "loss": 2.3948,
      "step": 590000
    },
    {
      "epoch": 3.2,
      "learning_rate": 3.39952199056785e-05,
      "loss": 2.3911,
      "step": 600000
    },
    {
      "epoch": 3.25,
      "learning_rate": 3.372847357077314e-05,
      "loss": 2.3805,
      "step": 610000
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.346172723586778e-05,
      "loss": 2.3719,
      "step": 620000
    },
    {
      "epoch": 3.36,
      "learning_rate": 3.3194980900962426e-05,
      "loss": 2.3685,
      "step": 630000
    },
    {
      "epoch": 3.41,
      "learning_rate": 3.292823456605706e-05,
      "loss": 2.3668,
      "step": 640000
    },
    {
      "epoch": 3.47,
      "learning_rate": 3.2661488231151706e-05,
      "loss": 2.3584,
      "step": 650000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.239474189624634e-05,
      "loss": 2.3538,
      "step": 660000
    },
    {
      "epoch": 3.57,
      "learning_rate": 3.212799556134099e-05,
      "loss": 2.3481,
      "step": 670000
    },
    {
      "epoch": 3.63,
      "learning_rate": 3.186124922643563e-05,
      "loss": 2.3495,
      "step": 680000
    },
    {
      "epoch": 3.68,
      "learning_rate": 3.159450289153027e-05,
      "loss": 2.3413,
      "step": 690000
    },
    {
      "epoch": 3.73,
      "learning_rate": 3.132775655662492e-05,
      "loss": 2.3325,
      "step": 700000
    },
    {
      "epoch": 3.79,
      "learning_rate": 3.1061010221719555e-05,
      "loss": 2.3269,
      "step": 710000
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.07942638868142e-05,
      "loss": 2.3255,
      "step": 720000
    },
    {
      "epoch": 3.89,
      "learning_rate": 3.052751755190884e-05,
      "loss": 2.3168,
      "step": 730000
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.0260771217003482e-05,
      "loss": 2.3179,
      "step": 740000
    },
    {
      "epoch": 4.0,
      "learning_rate": 2.999402488209812e-05,
      "loss": 2.3098,
      "step": 750000
    },
    {
      "epoch": 4.05,
      "learning_rate": 2.972727854719276e-05,
      "loss": 2.2928,
      "step": 760000
    },
    {
      "epoch": 4.11,
      "learning_rate": 2.9460532212287407e-05,
      "loss": 2.2911,
      "step": 770000
    },
    {
      "epoch": 4.16,
      "learning_rate": 2.9193785877382047e-05,
      "loss": 2.2906,
      "step": 780000
    },
    {
      "epoch": 4.21,
      "learning_rate": 2.8927039542476687e-05,
      "loss": 2.2824,
      "step": 790000
    },
    {
      "epoch": 4.27,
      "learning_rate": 2.866029320757133e-05,
      "loss": 2.2766,
      "step": 800000
    },
    {
      "epoch": 4.32,
      "learning_rate": 2.839354687266597e-05,
      "loss": 2.2722,
      "step": 810000
    },
    {
      "epoch": 4.37,
      "learning_rate": 2.812680053776061e-05,
      "loss": 2.2665,
      "step": 820000
    },
    {
      "epoch": 4.43,
      "learning_rate": 2.786005420285525e-05,
      "loss": 2.259,
      "step": 830000
    },
    {
      "epoch": 4.48,
      "learning_rate": 2.7593307867949895e-05,
      "loss": 2.2584,
      "step": 840000
    },
    {
      "epoch": 4.53,
      "learning_rate": 2.7326561533044536e-05,
      "loss": 2.2524,
      "step": 850000
    },
    {
      "epoch": 4.59,
      "learning_rate": 2.7059815198139176e-05,
      "loss": 2.2536,
      "step": 860000
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.6793068863233823e-05,
      "loss": 2.2446,
      "step": 870000
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.6526322528328463e-05,
      "loss": 2.2439,
      "step": 880000
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.6259576193423104e-05,
      "loss": 2.2389,
      "step": 890000
    },
    {
      "epoch": 4.8,
      "learning_rate": 2.5992829858517747e-05,
      "loss": 2.2362,
      "step": 900000
    },
    {
      "epoch": 4.85,
      "learning_rate": 2.5726083523612388e-05,
      "loss": 2.2313,
      "step": 910000
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.5459337188707028e-05,
      "loss": 2.2283,
      "step": 920000
    },
    {
      "epoch": 4.96,
      "learning_rate": 2.5192590853801668e-05,
      "loss": 2.2224,
      "step": 930000
    },
    {
      "epoch": 5.01,
      "learning_rate": 2.4925844518896312e-05,
      "loss": 2.2175,
      "step": 940000
    },
    {
      "epoch": 5.07,
      "learning_rate": 2.4659098183990952e-05,
      "loss": 2.2067,
      "step": 950000
    },
    {
      "epoch": 5.12,
      "learning_rate": 2.4392351849085596e-05,
      "loss": 2.2029,
      "step": 960000
    },
    {
      "epoch": 5.17,
      "learning_rate": 2.4125605514180236e-05,
      "loss": 2.2014,
      "step": 970000
    },
    {
      "epoch": 5.23,
      "learning_rate": 2.385885917927488e-05,
      "loss": 2.2002,
      "step": 980000
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.3592112844369517e-05,
      "loss": 2.195,
      "step": 990000
    },
    {
      "epoch": 5.33,
      "learning_rate": 2.332536650946416e-05,
      "loss": 2.1898,
      "step": 1000000
    },
    {
      "epoch": 5.39,
      "learning_rate": 2.3058620174558804e-05,
      "loss": 2.1864,
      "step": 1010000
    },
    {
      "epoch": 5.44,
      "learning_rate": 2.2791873839653444e-05,
      "loss": 2.1833,
      "step": 1020000
    },
    {
      "epoch": 5.49,
      "learning_rate": 2.2525127504748088e-05,
      "loss": 2.1799,
      "step": 1030000
    },
    {
      "epoch": 5.55,
      "learning_rate": 2.2258381169842725e-05,
      "loss": 2.1785,
      "step": 1040000
    },
    {
      "epoch": 5.6,
      "learning_rate": 2.199163483493737e-05,
      "loss": 2.1756,
      "step": 1050000
    },
    {
      "epoch": 5.66,
      "learning_rate": 2.1724888500032012e-05,
      "loss": 2.1726,
      "step": 1060000
    },
    {
      "epoch": 5.71,
      "learning_rate": 2.1458142165126653e-05,
      "loss": 2.1664,
      "step": 1070000
    },
    {
      "epoch": 5.76,
      "learning_rate": 2.1191395830221293e-05,
      "loss": 2.1651,
      "step": 1080000
    },
    {
      "epoch": 5.82,
      "learning_rate": 2.0924649495315933e-05,
      "loss": 2.1577,
      "step": 1090000
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.0657903160410577e-05,
      "loss": 2.1472,
      "step": 1100000
    },
    {
      "epoch": 5.92,
      "learning_rate": 2.039115682550522e-05,
      "loss": 2.1458,
      "step": 1110000
    },
    {
      "epoch": 5.98,
      "learning_rate": 2.012441049059986e-05,
      "loss": 2.1432,
      "step": 1120000
    },
    {
      "epoch": 6.03,
      "learning_rate": 1.98576641556945e-05,
      "loss": 2.134,
      "step": 1130000
    },
    {
      "epoch": 6.08,
      "learning_rate": 1.959091782078914e-05,
      "loss": 2.1337,
      "step": 1140000
    },
    {
      "epoch": 6.14,
      "learning_rate": 1.9324171485883785e-05,
      "loss": 2.1337,
      "step": 1150000
    },
    {
      "epoch": 6.19,
      "learning_rate": 1.9057425150978425e-05,
      "loss": 2.128,
      "step": 1160000
    },
    {
      "epoch": 6.24,
      "learning_rate": 1.879067881607307e-05,
      "loss": 2.1262,
      "step": 1170000
    },
    {
      "epoch": 6.3,
      "learning_rate": 1.852393248116771e-05,
      "loss": 2.1209,
      "step": 1180000
    },
    {
      "epoch": 6.35,
      "learning_rate": 1.825718614626235e-05,
      "loss": 2.1178,
      "step": 1190000
    },
    {
      "epoch": 6.4,
      "learning_rate": 1.7990439811356993e-05,
      "loss": 2.1179,
      "step": 1200000
    },
    {
      "epoch": 6.46,
      "learning_rate": 1.7723693476451634e-05,
      "loss": 2.1123,
      "step": 1210000
    },
    {
      "epoch": 6.51,
      "learning_rate": 1.7456947141546277e-05,
      "loss": 2.1098,
      "step": 1220000
    },
    {
      "epoch": 6.56,
      "learning_rate": 1.7190200806640918e-05,
      "loss": 2.1115,
      "step": 1230000
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.6923454471735558e-05,
      "loss": 2.1068,
      "step": 1240000
    },
    {
      "epoch": 6.67,
      "learning_rate": 1.66567081368302e-05,
      "loss": 2.1002,
      "step": 1250000
    },
    {
      "epoch": 6.72,
      "learning_rate": 1.6389961801924842e-05,
      "loss": 2.0962,
      "step": 1260000
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.6123215467019486e-05,
      "loss": 2.0961,
      "step": 1270000
    },
    {
      "epoch": 6.83,
      "learning_rate": 1.5856469132114126e-05,
      "loss": 2.0911,
      "step": 1280000
    },
    {
      "epoch": 6.88,
      "learning_rate": 1.5589722797208766e-05,
      "loss": 2.0916,
      "step": 1290000
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.532297646230341e-05,
      "loss": 2.0854,
      "step": 1300000
    },
    {
      "epoch": 6.99,
      "learning_rate": 1.505623012739805e-05,
      "loss": 2.0846,
      "step": 1310000
    },
    {
      "epoch": 7.04,
      "learning_rate": 1.4789483792492692e-05,
      "loss": 2.072,
      "step": 1320000
    },
    {
      "epoch": 7.1,
      "learning_rate": 1.4522737457587332e-05,
      "loss": 2.0691,
      "step": 1330000
    },
    {
      "epoch": 7.15,
      "learning_rate": 1.4255991122681974e-05,
      "loss": 2.072,
      "step": 1340000
    },
    {
      "epoch": 7.2,
      "learning_rate": 1.3989244787776618e-05,
      "loss": 2.0669,
      "step": 1350000
    },
    {
      "epoch": 7.26,
      "learning_rate": 1.3722498452871257e-05,
      "loss": 2.0646,
      "step": 1360000
    },
    {
      "epoch": 7.31,
      "learning_rate": 1.34557521179659e-05,
      "loss": 2.0573,
      "step": 1370000
    },
    {
      "epoch": 7.36,
      "learning_rate": 1.318900578306054e-05,
      "loss": 2.0487,
      "step": 1380000
    },
    {
      "epoch": 7.42,
      "learning_rate": 1.2922259448155183e-05,
      "loss": 2.0529,
      "step": 1390000
    },
    {
      "epoch": 7.47,
      "learning_rate": 1.2655513113249826e-05,
      "loss": 2.0468,
      "step": 1400000
    },
    {
      "epoch": 7.52,
      "learning_rate": 1.2388766778344465e-05,
      "loss": 2.0455,
      "step": 1410000
    },
    {
      "epoch": 7.58,
      "learning_rate": 1.2122020443439109e-05,
      "loss": 2.046,
      "step": 1420000
    },
    {
      "epoch": 7.63,
      "learning_rate": 1.1855274108533749e-05,
      "loss": 2.0398,
      "step": 1430000
    },
    {
      "epoch": 7.68,
      "learning_rate": 1.158852777362839e-05,
      "loss": 2.0391,
      "step": 1440000
    },
    {
      "epoch": 7.74,
      "learning_rate": 1.1321781438723033e-05,
      "loss": 2.0345,
      "step": 1450000
    },
    {
      "epoch": 7.79,
      "learning_rate": 1.1055035103817673e-05,
      "loss": 2.038,
      "step": 1460000
    },
    {
      "epoch": 7.84,
      "learning_rate": 1.0788288768912317e-05,
      "loss": 2.032,
      "step": 1470000
    },
    {
      "epoch": 7.9,
      "learning_rate": 1.0521542434006957e-05,
      "loss": 2.0271,
      "step": 1480000
    },
    {
      "epoch": 7.95,
      "learning_rate": 1.0254796099101599e-05,
      "loss": 2.0261,
      "step": 1490000
    },
    {
      "epoch": 8.0,
      "learning_rate": 9.988049764196241e-06,
      "loss": 2.0235,
      "step": 1500000
    },
    {
      "epoch": 8.06,
      "learning_rate": 9.721303429290881e-06,
      "loss": 2.0173,
      "step": 1510000
    },
    {
      "epoch": 8.11,
      "learning_rate": 9.454557094385523e-06,
      "loss": 2.0178,
      "step": 1520000
    },
    {
      "epoch": 8.16,
      "learning_rate": 9.187810759480165e-06,
      "loss": 2.0116,
      "step": 1530000
    },
    {
      "epoch": 8.22,
      "learning_rate": 8.921064424574807e-06,
      "loss": 2.0099,
      "step": 1540000
    },
    {
      "epoch": 8.27,
      "learning_rate": 8.654318089669448e-06,
      "loss": 2.0105,
      "step": 1550000
    },
    {
      "epoch": 8.32,
      "learning_rate": 8.38757175476409e-06,
      "loss": 2.0033,
      "step": 1560000
    },
    {
      "epoch": 8.38,
      "learning_rate": 8.120825419858732e-06,
      "loss": 2.0033,
      "step": 1570000
    },
    {
      "epoch": 8.43,
      "learning_rate": 7.854079084953374e-06,
      "loss": 2.0,
      "step": 1580000
    },
    {
      "epoch": 8.48,
      "learning_rate": 7.5873327500480155e-06,
      "loss": 1.9954,
      "step": 1590000
    },
    {
      "epoch": 8.54,
      "learning_rate": 7.320586415142657e-06,
      "loss": 1.9964,
      "step": 1600000
    },
    {
      "epoch": 8.59,
      "learning_rate": 7.053840080237298e-06,
      "loss": 1.9995,
      "step": 1610000
    },
    {
      "epoch": 8.64,
      "learning_rate": 6.787093745331939e-06,
      "loss": 1.9878,
      "step": 1620000
    },
    {
      "epoch": 8.7,
      "learning_rate": 6.52034741042658e-06,
      "loss": 1.988,
      "step": 1630000
    },
    {
      "epoch": 8.75,
      "learning_rate": 6.253601075521223e-06,
      "loss": 1.9861,
      "step": 1640000
    },
    {
      "epoch": 8.8,
      "learning_rate": 5.986854740615865e-06,
      "loss": 1.9828,
      "step": 1650000
    },
    {
      "epoch": 8.86,
      "learning_rate": 5.720108405710506e-06,
      "loss": 1.9787,
      "step": 1660000
    },
    {
      "epoch": 8.91,
      "learning_rate": 5.453362070805147e-06,
      "loss": 1.9794,
      "step": 1670000
    },
    {
      "epoch": 8.96,
      "learning_rate": 5.186615735899789e-06,
      "loss": 1.9756,
      "step": 1680000
    },
    {
      "epoch": 9.02,
      "learning_rate": 4.91986940099443e-06,
      "loss": 1.9756,
      "step": 1690000
    },
    {
      "epoch": 9.07,
      "learning_rate": 4.653123066089072e-06,
      "loss": 1.9676,
      "step": 1700000
    },
    {
      "epoch": 9.12,
      "learning_rate": 4.386376731183714e-06,
      "loss": 1.967,
      "step": 1710000
    },
    {
      "epoch": 9.18,
      "learning_rate": 4.119630396278355e-06,
      "loss": 1.9656,
      "step": 1720000
    },
    {
      "epoch": 9.23,
      "learning_rate": 3.8528840613729966e-06,
      "loss": 1.9627,
      "step": 1730000
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.5861377264676385e-06,
      "loss": 1.964,
      "step": 1740000
    },
    {
      "epoch": 9.34,
      "learning_rate": 3.31939139156228e-06,
      "loss": 1.9646,
      "step": 1750000
    },
    {
      "epoch": 9.39,
      "learning_rate": 3.0526450566569217e-06,
      "loss": 1.9591,
      "step": 1760000
    },
    {
      "epoch": 9.44,
      "learning_rate": 2.7858987217515632e-06,
      "loss": 1.9623,
      "step": 1770000
    },
    {
      "epoch": 9.5,
      "learning_rate": 2.519152386846205e-06,
      "loss": 1.9544,
      "step": 1780000
    },
    {
      "epoch": 9.55,
      "learning_rate": 2.2524060519408464e-06,
      "loss": 1.9572,
      "step": 1790000
    },
    {
      "epoch": 9.6,
      "learning_rate": 1.985659717035488e-06,
      "loss": 1.9545,
      "step": 1800000
    },
    {
      "epoch": 9.66,
      "learning_rate": 1.7189133821301297e-06,
      "loss": 1.953,
      "step": 1810000
    },
    {
      "epoch": 9.71,
      "learning_rate": 1.4521670472247713e-06,
      "loss": 1.9523,
      "step": 1820000
    },
    {
      "epoch": 9.76,
      "learning_rate": 1.1854207123194128e-06,
      "loss": 1.9528,
      "step": 1830000
    },
    {
      "epoch": 9.82,
      "learning_rate": 9.186743774140545e-07,
      "loss": 1.9482,
      "step": 1840000
    },
    {
      "epoch": 9.87,
      "learning_rate": 6.519280425086959e-07,
      "loss": 1.9511,
      "step": 1850000
    },
    {
      "epoch": 9.92,
      "learning_rate": 3.8518170760333756e-07,
      "loss": 1.947,
      "step": 1860000
    },
    {
      "epoch": 9.98,
      "learning_rate": 1.1843537269797913e-07,
      "loss": 1.9437,
      "step": 1870000
    }
  ],
  "max_steps": 1874440,
  "num_train_epochs": 10,
  "total_flos": 9.135177187546945e+18,
  "trial_name": null,
  "trial_params": null
}