|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.5449260292101905, |
|
"eval_steps": 5290, |
|
"global_step": 75000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.797255277633667, |
|
"eval_runtime": 160.9316, |
|
"eval_samples_per_second": 5.773, |
|
"eval_steps_per_second": 5.773, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9999916262476826e-06, |
|
"loss": 2.339, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9999626800634057e-06, |
|
"loss": 2.1788, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9999130584664085e-06, |
|
"loss": 2.1946, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9998427621406735e-06, |
|
"loss": 2.2431, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9997517920551614e-06, |
|
"loss": 2.1155, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9996401494637996e-06, |
|
"loss": 2.0998, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9995078359054642e-06, |
|
"loss": 2.0592, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.999354853203959e-06, |
|
"loss": 2.0821, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9991812034679892e-06, |
|
"loss": 1.8844, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9989868890911354e-06, |
|
"loss": 2.1784, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9987719127518173e-06, |
|
"loss": 2.0341, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9985362774132576e-06, |
|
"loss": 2.1155, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9982799863234435e-06, |
|
"loss": 2.0074, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.998003043015078e-06, |
|
"loss": 2.0324, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9977054513055346e-06, |
|
"loss": 1.9387, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.997387215296803e-06, |
|
"loss": 2.0548, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.997048339375433e-06, |
|
"loss": 2.0709, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9966888282124733e-06, |
|
"loss": 2.009, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9963086867634087e-06, |
|
"loss": 1.9616, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9959079202680905e-06, |
|
"loss": 1.983, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9954865342506646e-06, |
|
"loss": 2.0902, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9950445345194956e-06, |
|
"loss": 2.1337, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.994581927167085e-06, |
|
"loss": 1.9246, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.994098718569992e-06, |
|
"loss": 2.0217, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.9935949153887393e-06, |
|
"loss": 2.0509, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.993070524567726e-06, |
|
"loss": 2.013, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.992525553335129e-06, |
|
"loss": 1.8444, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.991960009202806e-06, |
|
"loss": 1.9667, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.9913738999661895e-06, |
|
"loss": 1.9942, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.990767233704181e-06, |
|
"loss": 1.975, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.9901400187790383e-06, |
|
"loss": 2.015, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.989492263836262e-06, |
|
"loss": 2.122, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9888239778044748e-06, |
|
"loss": 1.8877, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.988135169895298e-06, |
|
"loss": 2.0659, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.9874258496032273e-06, |
|
"loss": 1.8897, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.9866960267054987e-06, |
|
"loss": 1.9466, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.985945711261956e-06, |
|
"loss": 1.9438, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.9851749136149105e-06, |
|
"loss": 2.0251, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.984383644388999e-06, |
|
"loss": 2.0244, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.9835719144910395e-06, |
|
"loss": 1.9022, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.982739735109876e-06, |
|
"loss": 2.0163, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.98188711771623e-06, |
|
"loss": 1.9168, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.9810140740625364e-06, |
|
"loss": 1.9695, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9801206161827883e-06, |
|
"loss": 1.9114, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9792067563923653e-06, |
|
"loss": 2.0469, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.9782725072878657e-06, |
|
"loss": 1.8072, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.9773178817469342e-06, |
|
"loss": 1.8899, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.976342892928083e-06, |
|
"loss": 1.9418, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.9753475542705106e-06, |
|
"loss": 2.1559, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.974331879493916e-06, |
|
"loss": 2.001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.973295882598313e-06, |
|
"loss": 2.051, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.9722395778638296e-06, |
|
"loss": 1.9767, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.4832066297531128, |
|
"eval_runtime": 162.8547, |
|
"eval_samples_per_second": 5.704, |
|
"eval_steps_per_second": 5.704, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.971162979850521e-06, |
|
"loss": 1.8538, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9700661033981615e-06, |
|
"loss": 1.7968, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.9689489636260424e-06, |
|
"loss": 1.7703, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.967811575932764e-06, |
|
"loss": 1.9824, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.9666539559960238e-06, |
|
"loss": 1.9332, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.965476119772398e-06, |
|
"loss": 1.8362, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.964278083497125e-06, |
|
"loss": 1.7958, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.963059863683877e-06, |
|
"loss": 1.7677, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9618214771245376e-06, |
|
"loss": 2.0132, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9605629408889673e-06, |
|
"loss": 1.8406, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9592842723247676e-06, |
|
"loss": 2.0235, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9579854890570448e-06, |
|
"loss": 1.9383, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.956666608988164e-06, |
|
"loss": 1.8556, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.9553276502975034e-06, |
|
"loss": 1.8689, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9539686314412053e-06, |
|
"loss": 2.0381, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.9525895711519195e-06, |
|
"loss": 2.0205, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.951190488438546e-06, |
|
"loss": 1.8647, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.9497714025859727e-06, |
|
"loss": 1.8074, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.94833233315481e-06, |
|
"loss": 1.9039, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.9468732999811216e-06, |
|
"loss": 2.0103, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.94539432317615e-06, |
|
"loss": 1.9635, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.943895423126038e-06, |
|
"loss": 1.8708, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.942376620491553e-06, |
|
"loss": 1.7572, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.940837936207796e-06, |
|
"loss": 1.9795, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9392793914839165e-06, |
|
"loss": 2.0192, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.937701007802819e-06, |
|
"loss": 1.8849, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.9361028069208675e-06, |
|
"loss": 1.9925, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.934484810867586e-06, |
|
"loss": 2.004, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.9328470419453527e-06, |
|
"loss": 1.9084, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.9311895227290954e-06, |
|
"loss": 1.8507, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.929512276065978e-06, |
|
"loss": 1.8185, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9278153250750875e-06, |
|
"loss": 1.7862, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9260986931471136e-06, |
|
"loss": 1.8444, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.924362403944027e-06, |
|
"loss": 2.0304, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.922606481398755e-06, |
|
"loss": 1.9337, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.920830949714848e-06, |
|
"loss": 1.9937, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.919035833366148e-06, |
|
"loss": 1.9554, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.917221157096452e-06, |
|
"loss": 1.9068, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.9153869459191693e-06, |
|
"loss": 1.9063, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.913533225116978e-06, |
|
"loss": 1.9342, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9116600202414754e-06, |
|
"loss": 2.0052, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.9097673571128266e-06, |
|
"loss": 1.8102, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.9078552618194086e-06, |
|
"loss": 1.9959, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.9059237607174494e-06, |
|
"loss": 1.9136, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9039728804306666e-06, |
|
"loss": 1.9124, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9020026478498988e-06, |
|
"loss": 1.9215, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9000130901327377e-06, |
|
"loss": 1.93, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.8980042347031482e-06, |
|
"loss": 1.82, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8959761092510978e-06, |
|
"loss": 1.8436, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8939287417321676e-06, |
|
"loss": 1.8995, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8918621603671737e-06, |
|
"loss": 1.9337, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8897763936417715e-06, |
|
"loss": 1.9088, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.88767147030607e-06, |
|
"loss": 1.8474, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.4355759620666504, |
|
"eval_runtime": 163.1072, |
|
"eval_samples_per_second": 5.696, |
|
"eval_steps_per_second": 5.696, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.885547419374229e-06, |
|
"loss": 1.9638, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.883404270124063e-06, |
|
"loss": 1.9945, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.881242052096638e-06, |
|
"loss": 1.8143, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.879060795095863e-06, |
|
"loss": 1.7915, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8768605291880767e-06, |
|
"loss": 1.8868, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8746412847016387e-06, |
|
"loss": 1.8033, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.8724030922265068e-06, |
|
"loss": 2.0053, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.870145982613818e-06, |
|
"loss": 1.867, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.867869986975461e-06, |
|
"loss": 1.8002, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.865575136683649e-06, |
|
"loss": 1.8835, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.863261463370487e-06, |
|
"loss": 1.7312, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.8609289989275353e-06, |
|
"loss": 1.8402, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.858577775505371e-06, |
|
"loss": 1.9007, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.856207825513144e-06, |
|
"loss": 1.8235, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.853819181618129e-06, |
|
"loss": 1.8568, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.851411876745278e-06, |
|
"loss": 1.9159, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.848985944076763e-06, |
|
"loss": 1.9857, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.846541417051524e-06, |
|
"loss": 1.8676, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.8440783293648015e-06, |
|
"loss": 1.8022, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.8415967149676773e-06, |
|
"loss": 1.8365, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8390966080666035e-06, |
|
"loss": 1.8702, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.8365780431229317e-06, |
|
"loss": 1.8221, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.8340410548524395e-06, |
|
"loss": 1.8498, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.8314856782248494e-06, |
|
"loss": 1.8906, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.8289119484633485e-06, |
|
"loss": 2.0184, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.8263199010441038e-06, |
|
"loss": 1.8205, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.82370957169577e-06, |
|
"loss": 1.9686, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.8210809963990004e-06, |
|
"loss": 1.7651, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.8184342113859494e-06, |
|
"loss": 1.8216, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.815769253139773e-06, |
|
"loss": 1.8081, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.813086158394126e-06, |
|
"loss": 1.7233, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.8103849641326563e-06, |
|
"loss": 1.8446, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.807665707588494e-06, |
|
"loss": 1.8379, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.8049284262437393e-06, |
|
"loss": 1.8149, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.802173157828946e-06, |
|
"loss": 1.9463, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.799399940322599e-06, |
|
"loss": 1.8382, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.7966088119505945e-06, |
|
"loss": 1.8039, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.79379981118571e-06, |
|
"loss": 2.0244, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.7909729767470757e-06, |
|
"loss": 1.8587, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.7881283475996405e-06, |
|
"loss": 1.8551, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.7852659629536335e-06, |
|
"loss": 1.9153, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.782385862264027e-06, |
|
"loss": 1.7548, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.779488085229987e-06, |
|
"loss": 1.8052, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.7765726717943334e-06, |
|
"loss": 1.7594, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.773639662142983e-06, |
|
"loss": 1.8186, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.770689096704397e-06, |
|
"loss": 1.9036, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7677210161490276e-06, |
|
"loss": 1.8217, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.7647354613887523e-06, |
|
"loss": 1.8397, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.761732473576313e-06, |
|
"loss": 1.7251, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7587120941047475e-06, |
|
"loss": 1.8731, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.7556743646068202e-06, |
|
"loss": 1.805, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.752619326954447e-06, |
|
"loss": 1.8677, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.749547023258118e-06, |
|
"loss": 1.8121, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.4021737575531006, |
|
"eval_runtime": 163.1438, |
|
"eval_samples_per_second": 5.694, |
|
"eval_steps_per_second": 5.694, |
|
"step": 15870 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.7464574958663186e-06, |
|
"loss": 1.8015, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.743350787364944e-06, |
|
"loss": 1.7014, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.7402269405767133e-06, |
|
"loss": 1.7616, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7370859985605794e-06, |
|
"loss": 1.7529, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7339280046111336e-06, |
|
"loss": 1.7992, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.7307530022580115e-06, |
|
"loss": 1.5267, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.7275610352652913e-06, |
|
"loss": 1.6973, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.7243521476308908e-06, |
|
"loss": 1.813, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.721126383585962e-06, |
|
"loss": 1.842, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.7178837875942787e-06, |
|
"loss": 1.9349, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.7146244043516273e-06, |
|
"loss": 1.7218, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.7113482787851883e-06, |
|
"loss": 1.8096, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.7080554560529164e-06, |
|
"loss": 1.7827, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 2.7047459815429214e-06, |
|
"loss": 1.7434, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.7014199008728377e-06, |
|
"loss": 1.8203, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 2.698077259889201e-06, |
|
"loss": 1.7201, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.6947181046668113e-06, |
|
"loss": 1.8474, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.691342481508102e-06, |
|
"loss": 1.7868, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.6879504369424983e-06, |
|
"loss": 1.7272, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.6845420177257774e-06, |
|
"loss": 1.8764, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.6811172708394243e-06, |
|
"loss": 1.5964, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.6776762434899845e-06, |
|
"loss": 1.7725, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.6742189831084106e-06, |
|
"loss": 1.8118, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.6707455373494125e-06, |
|
"loss": 1.6714, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.667255954090798e-06, |
|
"loss": 1.7673, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6637502814328124e-06, |
|
"loss": 1.8517, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6602285676974786e-06, |
|
"loss": 1.7459, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6566908614279262e-06, |
|
"loss": 1.8677, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6531372113877273e-06, |
|
"loss": 1.8378, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.649567666560222e-06, |
|
"loss": 1.7712, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.645982276147842e-06, |
|
"loss": 1.7846, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6423810895714345e-06, |
|
"loss": 1.7452, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.6387641564695807e-06, |
|
"loss": 1.8064, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.635131526697911e-06, |
|
"loss": 1.6403, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.631483250328417e-06, |
|
"loss": 1.7232, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.627819377648764e-06, |
|
"loss": 1.8836, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.6241399591615938e-06, |
|
"loss": 1.8373, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.620445045583833e-06, |
|
"loss": 1.7807, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.6167346878459907e-06, |
|
"loss": 1.8299, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.6130089370914575e-06, |
|
"loss": 1.8572, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.609267844675801e-06, |
|
"loss": 1.651, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.605511462166057e-06, |
|
"loss": 1.8989, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.6017398413400198e-06, |
|
"loss": 1.8421, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.597953034185528e-06, |
|
"loss": 1.8114, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.5941510928997473e-06, |
|
"loss": 1.8759, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.590334069888451e-06, |
|
"loss": 1.8544, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5865020177652995e-06, |
|
"loss": 1.717, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5826549893511133e-06, |
|
"loss": 1.7786, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.578793037673145e-06, |
|
"loss": 1.6818, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.574916215964348e-06, |
|
"loss": 1.6679, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5710245776626463e-06, |
|
"loss": 1.8773, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.5671181764101916e-06, |
|
"loss": 1.6672, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.56319706605263e-06, |
|
"loss": 1.8333, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.367815613746643, |
|
"eval_runtime": 161.7042, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 5.745, |
|
"step": 21160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.5592613006383554e-06, |
|
"loss": 1.873, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.5553109344177676e-06, |
|
"loss": 1.7398, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.5513460218425225e-06, |
|
"loss": 1.8562, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5473666175647824e-06, |
|
"loss": 1.8687, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.543372776436463e-06, |
|
"loss": 1.8159, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.539364553508476e-06, |
|
"loss": 1.7736, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 2.5353420040299714e-06, |
|
"loss": 1.8746, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.531305183447576e-06, |
|
"loss": 1.7582, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.527254147404629e-06, |
|
"loss": 1.9113, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.5231889517404136e-06, |
|
"loss": 1.8019, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.5191096524893894e-06, |
|
"loss": 1.8494, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.5150163058804203e-06, |
|
"loss": 1.698, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.5109089683359967e-06, |
|
"loss": 1.7218, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.5067876964714582e-06, |
|
"loss": 1.7944, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.502652547094218e-06, |
|
"loss": 1.8057, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.4985035772029737e-06, |
|
"loss": 1.677, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.4943408439869243e-06, |
|
"loss": 1.8319, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.490164404824983e-06, |
|
"loss": 1.742, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.485974317284983e-06, |
|
"loss": 1.7521, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.4817706391228884e-06, |
|
"loss": 1.8927, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.4775534282819945e-06, |
|
"loss": 1.6825, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.473322742892131e-06, |
|
"loss": 1.7289, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4690786412688594e-06, |
|
"loss": 1.8572, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.4648211819126706e-06, |
|
"loss": 1.7959, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.460550423508178e-06, |
|
"loss": 1.765, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.4562664249233064e-06, |
|
"loss": 1.7334, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.451969245208486e-06, |
|
"loss": 1.6651, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.4476589435958323e-06, |
|
"loss": 1.7472, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 2.4433355794983336e-06, |
|
"loss": 1.8278, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.43899921250903e-06, |
|
"loss": 1.6537, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.4346499024001946e-06, |
|
"loss": 1.6281, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.430287709122506e-06, |
|
"loss": 1.8405, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.425912692804224e-06, |
|
"loss": 1.7661, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.4215249137503624e-06, |
|
"loss": 1.7644, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 2.417124432441853e-06, |
|
"loss": 1.6826, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.412711309534717e-06, |
|
"loss": 1.7262, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.4082856058592265e-06, |
|
"loss": 1.8845, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.4038473824190656e-06, |
|
"loss": 1.922, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.399396700390491e-06, |
|
"loss": 1.703, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.394933621121487e-06, |
|
"loss": 1.83, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.3904582061309217e-06, |
|
"loss": 1.6753, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.3859705171076983e-06, |
|
"loss": 1.8203, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.3814706159099038e-06, |
|
"loss": 1.7362, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.376958564563958e-06, |
|
"loss": 1.8836, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.372434425263757e-06, |
|
"loss": 1.7072, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.367898260369818e-06, |
|
"loss": 1.6916, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.3633501324084165e-06, |
|
"loss": 1.6549, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.358790104070728e-06, |
|
"loss": 1.7526, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.354218238211962e-06, |
|
"loss": 1.7785, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.349634597850495e-06, |
|
"loss": 1.7332, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.3450392461670026e-06, |
|
"loss": 1.7434, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.3404322465035903e-06, |
|
"loss": 1.8742, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.3358136623629167e-06, |
|
"loss": 1.6601, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 1.3507641553878784, |
|
"eval_runtime": 162.7404, |
|
"eval_samples_per_second": 5.708, |
|
"eval_steps_per_second": 5.708, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.331183557407322e-06, |
|
"loss": 1.7639, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.3265419954579467e-06, |
|
"loss": 1.849, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.321889040493856e-06, |
|
"loss": 1.9006, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.317224756651156e-06, |
|
"loss": 1.6524, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.3125492082221074e-06, |
|
"loss": 1.8237, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.307862459654243e-06, |
|
"loss": 1.7348, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.303164575549478e-06, |
|
"loss": 1.6887, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.298455620663217e-06, |
|
"loss": 1.7558, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.293735659903468e-06, |
|
"loss": 1.8181, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.2890047583299385e-06, |
|
"loss": 1.7344, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.284262981153147e-06, |
|
"loss": 1.8456, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.27951039373352e-06, |
|
"loss": 1.711, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.2747470615804907e-06, |
|
"loss": 1.7673, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.269973050351599e-06, |
|
"loss": 1.7957, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.265188425851583e-06, |
|
"loss": 1.6838, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.260393254031475e-06, |
|
"loss": 1.6342, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.2555876009876904e-06, |
|
"loss": 1.8296, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.250771532961118e-06, |
|
"loss": 1.7831, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.2459451163362036e-06, |
|
"loss": 1.7551, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.241108417640041e-06, |
|
"loss": 1.708, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.2362615035414496e-06, |
|
"loss": 1.7695, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.231404440850058e-06, |
|
"loss": 1.6231, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.2265372965153827e-06, |
|
"loss": 1.7269, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.2216601376259044e-06, |
|
"loss": 1.6641, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.2167730314081447e-06, |
|
"loss": 1.7724, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.211876045225738e-06, |
|
"loss": 1.909, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.2069692465785034e-06, |
|
"loss": 1.7163, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.202052703101516e-06, |
|
"loss": 1.857, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.1971264825641716e-06, |
|
"loss": 1.6806, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.1921906528692556e-06, |
|
"loss": 1.7828, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.187245282052004e-06, |
|
"loss": 1.7669, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.1822904382791686e-06, |
|
"loss": 1.7001, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.1773261898480747e-06, |
|
"loss": 1.6504, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.172352605185682e-06, |
|
"loss": 1.6888, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.167369752847639e-06, |
|
"loss": 1.6804, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.162377701517341e-06, |
|
"loss": 1.5615, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.1573765200049817e-06, |
|
"loss": 1.6089, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.1523662772466025e-06, |
|
"loss": 1.7575, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.1473470423031475e-06, |
|
"loss": 1.6443, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.1423188843595067e-06, |
|
"loss": 1.6201, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.1372818727235653e-06, |
|
"loss": 1.7594, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.132236076825247e-06, |
|
"loss": 1.6505, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.127181566215557e-06, |
|
"loss": 1.8139, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.122118410565624e-06, |
|
"loss": 1.738, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.11704667966574e-06, |
|
"loss": 1.693, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.111966443424397e-06, |
|
"loss": 1.8003, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.1068777718673254e-06, |
|
"loss": 1.8407, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.101780735136526e-06, |
|
"loss": 1.5816, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.0966754034893047e-06, |
|
"loss": 1.6609, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.0915618472973062e-06, |
|
"loss": 1.7292, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.0864401370455406e-06, |
|
"loss": 1.7347, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.081310343331413e-06, |
|
"loss": 1.748, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.0761725368637496e-06, |
|
"loss": 1.5452, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 1.3357341289520264, |
|
"eval_runtime": 162.3538, |
|
"eval_samples_per_second": 5.722, |
|
"eval_steps_per_second": 5.722, |
|
"step": 31740 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.0710267884618273e-06, |
|
"loss": 1.6686, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.0658731690543905e-06, |
|
"loss": 1.72, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.0607117496786794e-06, |
|
"loss": 1.7252, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.0555426014794477e-06, |
|
"loss": 1.6562, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.050365795707983e-06, |
|
"loss": 1.6878, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.0451814037211256e-06, |
|
"loss": 1.7308, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.0399894969802814e-06, |
|
"loss": 1.6544, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.034790147050442e-06, |
|
"loss": 1.7115, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.0295834255991927e-06, |
|
"loss": 1.8076, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.024369404395731e-06, |
|
"loss": 1.6923, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.01914815530987e-06, |
|
"loss": 1.8198, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.013919750311055e-06, |
|
"loss": 1.5914, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.008684261467365e-06, |
|
"loss": 1.7334, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.003441760944525e-06, |
|
"loss": 1.6914, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.998192321004908e-06, |
|
"loss": 1.5967, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.992936014006538e-06, |
|
"loss": 1.6271, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.9876729124020963e-06, |
|
"loss": 1.5439, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 1.982403088737918e-06, |
|
"loss": 1.5242, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.977126615652999e-06, |
|
"loss": 1.7863, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.9718435658779864e-06, |
|
"loss": 1.7852, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.9665540122341817e-06, |
|
"loss": 1.7474, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.9612580276325363e-06, |
|
"loss": 1.818, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9559556850726433e-06, |
|
"loss": 1.8187, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9506470576417362e-06, |
|
"loss": 1.6308, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.9453322185136772e-06, |
|
"loss": 1.5877, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.9400112409479507e-06, |
|
"loss": 1.5775, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.9346841982886527e-06, |
|
"loss": 1.6369, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.929351163963481e-06, |
|
"loss": 1.7436, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.924012211482721e-06, |
|
"loss": 1.7817, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.918667414438235e-06, |
|
"loss": 1.7958, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.9133168465024454e-06, |
|
"loss": 1.6632, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.907960581427321e-06, |
|
"loss": 1.7518, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.9025986930433594e-06, |
|
"loss": 1.7184, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.8972312552585695e-06, |
|
"loss": 1.6154, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.891858342057453e-06, |
|
"loss": 1.7069, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.8864800274999842e-06, |
|
"loss": 1.6902, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.8810963857205902e-06, |
|
"loss": 1.6736, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.8757074909271275e-06, |
|
"loss": 1.7893, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.8703134173998603e-06, |
|
"loss": 1.7374, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.864914239490436e-06, |
|
"loss": 1.7173, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.8595100316208608e-06, |
|
"loss": 1.6844, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.854100868282473e-06, |
|
"loss": 1.6794, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.8486868240349173e-06, |
|
"loss": 1.65, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.8432679735051177e-06, |
|
"loss": 1.6641, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.8378443913862453e-06, |
|
"loss": 1.6942, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.8324161524366935e-06, |
|
"loss": 1.782, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.8269833314790437e-06, |
|
"loss": 1.5728, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.8215460033990368e-06, |
|
"loss": 1.6751, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.8161042431445376e-06, |
|
"loss": 1.5691, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.8106581257245064e-06, |
|
"loss": 1.7601, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.8052077262079612e-06, |
|
"loss": 1.6157, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 1.799753119722943e-06, |
|
"loss": 1.7615, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.7942943814554837e-06, |
|
"loss": 1.7381, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.319101095199585, |
|
"eval_runtime": 162.3139, |
|
"eval_samples_per_second": 5.723, |
|
"eval_steps_per_second": 5.723, |
|
"step": 37030 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 1.7888315866485659e-06, |
|
"loss": 1.7177, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.7833648106010884e-06, |
|
"loss": 1.7527, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.7778941286668257e-06, |
|
"loss": 1.6938, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.772419616253393e-06, |
|
"loss": 1.7706, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.7669413488212027e-06, |
|
"loss": 1.6078, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.761459401882427e-06, |
|
"loss": 1.6867, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.755973850999957e-06, |
|
"loss": 1.6677, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.750484771786358e-06, |
|
"loss": 1.6582, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 1.7449922399028333e-06, |
|
"loss": 1.6047, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.7394963310581735e-06, |
|
"loss": 1.8746, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.733997121007721e-06, |
|
"loss": 1.549, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.7284946855523186e-06, |
|
"loss": 1.7323, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.7229891005372704e-06, |
|
"loss": 1.734, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7174804418512918e-06, |
|
"loss": 1.6329, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7119687854254674e-06, |
|
"loss": 1.5707, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.7064542072322015e-06, |
|
"loss": 1.7011, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.7009367832841715e-06, |
|
"loss": 1.6164, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.6954165896332817e-06, |
|
"loss": 1.6312, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6898937023696123e-06, |
|
"loss": 1.7649, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.6843681976203744e-06, |
|
"loss": 1.6634, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.6788401515488557e-06, |
|
"loss": 1.6431, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.673309640353376e-06, |
|
"loss": 1.7147, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.6677767402662318e-06, |
|
"loss": 1.881, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.6622415275526502e-06, |
|
"loss": 1.6384, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.6567040785097333e-06, |
|
"loss": 1.6662, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.6511644694654109e-06, |
|
"loss": 1.6323, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.6456227767773842e-06, |
|
"loss": 1.7642, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.6400790768320761e-06, |
|
"loss": 1.6971, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6345334460435775e-06, |
|
"loss": 1.7224, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6289859608525936e-06, |
|
"loss": 1.7847, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.623436697725391e-06, |
|
"loss": 1.6998, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.6178857331527427e-06, |
|
"loss": 1.7637, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.6123331436488752e-06, |
|
"loss": 1.738, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.6067790057504125e-06, |
|
"loss": 1.8809, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.6012233960153213e-06, |
|
"loss": 1.6865, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.5956663910218566e-06, |
|
"loss": 1.7502, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.590108067367505e-06, |
|
"loss": 1.7131, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.58454850166793e-06, |
|
"loss": 1.6668, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.5789877705559149e-06, |
|
"loss": 1.6616, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.573425950680308e-06, |
|
"loss": 1.8484, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.567863118704963e-06, |
|
"loss": 1.722, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.562299351307686e-06, |
|
"loss": 1.6145, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.5567347251791773e-06, |
|
"loss": 1.744, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.5511693170219723e-06, |
|
"loss": 1.7476, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.5456032035493878e-06, |
|
"loss": 1.6705, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5400364614844604e-06, |
|
"loss": 1.5381, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.5344691675588926e-06, |
|
"loss": 1.7072, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5289013985119934e-06, |
|
"loss": 1.7217, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.5233332310896214e-06, |
|
"loss": 1.6447, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5177647420431253e-06, |
|
"loss": 1.6961, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5121960081282878e-06, |
|
"loss": 1.8037, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.5066271061042672e-06, |
|
"loss": 1.6076, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.5010581127325374e-06, |
|
"loss": 1.6256, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.309001088142395, |
|
"eval_runtime": 163.9053, |
|
"eval_samples_per_second": 5.668, |
|
"eval_steps_per_second": 5.668, |
|
"step": 42320 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.4954891047758328e-06, |
|
"loss": 1.6049, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.489920158997089e-06, |
|
"loss": 1.5866, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.4843513521583844e-06, |
|
"loss": 1.6174, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.4787827610198813e-06, |
|
"loss": 1.711, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.4732144623387696e-06, |
|
"loss": 1.6283, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.4676465328682085e-06, |
|
"loss": 1.7035, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.4620790493562662e-06, |
|
"loss": 1.6869, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.4565120885448656e-06, |
|
"loss": 1.6827, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.4509457271687238e-06, |
|
"loss": 1.7237, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.4453800419542962e-06, |
|
"loss": 1.6418, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.4398151096187167e-06, |
|
"loss": 1.7514, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.434251006868743e-06, |
|
"loss": 1.7102, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.4286878103996967e-06, |
|
"loss": 1.6147, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.4231255968944078e-06, |
|
"loss": 1.557, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.4175644430221568e-06, |
|
"loss": 1.6971, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.412004425437619e-06, |
|
"loss": 1.6645, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.4064456207798066e-06, |
|
"loss": 1.688, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.4008881056710125e-06, |
|
"loss": 1.7062, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.3953319567157556e-06, |
|
"loss": 1.5745, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.3897772504997228e-06, |
|
"loss": 1.5922, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.3842240635887154e-06, |
|
"loss": 1.7366, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.3786724725275911e-06, |
|
"loss": 1.7974, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.3731225538392125e-06, |
|
"loss": 1.7394, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.367574384023388e-06, |
|
"loss": 1.7766, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.3620280395558218e-06, |
|
"loss": 1.631, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.3564835968870557e-06, |
|
"loss": 1.6251, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.3509411324414191e-06, |
|
"loss": 1.6983, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.345400722615972e-06, |
|
"loss": 1.6382, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.3398624437794549e-06, |
|
"loss": 1.6588, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.3343263722712342e-06, |
|
"loss": 1.8123, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.3287925844002496e-06, |
|
"loss": 1.6796, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.3232611564439656e-06, |
|
"loss": 1.5431, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3177321646473154e-06, |
|
"loss": 1.57, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3122056852216538e-06, |
|
"loss": 1.6356, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3066817943437054e-06, |
|
"loss": 1.6333, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3011605681545126e-06, |
|
"loss": 1.595, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.29564208275839e-06, |
|
"loss": 1.5615, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.2901264142218712e-06, |
|
"loss": 1.7929, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.2846136385726644e-06, |
|
"loss": 1.8091, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.2791038317986009e-06, |
|
"loss": 1.6715, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.2735970698465896e-06, |
|
"loss": 1.6615, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.2680934286215696e-06, |
|
"loss": 1.6615, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.2625929839854644e-06, |
|
"loss": 1.7039, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.2570958117561357e-06, |
|
"loss": 1.7209, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.2516019877063388e-06, |
|
"loss": 1.7251, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2461115875626768e-06, |
|
"loss": 1.7202, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2406246870045588e-06, |
|
"loss": 1.7948, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2351413616631561e-06, |
|
"loss": 1.6631, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2296616871203584e-06, |
|
"loss": 1.6321, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2241857389077332e-06, |
|
"loss": 1.7737, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2187135925054852e-06, |
|
"loss": 1.5694, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2132453233414145e-06, |
|
"loss": 1.7562, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.207781006789877e-06, |
|
"loss": 1.5521, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 1.2960591316223145, |
|
"eval_runtime": 158.903, |
|
"eval_samples_per_second": 5.846, |
|
"eval_steps_per_second": 5.846, |
|
"step": 47610 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.202320718170748e-06, |
|
"loss": 1.6698, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.1968645327483792e-06, |
|
"loss": 1.5465, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.1914125257305654e-06, |
|
"loss": 1.6406, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.1859647722675075e-06, |
|
"loss": 1.6434, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.1805213474507738e-06, |
|
"loss": 1.5834, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1750823263122683e-06, |
|
"loss": 1.683, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.169647783823193e-06, |
|
"loss": 1.5975, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1642177948930188e-06, |
|
"loss": 1.6729, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1587924343684486e-06, |
|
"loss": 1.688, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1533717770323887e-06, |
|
"loss": 1.6362, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1479558976029164e-06, |
|
"loss": 1.7004, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1425448707322505e-06, |
|
"loss": 1.6087, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.137138771005723e-06, |
|
"loss": 1.6815, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1317376729407493e-06, |
|
"loss": 1.5914, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1263416509858032e-06, |
|
"loss": 1.5619, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1209507795193888e-06, |
|
"loss": 1.6197, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1155651328490174e-06, |
|
"loss": 1.6824, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.11018478521018e-06, |
|
"loss": 1.7277, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1048098107653282e-06, |
|
"loss": 1.6273, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0994402836028472e-06, |
|
"loss": 1.6803, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0940762777360401e-06, |
|
"loss": 1.5929, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0887178671021024e-06, |
|
"loss": 1.6484, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0833651255611058e-06, |
|
"loss": 1.7423, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0780181268949805e-06, |
|
"loss": 1.6847, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0726769448064956e-06, |
|
"loss": 1.6074, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0673416529182462e-06, |
|
"loss": 1.7478, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0620123247716362e-06, |
|
"loss": 1.7042, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0566890338258655e-06, |
|
"loss": 1.6337, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0513718534569187e-06, |
|
"loss": 1.7174, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0460608569565506e-06, |
|
"loss": 1.6805, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0407561175312802e-06, |
|
"loss": 1.5872, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.035457708301377e-06, |
|
"loss": 1.7103, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0301657022998575e-06, |
|
"loss": 1.7544, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.0248801724714746e-06, |
|
"loss": 1.6165, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.019601191671715e-06, |
|
"loss": 1.5813, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0143288326657935e-06, |
|
"loss": 1.6332, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0090631681276508e-06, |
|
"loss": 1.7332, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0038042706389505e-06, |
|
"loss": 1.5387, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.985522126880806e-07, |
|
"loss": 1.5534, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.93307066669153e-07, |
|
"loss": 1.6457, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.880689048810049e-07, |
|
"loss": 1.6818, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.828377995262048e-07, |
|
"loss": 1.5609, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.77613822710054e-07, |
|
"loss": 1.7747, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.72397046439596e-07, |
|
"loss": 1.7221, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.671875426226204e-07, |
|
"loss": 1.7983, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.61985383066676e-07, |
|
"loss": 1.6314, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.567906394780763e-07, |
|
"loss": 1.6959, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.516033834609155e-07, |
|
"loss": 1.6105, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.464236865160779e-07, |
|
"loss": 1.573, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.412516200402556e-07, |
|
"loss": 1.6789, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 9.360872553249605e-07, |
|
"loss": 1.7057, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.30930663555545e-07, |
|
"loss": 1.6102, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.257819158102203e-07, |
|
"loss": 1.8318, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 1.2909756898880005, |
|
"eval_runtime": 158.004, |
|
"eval_samples_per_second": 5.88, |
|
"eval_steps_per_second": 5.88, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.206410830590746e-07, |
|
"loss": 1.6514, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.15508236163097e-07, |
|
"loss": 1.7379, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.103834458732002e-07, |
|
"loss": 1.6323, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.052667828292439e-07, |
|
"loss": 1.8245, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.001583175590636e-07, |
|
"loss": 1.5375, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.950581204774961e-07, |
|
"loss": 1.737, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 8.899662618854105e-07, |
|
"loss": 1.6755, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.848828119687375e-07, |
|
"loss": 1.6737, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.798078407975051e-07, |
|
"loss": 1.7876, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.747414183248682e-07, |
|
"loss": 1.6804, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.696836143861491e-07, |
|
"loss": 1.5951, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.646344986978708e-07, |
|
"loss": 1.6206, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 8.595941408567983e-07, |
|
"loss": 1.7823, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.545626103389805e-07, |
|
"loss": 1.6832, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 8.495399764987894e-07, |
|
"loss": 1.6455, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.445263085679645e-07, |
|
"loss": 1.6894, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 8.395216756546627e-07, |
|
"loss": 1.5944, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.345261467425003e-07, |
|
"loss": 1.7441, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.295397906896052e-07, |
|
"loss": 1.7046, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 8.245626762276663e-07, |
|
"loss": 1.6335, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.195948719609889e-07, |
|
"loss": 1.7515, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 8.146364463655458e-07, |
|
"loss": 1.6208, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.096874677880322e-07, |
|
"loss": 1.6655, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 8.047480044449309e-07, |
|
"loss": 1.7218, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.998181244215638e-07, |
|
"loss": 1.5814, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.948978956711576e-07, |
|
"loss": 1.7588, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.899873860139058e-07, |
|
"loss": 1.6841, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.850866631360363e-07, |
|
"loss": 1.6321, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.801957945888744e-07, |
|
"loss": 1.654, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 7.75314847787914e-07, |
|
"loss": 1.6165, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.704438900118902e-07, |
|
"loss": 1.7136, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.655829884018475e-07, |
|
"loss": 1.6892, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.607322099602175e-07, |
|
"loss": 1.6254, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 7.558916215498944e-07, |
|
"loss": 1.5811, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.510612898933145e-07, |
|
"loss": 1.6081, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 7.462412815715343e-07, |
|
"loss": 1.5603, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.414316630233144e-07, |
|
"loss": 1.7405, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.366325005442026e-07, |
|
"loss": 1.6653, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 7.318438602856225e-07, |
|
"loss": 1.6596, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.270658082539581e-07, |
|
"loss": 1.706, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.222984103096469e-07, |
|
"loss": 1.718, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 7.175417321662698e-07, |
|
"loss": 1.6861, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 7.127958393896484e-07, |
|
"loss": 1.668, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 7.080607973969376e-07, |
|
"loss": 1.7527, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 7.033366714557257e-07, |
|
"loss": 1.7254, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 6.986235266831368e-07, |
|
"loss": 1.5732, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 6.93921428044928e-07, |
|
"loss": 1.6163, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 6.892304403545984e-07, |
|
"loss": 1.7492, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 6.845506282724956e-07, |
|
"loss": 1.7095, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.798820563049212e-07, |
|
"loss": 1.7914, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.75224788803245e-07, |
|
"loss": 1.6378, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.70578889963015e-07, |
|
"loss": 1.6761, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.2901337146759033, |
|
"eval_runtime": 158.1238, |
|
"eval_samples_per_second": 5.875, |
|
"eval_steps_per_second": 5.875, |
|
"step": 58190 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 6.659444238230763e-07, |
|
"loss": 1.6017, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 6.613214542646845e-07, |
|
"loss": 1.5221, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 6.567100450106276e-07, |
|
"loss": 1.7276, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.521102596243459e-07, |
|
"loss": 1.5169, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.475221615090591e-07, |
|
"loss": 1.7469, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.429458139068882e-07, |
|
"loss": 1.646, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.383812798979856e-07, |
|
"loss": 1.6483, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.338286223996673e-07, |
|
"loss": 1.5527, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.29287904165543e-07, |
|
"loss": 1.6215, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.247591877846517e-07, |
|
"loss": 1.6239, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.202425356805997e-07, |
|
"loss": 1.6994, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.157380101107016e-07, |
|
"loss": 1.5472, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.112456731651181e-07, |
|
"loss": 1.589, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.067655867660037e-07, |
|
"loss": 1.6836, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 6.022978126666509e-07, |
|
"loss": 1.6906, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.978424124506421e-07, |
|
"loss": 1.7639, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.933994475309969e-07, |
|
"loss": 1.6307, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.889689791493279e-07, |
|
"loss": 1.6508, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.84551068374996e-07, |
|
"loss": 1.6107, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.801457761042689e-07, |
|
"loss": 1.6451, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.757531630594812e-07, |
|
"loss": 1.6345, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.71373289788197e-07, |
|
"loss": 1.5496, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5.670062166623781e-07, |
|
"loss": 1.6161, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.626520038775476e-07, |
|
"loss": 1.618, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.583107114519624e-07, |
|
"loss": 1.5446, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.539823992257877e-07, |
|
"loss": 1.6561, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.496671268602682e-07, |
|
"loss": 1.7354, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.453649538369088e-07, |
|
"loss": 1.5153, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5.410759394566529e-07, |
|
"loss": 1.6056, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.368001428390672e-07, |
|
"loss": 1.674, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5.325376229215244e-07, |
|
"loss": 1.6993, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.282884384583917e-07, |
|
"loss": 1.6882, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.240526480202211e-07, |
|
"loss": 1.5872, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.198303099929429e-07, |
|
"loss": 1.6554, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 5.156214825770591e-07, |
|
"loss": 1.6168, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.114262237868423e-07, |
|
"loss": 1.5752, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.072445914495355e-07, |
|
"loss": 1.655, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.030766432045565e-07, |
|
"loss": 1.6429, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 4.989224365027009e-07, |
|
"loss": 1.6156, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.947820286053518e-07, |
|
"loss": 1.6634, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.906554765836916e-07, |
|
"loss": 1.7337, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.865428373179121e-07, |
|
"loss": 1.6085, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.824441674964334e-07, |
|
"loss": 1.6445, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.783595236151211e-07, |
|
"loss": 1.7347, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.7428896197650816e-07, |
|
"loss": 1.5851, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.702325386890184e-07, |
|
"loss": 1.6059, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.661903096661929e-07, |
|
"loss": 1.6562, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.6216233062592107e-07, |
|
"loss": 1.6983, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.581486570896701e-07, |
|
"loss": 1.7001, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.541493443817206e-07, |
|
"loss": 1.5994, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.501644476284045e-07, |
|
"loss": 1.6582, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.4619402175734606e-07, |
|
"loss": 1.7147, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.4223812149670195e-07, |
|
"loss": 1.6312, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.287863850593567, |
|
"eval_runtime": 157.981, |
|
"eval_samples_per_second": 5.88, |
|
"eval_steps_per_second": 5.88, |
|
"step": 63480 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.3829680137440883e-07, |
|
"loss": 1.5559, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.343701157174329e-07, |
|
"loss": 1.6739, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.3045811865101767e-07, |
|
"loss": 1.4717, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.265608640979411e-07, |
|
"loss": 1.6384, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 4.226784057777699e-07, |
|
"loss": 1.5138, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.1881079720612204e-07, |
|
"loss": 1.5968, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.149580916939255e-07, |
|
"loss": 1.5826, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 4.1112034234668615e-07, |
|
"loss": 1.7272, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.0729760206375404e-07, |
|
"loss": 1.722, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.0348992353759657e-07, |
|
"loss": 1.6016, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.9969735925306884e-07, |
|
"loss": 1.5948, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.95919961486693e-07, |
|
"loss": 1.4295, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.9215778230593563e-07, |
|
"loss": 1.6671, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.8841087356849295e-07, |
|
"loss": 1.6863, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.846792869215725e-07, |
|
"loss": 1.7321, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 3.8096307380118334e-07, |
|
"loss": 1.6549, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.7726228543142645e-07, |
|
"loss": 1.672, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.7357697282378916e-07, |
|
"loss": 1.5972, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.6990718677644103e-07, |
|
"loss": 1.6149, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.662529778735354e-07, |
|
"loss": 1.6483, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.6261439648450973e-07, |
|
"loss": 1.5785, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 3.5899149276339345e-07, |
|
"loss": 1.7107, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.553843166481148e-07, |
|
"loss": 1.6453, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.517929178598151e-07, |
|
"loss": 1.7473, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.4821734590216027e-07, |
|
"loss": 1.6577, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.4465765006066065e-07, |
|
"loss": 1.6899, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.4111387940199014e-07, |
|
"loss": 1.5638, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.3758608277331257e-07, |
|
"loss": 1.7071, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 3.3407430880160433e-07, |
|
"loss": 1.4997, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.3057860589298746e-07, |
|
"loss": 1.4916, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 3.2709902223206136e-07, |
|
"loss": 1.7187, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.2363560578123807e-07, |
|
"loss": 1.6423, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.2018840428008176e-07, |
|
"loss": 1.6532, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.167574652446497e-07, |
|
"loss": 1.7428, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.133428359668401e-07, |
|
"loss": 1.6502, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.099445635137365e-07, |
|
"loss": 1.6433, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.0656269472696136e-07, |
|
"loss": 1.8004, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 3.031972762220291e-07, |
|
"loss": 1.5027, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.998483543877065e-07, |
|
"loss": 1.7288, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.965159753853681e-07, |
|
"loss": 1.7243, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 2.93200185148364e-07, |
|
"loss": 1.7439, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.8990102938138685e-07, |
|
"loss": 1.6872, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.866185535598389e-07, |
|
"loss": 1.588, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.83352802929207e-07, |
|
"loss": 1.6774, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.801038225044403e-07, |
|
"loss": 1.663, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 2.7687165706932636e-07, |
|
"loss": 1.6019, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.7365635117587673e-07, |
|
"loss": 1.6143, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 2.704579491437113e-07, |
|
"loss": 1.7017, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.672764950594491e-07, |
|
"loss": 1.5511, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 2.641120327760981e-07, |
|
"loss": 1.6588, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.609646059124529e-07, |
|
"loss": 1.6361, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.578342578524922e-07, |
|
"loss": 1.6632, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2.54721031744782e-07, |
|
"loss": 1.7003, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_loss": 1.2819619178771973, |
|
"eval_runtime": 159.0772, |
|
"eval_samples_per_second": 5.84, |
|
"eval_steps_per_second": 5.84, |
|
"step": 68770 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 2.516249705018797e-07, |
|
"loss": 1.6595, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.485461167997429e-07, |
|
"loss": 1.5922, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 2.4548451307714115e-07, |
|
"loss": 1.6189, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.4244020153507233e-07, |
|
"loss": 1.697, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.394132241361782e-07, |
|
"loss": 1.5139, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.364036226041679e-07, |
|
"loss": 1.5705, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.334114384232437e-07, |
|
"loss": 1.7863, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.3043671283752649e-07, |
|
"loss": 1.7356, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 2.274794868504891e-07, |
|
"loss": 1.7676, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 2.2453980122439088e-07, |
|
"loss": 1.6916, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.2161769647971637e-07, |
|
"loss": 1.6342, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.1871321289461466e-07, |
|
"loss": 1.6444, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 2.158263905043462e-07, |
|
"loss": 1.5611, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 2.1295726910073137e-07, |
|
"loss": 1.5657, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.101058882316e-07, |
|
"loss": 1.696, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 2.072722872002473e-07, |
|
"loss": 1.524, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.0445650506489188e-07, |
|
"loss": 1.5935, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.016585806381388e-07, |
|
"loss": 1.6433, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.988785524864421e-07, |
|
"loss": 1.7118, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.9611645892957414e-07, |
|
"loss": 1.6446, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.9337233804009918e-07, |
|
"loss": 1.6606, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.9064622764284618e-07, |
|
"loss": 1.7027, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.8793816531438797e-07, |
|
"loss": 1.5923, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.8524818838252422e-07, |
|
"loss": 1.6598, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.8257633392576656e-07, |
|
"loss": 1.6341, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.7992263877282706e-07, |
|
"loss": 1.64, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.7728713950211067e-07, |
|
"loss": 1.6781, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.7466987244121086e-07, |
|
"loss": 1.5765, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.7207087366641055e-07, |
|
"loss": 1.617, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.6949017900218217e-07, |
|
"loss": 1.6218, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.669278240206954e-07, |
|
"loss": 1.6219, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.64383844041327e-07, |
|
"loss": 1.6668, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.618582741301735e-07, |
|
"loss": 1.642, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.593511490995681e-07, |
|
"loss": 1.6472, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.568625035076e-07, |
|
"loss": 1.7778, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.5439237165763936e-07, |
|
"loss": 1.6262, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 1.519407875978636e-07, |
|
"loss": 1.7338, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.4950778512078788e-07, |
|
"loss": 1.6323, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.4709339776280102e-07, |
|
"loss": 1.5763, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.4469765880370056e-07, |
|
"loss": 1.5935, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.423206012662359e-07, |
|
"loss": 1.612, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.399622579156526e-07, |
|
"loss": 1.6644, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.376226612592413e-07, |
|
"loss": 1.5736, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.3530184354588837e-07, |
|
"loss": 1.7126, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.329998367656325e-07, |
|
"loss": 1.6738, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.3071667264922282e-07, |
|
"loss": 1.5867, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.284523826676835e-07, |
|
"loss": 1.7888, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.2620699803187724e-07, |
|
"loss": 1.5483, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.239805496920764e-07, |
|
"loss": 1.6639, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.2177306833753744e-07, |
|
"loss": 1.6227, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1958458439607562e-07, |
|
"loss": 1.5036, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.1741512803364751e-07, |
|
"loss": 1.6368, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.1526472915393399e-07, |
|
"loss": 1.6915, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 1.2813829183578491, |
|
"eval_runtime": 158.5635, |
|
"eval_samples_per_second": 5.859, |
|
"eval_steps_per_second": 5.859, |
|
"step": 74060 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 1.1313341739792921e-07, |
|
"loss": 1.7454, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.1102122214353044e-07, |
|
"loss": 1.575, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.0892817250513476e-07, |
|
"loss": 1.6332, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0685429733323632e-07, |
|
"loss": 1.5274, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.0479962521403036e-07, |
|
"loss": 1.6464, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0276418446901703e-07, |
|
"loss": 1.7318, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 1.0074800315461302e-07, |
|
"loss": 1.5266, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.875110906176337e-08, |
|
"loss": 1.6372, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.67735297155593e-08, |
|
"loss": 1.557, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 9.481529237485865e-08, |
|
"loss": 1.6721, |
|
"step": 75000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 84628, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 5000, |
|
"total_flos": 4.445458032328704e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|