s3nh's picture
Upload folder using huggingface_hub
0f3d466
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.3085976272628446,
"eval_steps": 5290,
"global_step": 70000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"eval_loss": 1.797255277633667,
"eval_runtime": 160.9316,
"eval_samples_per_second": 5.773,
"eval_steps_per_second": 5.773,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 2.9999916262476826e-06,
"loss": 2.339,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 2.9999626800634057e-06,
"loss": 2.1788,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 2.9999130584664085e-06,
"loss": 2.1946,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 2.9998427621406735e-06,
"loss": 2.2431,
"step": 400
},
{
"epoch": 0.02,
"learning_rate": 2.9997517920551614e-06,
"loss": 2.1155,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 2.9996401494637996e-06,
"loss": 2.0998,
"step": 600
},
{
"epoch": 0.03,
"learning_rate": 2.9995078359054642e-06,
"loss": 2.0592,
"step": 700
},
{
"epoch": 0.04,
"learning_rate": 2.999354853203959e-06,
"loss": 2.0821,
"step": 800
},
{
"epoch": 0.04,
"learning_rate": 2.9991812034679892e-06,
"loss": 1.8844,
"step": 900
},
{
"epoch": 0.05,
"learning_rate": 2.9989868890911354e-06,
"loss": 2.1784,
"step": 1000
},
{
"epoch": 0.05,
"learning_rate": 2.9987719127518173e-06,
"loss": 2.0341,
"step": 1100
},
{
"epoch": 0.06,
"learning_rate": 2.9985362774132576e-06,
"loss": 2.1155,
"step": 1200
},
{
"epoch": 0.06,
"learning_rate": 2.9982799863234435e-06,
"loss": 2.0074,
"step": 1300
},
{
"epoch": 0.07,
"learning_rate": 2.998003043015078e-06,
"loss": 2.0324,
"step": 1400
},
{
"epoch": 0.07,
"learning_rate": 2.9977054513055346e-06,
"loss": 1.9387,
"step": 1500
},
{
"epoch": 0.08,
"learning_rate": 2.997387215296803e-06,
"loss": 2.0548,
"step": 1600
},
{
"epoch": 0.08,
"learning_rate": 2.997048339375433e-06,
"loss": 2.0709,
"step": 1700
},
{
"epoch": 0.09,
"learning_rate": 2.9966888282124733e-06,
"loss": 2.009,
"step": 1800
},
{
"epoch": 0.09,
"learning_rate": 2.9963086867634087e-06,
"loss": 1.9616,
"step": 1900
},
{
"epoch": 0.09,
"learning_rate": 2.9959079202680905e-06,
"loss": 1.983,
"step": 2000
},
{
"epoch": 0.1,
"learning_rate": 2.9954865342506646e-06,
"loss": 2.0902,
"step": 2100
},
{
"epoch": 0.1,
"learning_rate": 2.9950445345194956e-06,
"loss": 2.1337,
"step": 2200
},
{
"epoch": 0.11,
"learning_rate": 2.994581927167085e-06,
"loss": 1.9246,
"step": 2300
},
{
"epoch": 0.11,
"learning_rate": 2.994098718569992e-06,
"loss": 2.0217,
"step": 2400
},
{
"epoch": 0.12,
"learning_rate": 2.9935949153887393e-06,
"loss": 2.0509,
"step": 2500
},
{
"epoch": 0.12,
"learning_rate": 2.993070524567726e-06,
"loss": 2.013,
"step": 2600
},
{
"epoch": 0.13,
"learning_rate": 2.992525553335129e-06,
"loss": 1.8444,
"step": 2700
},
{
"epoch": 0.13,
"learning_rate": 2.991960009202806e-06,
"loss": 1.9667,
"step": 2800
},
{
"epoch": 0.14,
"learning_rate": 2.9913738999661895e-06,
"loss": 1.9942,
"step": 2900
},
{
"epoch": 0.14,
"learning_rate": 2.990767233704181e-06,
"loss": 1.975,
"step": 3000
},
{
"epoch": 0.15,
"learning_rate": 2.9901400187790383e-06,
"loss": 2.015,
"step": 3100
},
{
"epoch": 0.15,
"learning_rate": 2.989492263836262e-06,
"loss": 2.122,
"step": 3200
},
{
"epoch": 0.16,
"learning_rate": 2.9888239778044748e-06,
"loss": 1.8877,
"step": 3300
},
{
"epoch": 0.16,
"learning_rate": 2.988135169895298e-06,
"loss": 2.0659,
"step": 3400
},
{
"epoch": 0.17,
"learning_rate": 2.9874258496032273e-06,
"loss": 1.8897,
"step": 3500
},
{
"epoch": 0.17,
"learning_rate": 2.9866960267054987e-06,
"loss": 1.9466,
"step": 3600
},
{
"epoch": 0.17,
"learning_rate": 2.985945711261956e-06,
"loss": 1.9438,
"step": 3700
},
{
"epoch": 0.18,
"learning_rate": 2.9851749136149105e-06,
"loss": 2.0251,
"step": 3800
},
{
"epoch": 0.18,
"learning_rate": 2.984383644388999e-06,
"loss": 2.0244,
"step": 3900
},
{
"epoch": 0.19,
"learning_rate": 2.9835719144910395e-06,
"loss": 1.9022,
"step": 4000
},
{
"epoch": 0.19,
"learning_rate": 2.982739735109876e-06,
"loss": 2.0163,
"step": 4100
},
{
"epoch": 0.2,
"learning_rate": 2.98188711771623e-06,
"loss": 1.9168,
"step": 4200
},
{
"epoch": 0.2,
"learning_rate": 2.9810140740625364e-06,
"loss": 1.9695,
"step": 4300
},
{
"epoch": 0.21,
"learning_rate": 2.9801206161827883e-06,
"loss": 1.9114,
"step": 4400
},
{
"epoch": 0.21,
"learning_rate": 2.9792067563923653e-06,
"loss": 2.0469,
"step": 4500
},
{
"epoch": 0.22,
"learning_rate": 2.9782725072878657e-06,
"loss": 1.8072,
"step": 4600
},
{
"epoch": 0.22,
"learning_rate": 2.9773178817469342e-06,
"loss": 1.8899,
"step": 4700
},
{
"epoch": 0.23,
"learning_rate": 2.976342892928083e-06,
"loss": 1.9418,
"step": 4800
},
{
"epoch": 0.23,
"learning_rate": 2.9753475542705106e-06,
"loss": 2.1559,
"step": 4900
},
{
"epoch": 0.24,
"learning_rate": 2.974331879493916e-06,
"loss": 2.001,
"step": 5000
},
{
"epoch": 0.24,
"learning_rate": 2.973295882598313e-06,
"loss": 2.051,
"step": 5100
},
{
"epoch": 0.25,
"learning_rate": 2.9722395778638296e-06,
"loss": 1.9767,
"step": 5200
},
{
"epoch": 0.25,
"eval_loss": 1.4832066297531128,
"eval_runtime": 162.8547,
"eval_samples_per_second": 5.704,
"eval_steps_per_second": 5.704,
"step": 5290
},
{
"epoch": 0.25,
"learning_rate": 2.971162979850521e-06,
"loss": 1.8538,
"step": 5300
},
{
"epoch": 0.26,
"learning_rate": 2.9700661033981615e-06,
"loss": 1.7968,
"step": 5400
},
{
"epoch": 0.26,
"learning_rate": 2.9689489636260424e-06,
"loss": 1.7703,
"step": 5500
},
{
"epoch": 0.26,
"learning_rate": 2.967811575932764e-06,
"loss": 1.9824,
"step": 5600
},
{
"epoch": 0.27,
"learning_rate": 2.9666539559960238e-06,
"loss": 1.9332,
"step": 5700
},
{
"epoch": 0.27,
"learning_rate": 2.965476119772398e-06,
"loss": 1.8362,
"step": 5800
},
{
"epoch": 0.28,
"learning_rate": 2.964278083497125e-06,
"loss": 1.7958,
"step": 5900
},
{
"epoch": 0.28,
"learning_rate": 2.963059863683877e-06,
"loss": 1.7677,
"step": 6000
},
{
"epoch": 0.29,
"learning_rate": 2.9618214771245376e-06,
"loss": 2.0132,
"step": 6100
},
{
"epoch": 0.29,
"learning_rate": 2.9605629408889673e-06,
"loss": 1.8406,
"step": 6200
},
{
"epoch": 0.3,
"learning_rate": 2.9592842723247676e-06,
"loss": 2.0235,
"step": 6300
},
{
"epoch": 0.3,
"learning_rate": 2.9579854890570448e-06,
"loss": 1.9383,
"step": 6400
},
{
"epoch": 0.31,
"learning_rate": 2.956666608988164e-06,
"loss": 1.8556,
"step": 6500
},
{
"epoch": 0.31,
"learning_rate": 2.9553276502975034e-06,
"loss": 1.8689,
"step": 6600
},
{
"epoch": 0.32,
"learning_rate": 2.9539686314412053e-06,
"loss": 2.0381,
"step": 6700
},
{
"epoch": 0.32,
"learning_rate": 2.9525895711519195e-06,
"loss": 2.0205,
"step": 6800
},
{
"epoch": 0.33,
"learning_rate": 2.951190488438546e-06,
"loss": 1.8647,
"step": 6900
},
{
"epoch": 0.33,
"learning_rate": 2.9497714025859727e-06,
"loss": 1.8074,
"step": 7000
},
{
"epoch": 0.34,
"learning_rate": 2.94833233315481e-06,
"loss": 1.9039,
"step": 7100
},
{
"epoch": 0.34,
"learning_rate": 2.9468732999811216e-06,
"loss": 2.0103,
"step": 7200
},
{
"epoch": 0.35,
"learning_rate": 2.94539432317615e-06,
"loss": 1.9635,
"step": 7300
},
{
"epoch": 0.35,
"learning_rate": 2.943895423126038e-06,
"loss": 1.8708,
"step": 7400
},
{
"epoch": 0.35,
"learning_rate": 2.942376620491553e-06,
"loss": 1.7572,
"step": 7500
},
{
"epoch": 0.36,
"learning_rate": 2.940837936207796e-06,
"loss": 1.9795,
"step": 7600
},
{
"epoch": 0.36,
"learning_rate": 2.9392793914839165e-06,
"loss": 2.0192,
"step": 7700
},
{
"epoch": 0.37,
"learning_rate": 2.937701007802819e-06,
"loss": 1.8849,
"step": 7800
},
{
"epoch": 0.37,
"learning_rate": 2.9361028069208675e-06,
"loss": 1.9925,
"step": 7900
},
{
"epoch": 0.38,
"learning_rate": 2.934484810867586e-06,
"loss": 2.004,
"step": 8000
},
{
"epoch": 0.38,
"learning_rate": 2.9328470419453527e-06,
"loss": 1.9084,
"step": 8100
},
{
"epoch": 0.39,
"learning_rate": 2.9311895227290954e-06,
"loss": 1.8507,
"step": 8200
},
{
"epoch": 0.39,
"learning_rate": 2.929512276065978e-06,
"loss": 1.8185,
"step": 8300
},
{
"epoch": 0.4,
"learning_rate": 2.9278153250750875e-06,
"loss": 1.7862,
"step": 8400
},
{
"epoch": 0.4,
"learning_rate": 2.9260986931471136e-06,
"loss": 1.8444,
"step": 8500
},
{
"epoch": 0.41,
"learning_rate": 2.924362403944027e-06,
"loss": 2.0304,
"step": 8600
},
{
"epoch": 0.41,
"learning_rate": 2.922606481398755e-06,
"loss": 1.9337,
"step": 8700
},
{
"epoch": 0.42,
"learning_rate": 2.920830949714848e-06,
"loss": 1.9937,
"step": 8800
},
{
"epoch": 0.42,
"learning_rate": 2.919035833366148e-06,
"loss": 1.9554,
"step": 8900
},
{
"epoch": 0.43,
"learning_rate": 2.917221157096452e-06,
"loss": 1.9068,
"step": 9000
},
{
"epoch": 0.43,
"learning_rate": 2.9153869459191693e-06,
"loss": 1.9063,
"step": 9100
},
{
"epoch": 0.43,
"learning_rate": 2.913533225116978e-06,
"loss": 1.9342,
"step": 9200
},
{
"epoch": 0.44,
"learning_rate": 2.9116600202414754e-06,
"loss": 2.0052,
"step": 9300
},
{
"epoch": 0.44,
"learning_rate": 2.9097673571128266e-06,
"loss": 1.8102,
"step": 9400
},
{
"epoch": 0.45,
"learning_rate": 2.9078552618194086e-06,
"loss": 1.9959,
"step": 9500
},
{
"epoch": 0.45,
"learning_rate": 2.9059237607174494e-06,
"loss": 1.9136,
"step": 9600
},
{
"epoch": 0.46,
"learning_rate": 2.9039728804306666e-06,
"loss": 1.9124,
"step": 9700
},
{
"epoch": 0.46,
"learning_rate": 2.9020026478498988e-06,
"loss": 1.9215,
"step": 9800
},
{
"epoch": 0.47,
"learning_rate": 2.9000130901327377e-06,
"loss": 1.93,
"step": 9900
},
{
"epoch": 0.47,
"learning_rate": 2.8980042347031482e-06,
"loss": 1.82,
"step": 10000
},
{
"epoch": 0.48,
"learning_rate": 2.8959761092510978e-06,
"loss": 1.8436,
"step": 10100
},
{
"epoch": 0.48,
"learning_rate": 2.8939287417321676e-06,
"loss": 1.8995,
"step": 10200
},
{
"epoch": 0.49,
"learning_rate": 2.8918621603671737e-06,
"loss": 1.9337,
"step": 10300
},
{
"epoch": 0.49,
"learning_rate": 2.8897763936417715e-06,
"loss": 1.9088,
"step": 10400
},
{
"epoch": 0.5,
"learning_rate": 2.88767147030607e-06,
"loss": 1.8474,
"step": 10500
},
{
"epoch": 0.5,
"eval_loss": 1.4355759620666504,
"eval_runtime": 163.1072,
"eval_samples_per_second": 5.696,
"eval_steps_per_second": 5.696,
"step": 10580
},
{
"epoch": 0.5,
"learning_rate": 2.885547419374229e-06,
"loss": 1.9638,
"step": 10600
},
{
"epoch": 0.51,
"learning_rate": 2.883404270124063e-06,
"loss": 1.9945,
"step": 10700
},
{
"epoch": 0.51,
"learning_rate": 2.881242052096638e-06,
"loss": 1.8143,
"step": 10800
},
{
"epoch": 0.52,
"learning_rate": 2.879060795095863e-06,
"loss": 1.7915,
"step": 10900
},
{
"epoch": 0.52,
"learning_rate": 2.8768605291880767e-06,
"loss": 1.8868,
"step": 11000
},
{
"epoch": 0.52,
"learning_rate": 2.8746412847016387e-06,
"loss": 1.8033,
"step": 11100
},
{
"epoch": 0.53,
"learning_rate": 2.8724030922265068e-06,
"loss": 2.0053,
"step": 11200
},
{
"epoch": 0.53,
"learning_rate": 2.870145982613818e-06,
"loss": 1.867,
"step": 11300
},
{
"epoch": 0.54,
"learning_rate": 2.867869986975461e-06,
"loss": 1.8002,
"step": 11400
},
{
"epoch": 0.54,
"learning_rate": 2.865575136683649e-06,
"loss": 1.8835,
"step": 11500
},
{
"epoch": 0.55,
"learning_rate": 2.863261463370487e-06,
"loss": 1.7312,
"step": 11600
},
{
"epoch": 0.55,
"learning_rate": 2.8609289989275353e-06,
"loss": 1.8402,
"step": 11700
},
{
"epoch": 0.56,
"learning_rate": 2.858577775505371e-06,
"loss": 1.9007,
"step": 11800
},
{
"epoch": 0.56,
"learning_rate": 2.856207825513144e-06,
"loss": 1.8235,
"step": 11900
},
{
"epoch": 0.57,
"learning_rate": 2.853819181618129e-06,
"loss": 1.8568,
"step": 12000
},
{
"epoch": 0.57,
"learning_rate": 2.851411876745278e-06,
"loss": 1.9159,
"step": 12100
},
{
"epoch": 0.58,
"learning_rate": 2.848985944076763e-06,
"loss": 1.9857,
"step": 12200
},
{
"epoch": 0.58,
"learning_rate": 2.846541417051524e-06,
"loss": 1.8676,
"step": 12300
},
{
"epoch": 0.59,
"learning_rate": 2.8440783293648015e-06,
"loss": 1.8022,
"step": 12400
},
{
"epoch": 0.59,
"learning_rate": 2.8415967149676773e-06,
"loss": 1.8365,
"step": 12500
},
{
"epoch": 0.6,
"learning_rate": 2.8390966080666035e-06,
"loss": 1.8702,
"step": 12600
},
{
"epoch": 0.6,
"learning_rate": 2.8365780431229317e-06,
"loss": 1.8221,
"step": 12700
},
{
"epoch": 0.61,
"learning_rate": 2.8340410548524395e-06,
"loss": 1.8498,
"step": 12800
},
{
"epoch": 0.61,
"learning_rate": 2.8314856782248494e-06,
"loss": 1.8906,
"step": 12900
},
{
"epoch": 0.61,
"learning_rate": 2.8289119484633485e-06,
"loss": 2.0184,
"step": 13000
},
{
"epoch": 0.62,
"learning_rate": 2.8263199010441038e-06,
"loss": 1.8205,
"step": 13100
},
{
"epoch": 0.62,
"learning_rate": 2.82370957169577e-06,
"loss": 1.9686,
"step": 13200
},
{
"epoch": 0.63,
"learning_rate": 2.8210809963990004e-06,
"loss": 1.7651,
"step": 13300
},
{
"epoch": 0.63,
"learning_rate": 2.8184342113859494e-06,
"loss": 1.8216,
"step": 13400
},
{
"epoch": 0.64,
"learning_rate": 2.815769253139773e-06,
"loss": 1.8081,
"step": 13500
},
{
"epoch": 0.64,
"learning_rate": 2.813086158394126e-06,
"loss": 1.7233,
"step": 13600
},
{
"epoch": 0.65,
"learning_rate": 2.8103849641326563e-06,
"loss": 1.8446,
"step": 13700
},
{
"epoch": 0.65,
"learning_rate": 2.807665707588494e-06,
"loss": 1.8379,
"step": 13800
},
{
"epoch": 0.66,
"learning_rate": 2.8049284262437393e-06,
"loss": 1.8149,
"step": 13900
},
{
"epoch": 0.66,
"learning_rate": 2.802173157828946e-06,
"loss": 1.9463,
"step": 14000
},
{
"epoch": 0.67,
"learning_rate": 2.799399940322599e-06,
"loss": 1.8382,
"step": 14100
},
{
"epoch": 0.67,
"learning_rate": 2.7966088119505945e-06,
"loss": 1.8039,
"step": 14200
},
{
"epoch": 0.68,
"learning_rate": 2.79379981118571e-06,
"loss": 2.0244,
"step": 14300
},
{
"epoch": 0.68,
"learning_rate": 2.7909729767470757e-06,
"loss": 1.8587,
"step": 14400
},
{
"epoch": 0.69,
"learning_rate": 2.7881283475996405e-06,
"loss": 1.8551,
"step": 14500
},
{
"epoch": 0.69,
"learning_rate": 2.7852659629536335e-06,
"loss": 1.9153,
"step": 14600
},
{
"epoch": 0.69,
"learning_rate": 2.782385862264027e-06,
"loss": 1.7548,
"step": 14700
},
{
"epoch": 0.7,
"learning_rate": 2.779488085229987e-06,
"loss": 1.8052,
"step": 14800
},
{
"epoch": 0.7,
"learning_rate": 2.7765726717943334e-06,
"loss": 1.7594,
"step": 14900
},
{
"epoch": 0.71,
"learning_rate": 2.773639662142983e-06,
"loss": 1.8186,
"step": 15000
},
{
"epoch": 0.71,
"learning_rate": 2.770689096704397e-06,
"loss": 1.9036,
"step": 15100
},
{
"epoch": 0.72,
"learning_rate": 2.7677210161490276e-06,
"loss": 1.8217,
"step": 15200
},
{
"epoch": 0.72,
"learning_rate": 2.7647354613887523e-06,
"loss": 1.8397,
"step": 15300
},
{
"epoch": 0.73,
"learning_rate": 2.761732473576313e-06,
"loss": 1.7251,
"step": 15400
},
{
"epoch": 0.73,
"learning_rate": 2.7587120941047475e-06,
"loss": 1.8731,
"step": 15500
},
{
"epoch": 0.74,
"learning_rate": 2.7556743646068202e-06,
"loss": 1.805,
"step": 15600
},
{
"epoch": 0.74,
"learning_rate": 2.752619326954447e-06,
"loss": 1.8677,
"step": 15700
},
{
"epoch": 0.75,
"learning_rate": 2.749547023258118e-06,
"loss": 1.8121,
"step": 15800
},
{
"epoch": 0.75,
"eval_loss": 1.4021737575531006,
"eval_runtime": 163.1438,
"eval_samples_per_second": 5.694,
"eval_steps_per_second": 5.694,
"step": 15870
},
{
"epoch": 0.75,
"learning_rate": 2.7464574958663186e-06,
"loss": 1.8015,
"step": 15900
},
{
"epoch": 0.76,
"learning_rate": 2.743350787364944e-06,
"loss": 1.7014,
"step": 16000
},
{
"epoch": 0.76,
"learning_rate": 2.7402269405767133e-06,
"loss": 1.7616,
"step": 16100
},
{
"epoch": 0.77,
"learning_rate": 2.7370859985605794e-06,
"loss": 1.7529,
"step": 16200
},
{
"epoch": 0.77,
"learning_rate": 2.7339280046111336e-06,
"loss": 1.7992,
"step": 16300
},
{
"epoch": 0.78,
"learning_rate": 2.7307530022580115e-06,
"loss": 1.5267,
"step": 16400
},
{
"epoch": 0.78,
"learning_rate": 2.7275610352652913e-06,
"loss": 1.6973,
"step": 16500
},
{
"epoch": 0.78,
"learning_rate": 2.7243521476308908e-06,
"loss": 1.813,
"step": 16600
},
{
"epoch": 0.79,
"learning_rate": 2.721126383585962e-06,
"loss": 1.842,
"step": 16700
},
{
"epoch": 0.79,
"learning_rate": 2.7178837875942787e-06,
"loss": 1.9349,
"step": 16800
},
{
"epoch": 0.8,
"learning_rate": 2.7146244043516273e-06,
"loss": 1.7218,
"step": 16900
},
{
"epoch": 0.8,
"learning_rate": 2.7113482787851883e-06,
"loss": 1.8096,
"step": 17000
},
{
"epoch": 0.81,
"learning_rate": 2.7080554560529164e-06,
"loss": 1.7827,
"step": 17100
},
{
"epoch": 0.81,
"learning_rate": 2.7047459815429214e-06,
"loss": 1.7434,
"step": 17200
},
{
"epoch": 0.82,
"learning_rate": 2.7014199008728377e-06,
"loss": 1.8203,
"step": 17300
},
{
"epoch": 0.82,
"learning_rate": 2.698077259889201e-06,
"loss": 1.7201,
"step": 17400
},
{
"epoch": 0.83,
"learning_rate": 2.6947181046668113e-06,
"loss": 1.8474,
"step": 17500
},
{
"epoch": 0.83,
"learning_rate": 2.691342481508102e-06,
"loss": 1.7868,
"step": 17600
},
{
"epoch": 0.84,
"learning_rate": 2.6879504369424983e-06,
"loss": 1.7272,
"step": 17700
},
{
"epoch": 0.84,
"learning_rate": 2.6845420177257774e-06,
"loss": 1.8764,
"step": 17800
},
{
"epoch": 0.85,
"learning_rate": 2.6811172708394243e-06,
"loss": 1.5964,
"step": 17900
},
{
"epoch": 0.85,
"learning_rate": 2.6776762434899845e-06,
"loss": 1.7725,
"step": 18000
},
{
"epoch": 0.86,
"learning_rate": 2.6742189831084106e-06,
"loss": 1.8118,
"step": 18100
},
{
"epoch": 0.86,
"learning_rate": 2.6707455373494125e-06,
"loss": 1.6714,
"step": 18200
},
{
"epoch": 0.86,
"learning_rate": 2.667255954090798e-06,
"loss": 1.7673,
"step": 18300
},
{
"epoch": 0.87,
"learning_rate": 2.6637502814328124e-06,
"loss": 1.8517,
"step": 18400
},
{
"epoch": 0.87,
"learning_rate": 2.6602285676974786e-06,
"loss": 1.7459,
"step": 18500
},
{
"epoch": 0.88,
"learning_rate": 2.6566908614279262e-06,
"loss": 1.8677,
"step": 18600
},
{
"epoch": 0.88,
"learning_rate": 2.6531372113877273e-06,
"loss": 1.8378,
"step": 18700
},
{
"epoch": 0.89,
"learning_rate": 2.649567666560222e-06,
"loss": 1.7712,
"step": 18800
},
{
"epoch": 0.89,
"learning_rate": 2.645982276147842e-06,
"loss": 1.7846,
"step": 18900
},
{
"epoch": 0.9,
"learning_rate": 2.6423810895714345e-06,
"loss": 1.7452,
"step": 19000
},
{
"epoch": 0.9,
"learning_rate": 2.6387641564695807e-06,
"loss": 1.8064,
"step": 19100
},
{
"epoch": 0.91,
"learning_rate": 2.635131526697911e-06,
"loss": 1.6403,
"step": 19200
},
{
"epoch": 0.91,
"learning_rate": 2.631483250328417e-06,
"loss": 1.7232,
"step": 19300
},
{
"epoch": 0.92,
"learning_rate": 2.627819377648764e-06,
"loss": 1.8836,
"step": 19400
},
{
"epoch": 0.92,
"learning_rate": 2.6241399591615938e-06,
"loss": 1.8373,
"step": 19500
},
{
"epoch": 0.93,
"learning_rate": 2.620445045583833e-06,
"loss": 1.7807,
"step": 19600
},
{
"epoch": 0.93,
"learning_rate": 2.6167346878459907e-06,
"loss": 1.8299,
"step": 19700
},
{
"epoch": 0.94,
"learning_rate": 2.6130089370914575e-06,
"loss": 1.8572,
"step": 19800
},
{
"epoch": 0.94,
"learning_rate": 2.609267844675801e-06,
"loss": 1.651,
"step": 19900
},
{
"epoch": 0.95,
"learning_rate": 2.605511462166057e-06,
"loss": 1.8989,
"step": 20000
},
{
"epoch": 0.95,
"learning_rate": 2.6017398413400198e-06,
"loss": 1.8421,
"step": 20100
},
{
"epoch": 0.95,
"learning_rate": 2.597953034185528e-06,
"loss": 1.8114,
"step": 20200
},
{
"epoch": 0.96,
"learning_rate": 2.5941510928997473e-06,
"loss": 1.8759,
"step": 20300
},
{
"epoch": 0.96,
"learning_rate": 2.590334069888451e-06,
"loss": 1.8544,
"step": 20400
},
{
"epoch": 0.97,
"learning_rate": 2.5865020177652995e-06,
"loss": 1.717,
"step": 20500
},
{
"epoch": 0.97,
"learning_rate": 2.5826549893511133e-06,
"loss": 1.7786,
"step": 20600
},
{
"epoch": 0.98,
"learning_rate": 2.578793037673145e-06,
"loss": 1.6818,
"step": 20700
},
{
"epoch": 0.98,
"learning_rate": 2.574916215964348e-06,
"loss": 1.6679,
"step": 20800
},
{
"epoch": 0.99,
"learning_rate": 2.5710245776626463e-06,
"loss": 1.8773,
"step": 20900
},
{
"epoch": 0.99,
"learning_rate": 2.5671181764101916e-06,
"loss": 1.6672,
"step": 21000
},
{
"epoch": 1.0,
"learning_rate": 2.56319706605263e-06,
"loss": 1.8333,
"step": 21100
},
{
"epoch": 1.0,
"eval_loss": 1.367815613746643,
"eval_runtime": 161.7042,
"eval_samples_per_second": 5.745,
"eval_steps_per_second": 5.745,
"step": 21160
},
{
"epoch": 1.0,
"learning_rate": 2.5592613006383554e-06,
"loss": 1.873,
"step": 21200
},
{
"epoch": 1.01,
"learning_rate": 2.5553109344177676e-06,
"loss": 1.7398,
"step": 21300
},
{
"epoch": 1.01,
"learning_rate": 2.5513460218425225e-06,
"loss": 1.8562,
"step": 21400
},
{
"epoch": 1.02,
"learning_rate": 2.5473666175647824e-06,
"loss": 1.8687,
"step": 21500
},
{
"epoch": 1.02,
"learning_rate": 2.543372776436463e-06,
"loss": 1.8159,
"step": 21600
},
{
"epoch": 1.03,
"learning_rate": 2.539364553508476e-06,
"loss": 1.7736,
"step": 21700
},
{
"epoch": 1.03,
"learning_rate": 2.5353420040299714e-06,
"loss": 1.8746,
"step": 21800
},
{
"epoch": 1.04,
"learning_rate": 2.531305183447576e-06,
"loss": 1.7582,
"step": 21900
},
{
"epoch": 1.04,
"learning_rate": 2.527254147404629e-06,
"loss": 1.9113,
"step": 22000
},
{
"epoch": 1.04,
"learning_rate": 2.5231889517404136e-06,
"loss": 1.8019,
"step": 22100
},
{
"epoch": 1.05,
"learning_rate": 2.5191096524893894e-06,
"loss": 1.8494,
"step": 22200
},
{
"epoch": 1.05,
"learning_rate": 2.5150163058804203e-06,
"loss": 1.698,
"step": 22300
},
{
"epoch": 1.06,
"learning_rate": 2.5109089683359967e-06,
"loss": 1.7218,
"step": 22400
},
{
"epoch": 1.06,
"learning_rate": 2.5067876964714582e-06,
"loss": 1.7944,
"step": 22500
},
{
"epoch": 1.07,
"learning_rate": 2.502652547094218e-06,
"loss": 1.8057,
"step": 22600
},
{
"epoch": 1.07,
"learning_rate": 2.4985035772029737e-06,
"loss": 1.677,
"step": 22700
},
{
"epoch": 1.08,
"learning_rate": 2.4943408439869243e-06,
"loss": 1.8319,
"step": 22800
},
{
"epoch": 1.08,
"learning_rate": 2.490164404824983e-06,
"loss": 1.742,
"step": 22900
},
{
"epoch": 1.09,
"learning_rate": 2.485974317284983e-06,
"loss": 1.7521,
"step": 23000
},
{
"epoch": 1.09,
"learning_rate": 2.4817706391228884e-06,
"loss": 1.8927,
"step": 23100
},
{
"epoch": 1.1,
"learning_rate": 2.4775534282819945e-06,
"loss": 1.6825,
"step": 23200
},
{
"epoch": 1.1,
"learning_rate": 2.473322742892131e-06,
"loss": 1.7289,
"step": 23300
},
{
"epoch": 1.11,
"learning_rate": 2.4690786412688594e-06,
"loss": 1.8572,
"step": 23400
},
{
"epoch": 1.11,
"learning_rate": 2.4648211819126706e-06,
"loss": 1.7959,
"step": 23500
},
{
"epoch": 1.12,
"learning_rate": 2.460550423508178e-06,
"loss": 1.765,
"step": 23600
},
{
"epoch": 1.12,
"learning_rate": 2.4562664249233064e-06,
"loss": 1.7334,
"step": 23700
},
{
"epoch": 1.12,
"learning_rate": 2.451969245208486e-06,
"loss": 1.6651,
"step": 23800
},
{
"epoch": 1.13,
"learning_rate": 2.4476589435958323e-06,
"loss": 1.7472,
"step": 23900
},
{
"epoch": 1.13,
"learning_rate": 2.4433355794983336e-06,
"loss": 1.8278,
"step": 24000
},
{
"epoch": 1.14,
"learning_rate": 2.43899921250903e-06,
"loss": 1.6537,
"step": 24100
},
{
"epoch": 1.14,
"learning_rate": 2.4346499024001946e-06,
"loss": 1.6281,
"step": 24200
},
{
"epoch": 1.15,
"learning_rate": 2.430287709122506e-06,
"loss": 1.8405,
"step": 24300
},
{
"epoch": 1.15,
"learning_rate": 2.425912692804224e-06,
"loss": 1.7661,
"step": 24400
},
{
"epoch": 1.16,
"learning_rate": 2.4215249137503624e-06,
"loss": 1.7644,
"step": 24500
},
{
"epoch": 1.16,
"learning_rate": 2.417124432441853e-06,
"loss": 1.6826,
"step": 24600
},
{
"epoch": 1.17,
"learning_rate": 2.412711309534717e-06,
"loss": 1.7262,
"step": 24700
},
{
"epoch": 1.17,
"learning_rate": 2.4082856058592265e-06,
"loss": 1.8845,
"step": 24800
},
{
"epoch": 1.18,
"learning_rate": 2.4038473824190656e-06,
"loss": 1.922,
"step": 24900
},
{
"epoch": 1.18,
"learning_rate": 2.399396700390491e-06,
"loss": 1.703,
"step": 25000
},
{
"epoch": 1.19,
"learning_rate": 2.394933621121487e-06,
"loss": 1.83,
"step": 25100
},
{
"epoch": 1.19,
"learning_rate": 2.3904582061309217e-06,
"loss": 1.6753,
"step": 25200
},
{
"epoch": 1.2,
"learning_rate": 2.3859705171076983e-06,
"loss": 1.8203,
"step": 25300
},
{
"epoch": 1.2,
"learning_rate": 2.3814706159099038e-06,
"loss": 1.7362,
"step": 25400
},
{
"epoch": 1.21,
"learning_rate": 2.376958564563958e-06,
"loss": 1.8836,
"step": 25500
},
{
"epoch": 1.21,
"learning_rate": 2.372434425263757e-06,
"loss": 1.7072,
"step": 25600
},
{
"epoch": 1.21,
"learning_rate": 2.367898260369818e-06,
"loss": 1.6916,
"step": 25700
},
{
"epoch": 1.22,
"learning_rate": 2.3633501324084165e-06,
"loss": 1.6549,
"step": 25800
},
{
"epoch": 1.22,
"learning_rate": 2.358790104070728e-06,
"loss": 1.7526,
"step": 25900
},
{
"epoch": 1.23,
"learning_rate": 2.354218238211962e-06,
"loss": 1.7785,
"step": 26000
},
{
"epoch": 1.23,
"learning_rate": 2.349634597850495e-06,
"loss": 1.7332,
"step": 26100
},
{
"epoch": 1.24,
"learning_rate": 2.3450392461670026e-06,
"loss": 1.7434,
"step": 26200
},
{
"epoch": 1.24,
"learning_rate": 2.3404322465035903e-06,
"loss": 1.8742,
"step": 26300
},
{
"epoch": 1.25,
"learning_rate": 2.3358136623629167e-06,
"loss": 1.6601,
"step": 26400
},
{
"epoch": 1.25,
"eval_loss": 1.3507641553878784,
"eval_runtime": 162.7404,
"eval_samples_per_second": 5.708,
"eval_steps_per_second": 5.708,
"step": 26450
},
{
"epoch": 1.25,
"learning_rate": 2.331183557407322e-06,
"loss": 1.7639,
"step": 26500
},
{
"epoch": 1.26,
"learning_rate": 2.3265419954579467e-06,
"loss": 1.849,
"step": 26600
},
{
"epoch": 1.26,
"learning_rate": 2.321889040493856e-06,
"loss": 1.9006,
"step": 26700
},
{
"epoch": 1.27,
"learning_rate": 2.317224756651156e-06,
"loss": 1.6524,
"step": 26800
},
{
"epoch": 1.27,
"learning_rate": 2.3125492082221074e-06,
"loss": 1.8237,
"step": 26900
},
{
"epoch": 1.28,
"learning_rate": 2.307862459654243e-06,
"loss": 1.7348,
"step": 27000
},
{
"epoch": 1.28,
"learning_rate": 2.303164575549478e-06,
"loss": 1.6887,
"step": 27100
},
{
"epoch": 1.29,
"learning_rate": 2.298455620663217e-06,
"loss": 1.7558,
"step": 27200
},
{
"epoch": 1.29,
"learning_rate": 2.293735659903468e-06,
"loss": 1.8181,
"step": 27300
},
{
"epoch": 1.3,
"learning_rate": 2.2890047583299385e-06,
"loss": 1.7344,
"step": 27400
},
{
"epoch": 1.3,
"learning_rate": 2.284262981153147e-06,
"loss": 1.8456,
"step": 27500
},
{
"epoch": 1.3,
"learning_rate": 2.27951039373352e-06,
"loss": 1.711,
"step": 27600
},
{
"epoch": 1.31,
"learning_rate": 2.2747470615804907e-06,
"loss": 1.7673,
"step": 27700
},
{
"epoch": 1.31,
"learning_rate": 2.269973050351599e-06,
"loss": 1.7957,
"step": 27800
},
{
"epoch": 1.32,
"learning_rate": 2.265188425851583e-06,
"loss": 1.6838,
"step": 27900
},
{
"epoch": 1.32,
"learning_rate": 2.260393254031475e-06,
"loss": 1.6342,
"step": 28000
},
{
"epoch": 1.33,
"learning_rate": 2.2555876009876904e-06,
"loss": 1.8296,
"step": 28100
},
{
"epoch": 1.33,
"learning_rate": 2.250771532961118e-06,
"loss": 1.7831,
"step": 28200
},
{
"epoch": 1.34,
"learning_rate": 2.2459451163362036e-06,
"loss": 1.7551,
"step": 28300
},
{
"epoch": 1.34,
"learning_rate": 2.241108417640041e-06,
"loss": 1.708,
"step": 28400
},
{
"epoch": 1.35,
"learning_rate": 2.2362615035414496e-06,
"loss": 1.7695,
"step": 28500
},
{
"epoch": 1.35,
"learning_rate": 2.231404440850058e-06,
"loss": 1.6231,
"step": 28600
},
{
"epoch": 1.36,
"learning_rate": 2.2265372965153827e-06,
"loss": 1.7269,
"step": 28700
},
{
"epoch": 1.36,
"learning_rate": 2.2216601376259044e-06,
"loss": 1.6641,
"step": 28800
},
{
"epoch": 1.37,
"learning_rate": 2.2167730314081447e-06,
"loss": 1.7724,
"step": 28900
},
{
"epoch": 1.37,
"learning_rate": 2.211876045225738e-06,
"loss": 1.909,
"step": 29000
},
{
"epoch": 1.38,
"learning_rate": 2.2069692465785034e-06,
"loss": 1.7163,
"step": 29100
},
{
"epoch": 1.38,
"learning_rate": 2.202052703101516e-06,
"loss": 1.857,
"step": 29200
},
{
"epoch": 1.38,
"learning_rate": 2.1971264825641716e-06,
"loss": 1.6806,
"step": 29300
},
{
"epoch": 1.39,
"learning_rate": 2.1921906528692556e-06,
"loss": 1.7828,
"step": 29400
},
{
"epoch": 1.39,
"learning_rate": 2.187245282052004e-06,
"loss": 1.7669,
"step": 29500
},
{
"epoch": 1.4,
"learning_rate": 2.1822904382791686e-06,
"loss": 1.7001,
"step": 29600
},
{
"epoch": 1.4,
"learning_rate": 2.1773261898480747e-06,
"loss": 1.6504,
"step": 29700
},
{
"epoch": 1.41,
"learning_rate": 2.172352605185682e-06,
"loss": 1.6888,
"step": 29800
},
{
"epoch": 1.41,
"learning_rate": 2.167369752847639e-06,
"loss": 1.6804,
"step": 29900
},
{
"epoch": 1.42,
"learning_rate": 2.162377701517341e-06,
"loss": 1.5615,
"step": 30000
},
{
"epoch": 1.42,
"learning_rate": 2.1573765200049817e-06,
"loss": 1.6089,
"step": 30100
},
{
"epoch": 1.43,
"learning_rate": 2.1523662772466025e-06,
"loss": 1.7575,
"step": 30200
},
{
"epoch": 1.43,
"learning_rate": 2.1473470423031475e-06,
"loss": 1.6443,
"step": 30300
},
{
"epoch": 1.44,
"learning_rate": 2.1423188843595067e-06,
"loss": 1.6201,
"step": 30400
},
{
"epoch": 1.44,
"learning_rate": 2.1372818727235653e-06,
"loss": 1.7594,
"step": 30500
},
{
"epoch": 1.45,
"learning_rate": 2.132236076825247e-06,
"loss": 1.6505,
"step": 30600
},
{
"epoch": 1.45,
"learning_rate": 2.127181566215557e-06,
"loss": 1.8139,
"step": 30700
},
{
"epoch": 1.46,
"learning_rate": 2.122118410565624e-06,
"loss": 1.738,
"step": 30800
},
{
"epoch": 1.46,
"learning_rate": 2.11704667966574e-06,
"loss": 1.693,
"step": 30900
},
{
"epoch": 1.47,
"learning_rate": 2.111966443424397e-06,
"loss": 1.8003,
"step": 31000
},
{
"epoch": 1.47,
"learning_rate": 2.1068777718673254e-06,
"loss": 1.8407,
"step": 31100
},
{
"epoch": 1.47,
"learning_rate": 2.101780735136526e-06,
"loss": 1.5816,
"step": 31200
},
{
"epoch": 1.48,
"learning_rate": 2.0966754034893047e-06,
"loss": 1.6609,
"step": 31300
},
{
"epoch": 1.48,
"learning_rate": 2.0915618472973062e-06,
"loss": 1.7292,
"step": 31400
},
{
"epoch": 1.49,
"learning_rate": 2.0864401370455406e-06,
"loss": 1.7347,
"step": 31500
},
{
"epoch": 1.49,
"learning_rate": 2.081310343331413e-06,
"loss": 1.748,
"step": 31600
},
{
"epoch": 1.5,
"learning_rate": 2.0761725368637496e-06,
"loss": 1.5452,
"step": 31700
},
{
"epoch": 1.5,
"eval_loss": 1.3357341289520264,
"eval_runtime": 162.3538,
"eval_samples_per_second": 5.722,
"eval_steps_per_second": 5.722,
"step": 31740
},
{
"epoch": 1.5,
"learning_rate": 2.0710267884618273e-06,
"loss": 1.6686,
"step": 31800
},
{
"epoch": 1.51,
"learning_rate": 2.0658731690543905e-06,
"loss": 1.72,
"step": 31900
},
{
"epoch": 1.51,
"learning_rate": 2.0607117496786794e-06,
"loss": 1.7252,
"step": 32000
},
{
"epoch": 1.52,
"learning_rate": 2.0555426014794477e-06,
"loss": 1.6562,
"step": 32100
},
{
"epoch": 1.52,
"learning_rate": 2.050365795707983e-06,
"loss": 1.6878,
"step": 32200
},
{
"epoch": 1.53,
"learning_rate": 2.0451814037211256e-06,
"loss": 1.7308,
"step": 32300
},
{
"epoch": 1.53,
"learning_rate": 2.0399894969802814e-06,
"loss": 1.6544,
"step": 32400
},
{
"epoch": 1.54,
"learning_rate": 2.034790147050442e-06,
"loss": 1.7115,
"step": 32500
},
{
"epoch": 1.54,
"learning_rate": 2.0295834255991927e-06,
"loss": 1.8076,
"step": 32600
},
{
"epoch": 1.55,
"learning_rate": 2.024369404395731e-06,
"loss": 1.6923,
"step": 32700
},
{
"epoch": 1.55,
"learning_rate": 2.01914815530987e-06,
"loss": 1.8198,
"step": 32800
},
{
"epoch": 1.56,
"learning_rate": 2.013919750311055e-06,
"loss": 1.5914,
"step": 32900
},
{
"epoch": 1.56,
"learning_rate": 2.008684261467365e-06,
"loss": 1.7334,
"step": 33000
},
{
"epoch": 1.56,
"learning_rate": 2.003441760944525e-06,
"loss": 1.6914,
"step": 33100
},
{
"epoch": 1.57,
"learning_rate": 1.998192321004908e-06,
"loss": 1.5967,
"step": 33200
},
{
"epoch": 1.57,
"learning_rate": 1.992936014006538e-06,
"loss": 1.6271,
"step": 33300
},
{
"epoch": 1.58,
"learning_rate": 1.9876729124020963e-06,
"loss": 1.5439,
"step": 33400
},
{
"epoch": 1.58,
"learning_rate": 1.982403088737918e-06,
"loss": 1.5242,
"step": 33500
},
{
"epoch": 1.59,
"learning_rate": 1.977126615652999e-06,
"loss": 1.7863,
"step": 33600
},
{
"epoch": 1.59,
"learning_rate": 1.9718435658779864e-06,
"loss": 1.7852,
"step": 33700
},
{
"epoch": 1.6,
"learning_rate": 1.9665540122341817e-06,
"loss": 1.7474,
"step": 33800
},
{
"epoch": 1.6,
"learning_rate": 1.9612580276325363e-06,
"loss": 1.818,
"step": 33900
},
{
"epoch": 1.61,
"learning_rate": 1.9559556850726433e-06,
"loss": 1.8187,
"step": 34000
},
{
"epoch": 1.61,
"learning_rate": 1.9506470576417362e-06,
"loss": 1.6308,
"step": 34100
},
{
"epoch": 1.62,
"learning_rate": 1.9453322185136772e-06,
"loss": 1.5877,
"step": 34200
},
{
"epoch": 1.62,
"learning_rate": 1.9400112409479507e-06,
"loss": 1.5775,
"step": 34300
},
{
"epoch": 1.63,
"learning_rate": 1.9346841982886527e-06,
"loss": 1.6369,
"step": 34400
},
{
"epoch": 1.63,
"learning_rate": 1.929351163963481e-06,
"loss": 1.7436,
"step": 34500
},
{
"epoch": 1.64,
"learning_rate": 1.924012211482721e-06,
"loss": 1.7817,
"step": 34600
},
{
"epoch": 1.64,
"learning_rate": 1.918667414438235e-06,
"loss": 1.7958,
"step": 34700
},
{
"epoch": 1.64,
"learning_rate": 1.9133168465024454e-06,
"loss": 1.6632,
"step": 34800
},
{
"epoch": 1.65,
"learning_rate": 1.907960581427321e-06,
"loss": 1.7518,
"step": 34900
},
{
"epoch": 1.65,
"learning_rate": 1.9025986930433594e-06,
"loss": 1.7184,
"step": 35000
},
{
"epoch": 1.66,
"learning_rate": 1.8972312552585695e-06,
"loss": 1.6154,
"step": 35100
},
{
"epoch": 1.66,
"learning_rate": 1.891858342057453e-06,
"loss": 1.7069,
"step": 35200
},
{
"epoch": 1.67,
"learning_rate": 1.8864800274999842e-06,
"loss": 1.6902,
"step": 35300
},
{
"epoch": 1.67,
"learning_rate": 1.8810963857205902e-06,
"loss": 1.6736,
"step": 35400
},
{
"epoch": 1.68,
"learning_rate": 1.8757074909271275e-06,
"loss": 1.7893,
"step": 35500
},
{
"epoch": 1.68,
"learning_rate": 1.8703134173998603e-06,
"loss": 1.7374,
"step": 35600
},
{
"epoch": 1.69,
"learning_rate": 1.864914239490436e-06,
"loss": 1.7173,
"step": 35700
},
{
"epoch": 1.69,
"learning_rate": 1.8595100316208608e-06,
"loss": 1.6844,
"step": 35800
},
{
"epoch": 1.7,
"learning_rate": 1.854100868282473e-06,
"loss": 1.6794,
"step": 35900
},
{
"epoch": 1.7,
"learning_rate": 1.8486868240349173e-06,
"loss": 1.65,
"step": 36000
},
{
"epoch": 1.71,
"learning_rate": 1.8432679735051177e-06,
"loss": 1.6641,
"step": 36100
},
{
"epoch": 1.71,
"learning_rate": 1.8378443913862453e-06,
"loss": 1.6942,
"step": 36200
},
{
"epoch": 1.72,
"learning_rate": 1.8324161524366935e-06,
"loss": 1.782,
"step": 36300
},
{
"epoch": 1.72,
"learning_rate": 1.8269833314790437e-06,
"loss": 1.5728,
"step": 36400
},
{
"epoch": 1.73,
"learning_rate": 1.8215460033990368e-06,
"loss": 1.6751,
"step": 36500
},
{
"epoch": 1.73,
"learning_rate": 1.8161042431445376e-06,
"loss": 1.5691,
"step": 36600
},
{
"epoch": 1.73,
"learning_rate": 1.8106581257245064e-06,
"loss": 1.7601,
"step": 36700
},
{
"epoch": 1.74,
"learning_rate": 1.8052077262079612e-06,
"loss": 1.6157,
"step": 36800
},
{
"epoch": 1.74,
"learning_rate": 1.799753119722943e-06,
"loss": 1.7615,
"step": 36900
},
{
"epoch": 1.75,
"learning_rate": 1.7942943814554837e-06,
"loss": 1.7381,
"step": 37000
},
{
"epoch": 1.75,
"eval_loss": 1.319101095199585,
"eval_runtime": 162.3139,
"eval_samples_per_second": 5.723,
"eval_steps_per_second": 5.723,
"step": 37030
},
{
"epoch": 1.75,
"learning_rate": 1.7888315866485659e-06,
"loss": 1.7177,
"step": 37100
},
{
"epoch": 1.76,
"learning_rate": 1.7833648106010884e-06,
"loss": 1.7527,
"step": 37200
},
{
"epoch": 1.76,
"learning_rate": 1.7778941286668257e-06,
"loss": 1.6938,
"step": 37300
},
{
"epoch": 1.77,
"learning_rate": 1.772419616253393e-06,
"loss": 1.7706,
"step": 37400
},
{
"epoch": 1.77,
"learning_rate": 1.7669413488212027e-06,
"loss": 1.6078,
"step": 37500
},
{
"epoch": 1.78,
"learning_rate": 1.761459401882427e-06,
"loss": 1.6867,
"step": 37600
},
{
"epoch": 1.78,
"learning_rate": 1.755973850999957e-06,
"loss": 1.6677,
"step": 37700
},
{
"epoch": 1.79,
"learning_rate": 1.750484771786358e-06,
"loss": 1.6582,
"step": 37800
},
{
"epoch": 1.79,
"learning_rate": 1.7449922399028333e-06,
"loss": 1.6047,
"step": 37900
},
{
"epoch": 1.8,
"learning_rate": 1.7394963310581735e-06,
"loss": 1.8746,
"step": 38000
},
{
"epoch": 1.8,
"learning_rate": 1.733997121007721e-06,
"loss": 1.549,
"step": 38100
},
{
"epoch": 1.81,
"learning_rate": 1.7284946855523186e-06,
"loss": 1.7323,
"step": 38200
},
{
"epoch": 1.81,
"learning_rate": 1.7229891005372704e-06,
"loss": 1.734,
"step": 38300
},
{
"epoch": 1.82,
"learning_rate": 1.7174804418512918e-06,
"loss": 1.6329,
"step": 38400
},
{
"epoch": 1.82,
"learning_rate": 1.7119687854254674e-06,
"loss": 1.5707,
"step": 38500
},
{
"epoch": 1.82,
"learning_rate": 1.7064542072322015e-06,
"loss": 1.7011,
"step": 38600
},
{
"epoch": 1.83,
"learning_rate": 1.7009367832841715e-06,
"loss": 1.6164,
"step": 38700
},
{
"epoch": 1.83,
"learning_rate": 1.6954165896332817e-06,
"loss": 1.6312,
"step": 38800
},
{
"epoch": 1.84,
"learning_rate": 1.6898937023696123e-06,
"loss": 1.7649,
"step": 38900
},
{
"epoch": 1.84,
"learning_rate": 1.6843681976203744e-06,
"loss": 1.6634,
"step": 39000
},
{
"epoch": 1.85,
"learning_rate": 1.6788401515488557e-06,
"loss": 1.6431,
"step": 39100
},
{
"epoch": 1.85,
"learning_rate": 1.673309640353376e-06,
"loss": 1.7147,
"step": 39200
},
{
"epoch": 1.86,
"learning_rate": 1.6677767402662318e-06,
"loss": 1.881,
"step": 39300
},
{
"epoch": 1.86,
"learning_rate": 1.6622415275526502e-06,
"loss": 1.6384,
"step": 39400
},
{
"epoch": 1.87,
"learning_rate": 1.6567040785097333e-06,
"loss": 1.6662,
"step": 39500
},
{
"epoch": 1.87,
"learning_rate": 1.6511644694654109e-06,
"loss": 1.6323,
"step": 39600
},
{
"epoch": 1.88,
"learning_rate": 1.6456227767773842e-06,
"loss": 1.7642,
"step": 39700
},
{
"epoch": 1.88,
"learning_rate": 1.6400790768320761e-06,
"loss": 1.6971,
"step": 39800
},
{
"epoch": 1.89,
"learning_rate": 1.6345334460435775e-06,
"loss": 1.7224,
"step": 39900
},
{
"epoch": 1.89,
"learning_rate": 1.6289859608525936e-06,
"loss": 1.7847,
"step": 40000
},
{
"epoch": 1.9,
"learning_rate": 1.623436697725391e-06,
"loss": 1.6998,
"step": 40100
},
{
"epoch": 1.9,
"learning_rate": 1.6178857331527427e-06,
"loss": 1.7637,
"step": 40200
},
{
"epoch": 1.9,
"learning_rate": 1.6123331436488752e-06,
"loss": 1.738,
"step": 40300
},
{
"epoch": 1.91,
"learning_rate": 1.6067790057504125e-06,
"loss": 1.8809,
"step": 40400
},
{
"epoch": 1.91,
"learning_rate": 1.6012233960153213e-06,
"loss": 1.6865,
"step": 40500
},
{
"epoch": 1.92,
"learning_rate": 1.5956663910218566e-06,
"loss": 1.7502,
"step": 40600
},
{
"epoch": 1.92,
"learning_rate": 1.590108067367505e-06,
"loss": 1.7131,
"step": 40700
},
{
"epoch": 1.93,
"learning_rate": 1.58454850166793e-06,
"loss": 1.6668,
"step": 40800
},
{
"epoch": 1.93,
"learning_rate": 1.5789877705559149e-06,
"loss": 1.6616,
"step": 40900
},
{
"epoch": 1.94,
"learning_rate": 1.573425950680308e-06,
"loss": 1.8484,
"step": 41000
},
{
"epoch": 1.94,
"learning_rate": 1.567863118704963e-06,
"loss": 1.722,
"step": 41100
},
{
"epoch": 1.95,
"learning_rate": 1.562299351307686e-06,
"loss": 1.6145,
"step": 41200
},
{
"epoch": 1.95,
"learning_rate": 1.5567347251791773e-06,
"loss": 1.744,
"step": 41300
},
{
"epoch": 1.96,
"learning_rate": 1.5511693170219723e-06,
"loss": 1.7476,
"step": 41400
},
{
"epoch": 1.96,
"learning_rate": 1.5456032035493878e-06,
"loss": 1.6705,
"step": 41500
},
{
"epoch": 1.97,
"learning_rate": 1.5400364614844604e-06,
"loss": 1.5381,
"step": 41600
},
{
"epoch": 1.97,
"learning_rate": 1.5344691675588926e-06,
"loss": 1.7072,
"step": 41700
},
{
"epoch": 1.98,
"learning_rate": 1.5289013985119934e-06,
"loss": 1.7217,
"step": 41800
},
{
"epoch": 1.98,
"learning_rate": 1.5233332310896214e-06,
"loss": 1.6447,
"step": 41900
},
{
"epoch": 1.99,
"learning_rate": 1.5177647420431253e-06,
"loss": 1.6961,
"step": 42000
},
{
"epoch": 1.99,
"learning_rate": 1.5121960081282878e-06,
"loss": 1.8037,
"step": 42100
},
{
"epoch": 1.99,
"learning_rate": 1.5066271061042672e-06,
"loss": 1.6076,
"step": 42200
},
{
"epoch": 2.0,
"learning_rate": 1.5010581127325374e-06,
"loss": 1.6256,
"step": 42300
},
{
"epoch": 2.0,
"eval_loss": 1.309001088142395,
"eval_runtime": 163.9053,
"eval_samples_per_second": 5.668,
"eval_steps_per_second": 5.668,
"step": 42320
},
{
"epoch": 2.0,
"learning_rate": 1.4954891047758328e-06,
"loss": 1.6049,
"step": 42400
},
{
"epoch": 2.01,
"learning_rate": 1.489920158997089e-06,
"loss": 1.5866,
"step": 42500
},
{
"epoch": 2.01,
"learning_rate": 1.4843513521583844e-06,
"loss": 1.6174,
"step": 42600
},
{
"epoch": 2.02,
"learning_rate": 1.4787827610198813e-06,
"loss": 1.711,
"step": 42700
},
{
"epoch": 2.02,
"learning_rate": 1.4732144623387696e-06,
"loss": 1.6283,
"step": 42800
},
{
"epoch": 2.03,
"learning_rate": 1.4676465328682085e-06,
"loss": 1.7035,
"step": 42900
},
{
"epoch": 2.03,
"learning_rate": 1.4620790493562662e-06,
"loss": 1.6869,
"step": 43000
},
{
"epoch": 2.04,
"learning_rate": 1.4565120885448656e-06,
"loss": 1.6827,
"step": 43100
},
{
"epoch": 2.04,
"learning_rate": 1.4509457271687238e-06,
"loss": 1.7237,
"step": 43200
},
{
"epoch": 2.05,
"learning_rate": 1.4453800419542962e-06,
"loss": 1.6418,
"step": 43300
},
{
"epoch": 2.05,
"learning_rate": 1.4398151096187167e-06,
"loss": 1.7514,
"step": 43400
},
{
"epoch": 2.06,
"learning_rate": 1.434251006868743e-06,
"loss": 1.7102,
"step": 43500
},
{
"epoch": 2.06,
"learning_rate": 1.4286878103996967e-06,
"loss": 1.6147,
"step": 43600
},
{
"epoch": 2.07,
"learning_rate": 1.4231255968944078e-06,
"loss": 1.557,
"step": 43700
},
{
"epoch": 2.07,
"learning_rate": 1.4175644430221568e-06,
"loss": 1.6971,
"step": 43800
},
{
"epoch": 2.07,
"learning_rate": 1.412004425437619e-06,
"loss": 1.6645,
"step": 43900
},
{
"epoch": 2.08,
"learning_rate": 1.4064456207798066e-06,
"loss": 1.688,
"step": 44000
},
{
"epoch": 2.08,
"learning_rate": 1.4008881056710125e-06,
"loss": 1.7062,
"step": 44100
},
{
"epoch": 2.09,
"learning_rate": 1.3953319567157556e-06,
"loss": 1.5745,
"step": 44200
},
{
"epoch": 2.09,
"learning_rate": 1.3897772504997228e-06,
"loss": 1.5922,
"step": 44300
},
{
"epoch": 2.1,
"learning_rate": 1.3842240635887154e-06,
"loss": 1.7366,
"step": 44400
},
{
"epoch": 2.1,
"learning_rate": 1.3786724725275911e-06,
"loss": 1.7974,
"step": 44500
},
{
"epoch": 2.11,
"learning_rate": 1.3731225538392125e-06,
"loss": 1.7394,
"step": 44600
},
{
"epoch": 2.11,
"learning_rate": 1.367574384023388e-06,
"loss": 1.7766,
"step": 44700
},
{
"epoch": 2.12,
"learning_rate": 1.3620280395558218e-06,
"loss": 1.631,
"step": 44800
},
{
"epoch": 2.12,
"learning_rate": 1.3564835968870557e-06,
"loss": 1.6251,
"step": 44900
},
{
"epoch": 2.13,
"learning_rate": 1.3509411324414191e-06,
"loss": 1.6983,
"step": 45000
},
{
"epoch": 2.13,
"learning_rate": 1.345400722615972e-06,
"loss": 1.6382,
"step": 45100
},
{
"epoch": 2.14,
"learning_rate": 1.3398624437794549e-06,
"loss": 1.6588,
"step": 45200
},
{
"epoch": 2.14,
"learning_rate": 1.3343263722712342e-06,
"loss": 1.8123,
"step": 45300
},
{
"epoch": 2.15,
"learning_rate": 1.3287925844002496e-06,
"loss": 1.6796,
"step": 45400
},
{
"epoch": 2.15,
"learning_rate": 1.3232611564439656e-06,
"loss": 1.5431,
"step": 45500
},
{
"epoch": 2.16,
"learning_rate": 1.3177321646473154e-06,
"loss": 1.57,
"step": 45600
},
{
"epoch": 2.16,
"learning_rate": 1.3122056852216538e-06,
"loss": 1.6356,
"step": 45700
},
{
"epoch": 2.16,
"learning_rate": 1.3066817943437054e-06,
"loss": 1.6333,
"step": 45800
},
{
"epoch": 2.17,
"learning_rate": 1.3011605681545126e-06,
"loss": 1.595,
"step": 45900
},
{
"epoch": 2.17,
"learning_rate": 1.29564208275839e-06,
"loss": 1.5615,
"step": 46000
},
{
"epoch": 2.18,
"learning_rate": 1.2901264142218712e-06,
"loss": 1.7929,
"step": 46100
},
{
"epoch": 2.18,
"learning_rate": 1.2846136385726644e-06,
"loss": 1.8091,
"step": 46200
},
{
"epoch": 2.19,
"learning_rate": 1.2791038317986009e-06,
"loss": 1.6715,
"step": 46300
},
{
"epoch": 2.19,
"learning_rate": 1.2735970698465896e-06,
"loss": 1.6615,
"step": 46400
},
{
"epoch": 2.2,
"learning_rate": 1.2680934286215696e-06,
"loss": 1.6615,
"step": 46500
},
{
"epoch": 2.2,
"learning_rate": 1.2625929839854644e-06,
"loss": 1.7039,
"step": 46600
},
{
"epoch": 2.21,
"learning_rate": 1.2570958117561357e-06,
"loss": 1.7209,
"step": 46700
},
{
"epoch": 2.21,
"learning_rate": 1.2516019877063388e-06,
"loss": 1.7251,
"step": 46800
},
{
"epoch": 2.22,
"learning_rate": 1.2461115875626768e-06,
"loss": 1.7202,
"step": 46900
},
{
"epoch": 2.22,
"learning_rate": 1.2406246870045588e-06,
"loss": 1.7948,
"step": 47000
},
{
"epoch": 2.23,
"learning_rate": 1.2351413616631561e-06,
"loss": 1.6631,
"step": 47100
},
{
"epoch": 2.23,
"learning_rate": 1.2296616871203584e-06,
"loss": 1.6321,
"step": 47200
},
{
"epoch": 2.24,
"learning_rate": 1.2241857389077332e-06,
"loss": 1.7737,
"step": 47300
},
{
"epoch": 2.24,
"learning_rate": 1.2187135925054852e-06,
"loss": 1.5694,
"step": 47400
},
{
"epoch": 2.25,
"learning_rate": 1.2132453233414145e-06,
"loss": 1.7562,
"step": 47500
},
{
"epoch": 2.25,
"learning_rate": 1.207781006789877e-06,
"loss": 1.5521,
"step": 47600
},
{
"epoch": 2.25,
"eval_loss": 1.2960591316223145,
"eval_runtime": 158.903,
"eval_samples_per_second": 5.846,
"eval_steps_per_second": 5.846,
"step": 47610
},
{
"epoch": 2.25,
"learning_rate": 1.202320718170748e-06,
"loss": 1.6698,
"step": 47700
},
{
"epoch": 2.26,
"learning_rate": 1.1968645327483792e-06,
"loss": 1.5465,
"step": 47800
},
{
"epoch": 2.26,
"learning_rate": 1.1914125257305654e-06,
"loss": 1.6406,
"step": 47900
},
{
"epoch": 2.27,
"learning_rate": 1.1859647722675075e-06,
"loss": 1.6434,
"step": 48000
},
{
"epoch": 2.27,
"learning_rate": 1.1805213474507738e-06,
"loss": 1.5834,
"step": 48100
},
{
"epoch": 2.28,
"learning_rate": 1.1750823263122683e-06,
"loss": 1.683,
"step": 48200
},
{
"epoch": 2.28,
"learning_rate": 1.169647783823193e-06,
"loss": 1.5975,
"step": 48300
},
{
"epoch": 2.29,
"learning_rate": 1.1642177948930188e-06,
"loss": 1.6729,
"step": 48400
},
{
"epoch": 2.29,
"learning_rate": 1.1587924343684486e-06,
"loss": 1.688,
"step": 48500
},
{
"epoch": 2.3,
"learning_rate": 1.1533717770323887e-06,
"loss": 1.6362,
"step": 48600
},
{
"epoch": 2.3,
"learning_rate": 1.1479558976029164e-06,
"loss": 1.7004,
"step": 48700
},
{
"epoch": 2.31,
"learning_rate": 1.1425448707322505e-06,
"loss": 1.6087,
"step": 48800
},
{
"epoch": 2.31,
"learning_rate": 1.137138771005723e-06,
"loss": 1.6815,
"step": 48900
},
{
"epoch": 2.32,
"learning_rate": 1.1317376729407493e-06,
"loss": 1.5914,
"step": 49000
},
{
"epoch": 2.32,
"learning_rate": 1.1263416509858032e-06,
"loss": 1.5619,
"step": 49100
},
{
"epoch": 2.33,
"learning_rate": 1.1209507795193888e-06,
"loss": 1.6197,
"step": 49200
},
{
"epoch": 2.33,
"learning_rate": 1.1155651328490174e-06,
"loss": 1.6824,
"step": 49300
},
{
"epoch": 2.33,
"learning_rate": 1.11018478521018e-06,
"loss": 1.7277,
"step": 49400
},
{
"epoch": 2.34,
"learning_rate": 1.1048098107653282e-06,
"loss": 1.6273,
"step": 49500
},
{
"epoch": 2.34,
"learning_rate": 1.0994402836028472e-06,
"loss": 1.6803,
"step": 49600
},
{
"epoch": 2.35,
"learning_rate": 1.0940762777360401e-06,
"loss": 1.5929,
"step": 49700
},
{
"epoch": 2.35,
"learning_rate": 1.0887178671021024e-06,
"loss": 1.6484,
"step": 49800
},
{
"epoch": 2.36,
"learning_rate": 1.0833651255611058e-06,
"loss": 1.7423,
"step": 49900
},
{
"epoch": 2.36,
"learning_rate": 1.0780181268949805e-06,
"loss": 1.6847,
"step": 50000
},
{
"epoch": 2.37,
"learning_rate": 1.0726769448064956e-06,
"loss": 1.6074,
"step": 50100
},
{
"epoch": 2.37,
"learning_rate": 1.0673416529182462e-06,
"loss": 1.7478,
"step": 50200
},
{
"epoch": 2.38,
"learning_rate": 1.0620123247716362e-06,
"loss": 1.7042,
"step": 50300
},
{
"epoch": 2.38,
"learning_rate": 1.0566890338258655e-06,
"loss": 1.6337,
"step": 50400
},
{
"epoch": 2.39,
"learning_rate": 1.0513718534569187e-06,
"loss": 1.7174,
"step": 50500
},
{
"epoch": 2.39,
"learning_rate": 1.0460608569565506e-06,
"loss": 1.6805,
"step": 50600
},
{
"epoch": 2.4,
"learning_rate": 1.0407561175312802e-06,
"loss": 1.5872,
"step": 50700
},
{
"epoch": 2.4,
"learning_rate": 1.035457708301377e-06,
"loss": 1.7103,
"step": 50800
},
{
"epoch": 2.41,
"learning_rate": 1.0301657022998575e-06,
"loss": 1.7544,
"step": 50900
},
{
"epoch": 2.41,
"learning_rate": 1.0248801724714746e-06,
"loss": 1.6165,
"step": 51000
},
{
"epoch": 2.42,
"learning_rate": 1.019601191671715e-06,
"loss": 1.5813,
"step": 51100
},
{
"epoch": 2.42,
"learning_rate": 1.0143288326657935e-06,
"loss": 1.6332,
"step": 51200
},
{
"epoch": 2.42,
"learning_rate": 1.0090631681276508e-06,
"loss": 1.7332,
"step": 51300
},
{
"epoch": 2.43,
"learning_rate": 1.0038042706389505e-06,
"loss": 1.5387,
"step": 51400
},
{
"epoch": 2.43,
"learning_rate": 9.985522126880806e-07,
"loss": 1.5534,
"step": 51500
},
{
"epoch": 2.44,
"learning_rate": 9.93307066669153e-07,
"loss": 1.6457,
"step": 51600
},
{
"epoch": 2.44,
"learning_rate": 9.880689048810049e-07,
"loss": 1.6818,
"step": 51700
},
{
"epoch": 2.45,
"learning_rate": 9.828377995262048e-07,
"loss": 1.5609,
"step": 51800
},
{
"epoch": 2.45,
"learning_rate": 9.77613822710054e-07,
"loss": 1.7747,
"step": 51900
},
{
"epoch": 2.46,
"learning_rate": 9.72397046439596e-07,
"loss": 1.7221,
"step": 52000
},
{
"epoch": 2.46,
"learning_rate": 9.671875426226204e-07,
"loss": 1.7983,
"step": 52100
},
{
"epoch": 2.47,
"learning_rate": 9.61985383066676e-07,
"loss": 1.6314,
"step": 52200
},
{
"epoch": 2.47,
"learning_rate": 9.567906394780763e-07,
"loss": 1.6959,
"step": 52300
},
{
"epoch": 2.48,
"learning_rate": 9.516033834609155e-07,
"loss": 1.6105,
"step": 52400
},
{
"epoch": 2.48,
"learning_rate": 9.464236865160779e-07,
"loss": 1.573,
"step": 52500
},
{
"epoch": 2.49,
"learning_rate": 9.412516200402556e-07,
"loss": 1.6789,
"step": 52600
},
{
"epoch": 2.49,
"learning_rate": 9.360872553249605e-07,
"loss": 1.7057,
"step": 52700
},
{
"epoch": 2.5,
"learning_rate": 9.30930663555545e-07,
"loss": 1.6102,
"step": 52800
},
{
"epoch": 2.5,
"learning_rate": 9.257819158102203e-07,
"loss": 1.8318,
"step": 52900
},
{
"epoch": 2.5,
"eval_loss": 1.2909756898880005,
"eval_runtime": 158.004,
"eval_samples_per_second": 5.88,
"eval_steps_per_second": 5.88,
"step": 52900
},
{
"epoch": 2.51,
"learning_rate": 9.206410830590746e-07,
"loss": 1.6514,
"step": 53000
},
{
"epoch": 2.51,
"learning_rate": 9.15508236163097e-07,
"loss": 1.7379,
"step": 53100
},
{
"epoch": 2.51,
"learning_rate": 9.103834458732002e-07,
"loss": 1.6323,
"step": 53200
},
{
"epoch": 2.52,
"learning_rate": 9.052667828292439e-07,
"loss": 1.8245,
"step": 53300
},
{
"epoch": 2.52,
"learning_rate": 9.001583175590636e-07,
"loss": 1.5375,
"step": 53400
},
{
"epoch": 2.53,
"learning_rate": 8.950581204774961e-07,
"loss": 1.737,
"step": 53500
},
{
"epoch": 2.53,
"learning_rate": 8.899662618854105e-07,
"loss": 1.6755,
"step": 53600
},
{
"epoch": 2.54,
"learning_rate": 8.848828119687375e-07,
"loss": 1.6737,
"step": 53700
},
{
"epoch": 2.54,
"learning_rate": 8.798078407975051e-07,
"loss": 1.7876,
"step": 53800
},
{
"epoch": 2.55,
"learning_rate": 8.747414183248682e-07,
"loss": 1.6804,
"step": 53900
},
{
"epoch": 2.55,
"learning_rate": 8.696836143861491e-07,
"loss": 1.5951,
"step": 54000
},
{
"epoch": 2.56,
"learning_rate": 8.646344986978708e-07,
"loss": 1.6206,
"step": 54100
},
{
"epoch": 2.56,
"learning_rate": 8.595941408567983e-07,
"loss": 1.7823,
"step": 54200
},
{
"epoch": 2.57,
"learning_rate": 8.545626103389805e-07,
"loss": 1.6832,
"step": 54300
},
{
"epoch": 2.57,
"learning_rate": 8.495399764987894e-07,
"loss": 1.6455,
"step": 54400
},
{
"epoch": 2.58,
"learning_rate": 8.445263085679645e-07,
"loss": 1.6894,
"step": 54500
},
{
"epoch": 2.58,
"learning_rate": 8.395216756546627e-07,
"loss": 1.5944,
"step": 54600
},
{
"epoch": 2.59,
"learning_rate": 8.345261467425003e-07,
"loss": 1.7441,
"step": 54700
},
{
"epoch": 2.59,
"learning_rate": 8.295397906896052e-07,
"loss": 1.7046,
"step": 54800
},
{
"epoch": 2.59,
"learning_rate": 8.245626762276663e-07,
"loss": 1.6335,
"step": 54900
},
{
"epoch": 2.6,
"learning_rate": 8.195948719609889e-07,
"loss": 1.7515,
"step": 55000
},
{
"epoch": 2.6,
"learning_rate": 8.146364463655458e-07,
"loss": 1.6208,
"step": 55100
},
{
"epoch": 2.61,
"learning_rate": 8.096874677880322e-07,
"loss": 1.6655,
"step": 55200
},
{
"epoch": 2.61,
"learning_rate": 8.047480044449309e-07,
"loss": 1.7218,
"step": 55300
},
{
"epoch": 2.62,
"learning_rate": 7.998181244215638e-07,
"loss": 1.5814,
"step": 55400
},
{
"epoch": 2.62,
"learning_rate": 7.948978956711576e-07,
"loss": 1.7588,
"step": 55500
},
{
"epoch": 2.63,
"learning_rate": 7.899873860139058e-07,
"loss": 1.6841,
"step": 55600
},
{
"epoch": 2.63,
"learning_rate": 7.850866631360363e-07,
"loss": 1.6321,
"step": 55700
},
{
"epoch": 2.64,
"learning_rate": 7.801957945888744e-07,
"loss": 1.654,
"step": 55800
},
{
"epoch": 2.64,
"learning_rate": 7.75314847787914e-07,
"loss": 1.6165,
"step": 55900
},
{
"epoch": 2.65,
"learning_rate": 7.704438900118902e-07,
"loss": 1.7136,
"step": 56000
},
{
"epoch": 2.65,
"learning_rate": 7.655829884018475e-07,
"loss": 1.6892,
"step": 56100
},
{
"epoch": 2.66,
"learning_rate": 7.607322099602175e-07,
"loss": 1.6254,
"step": 56200
},
{
"epoch": 2.66,
"learning_rate": 7.558916215498944e-07,
"loss": 1.5811,
"step": 56300
},
{
"epoch": 2.67,
"learning_rate": 7.510612898933145e-07,
"loss": 1.6081,
"step": 56400
},
{
"epoch": 2.67,
"learning_rate": 7.462412815715343e-07,
"loss": 1.5603,
"step": 56500
},
{
"epoch": 2.68,
"learning_rate": 7.414316630233144e-07,
"loss": 1.7405,
"step": 56600
},
{
"epoch": 2.68,
"learning_rate": 7.366325005442026e-07,
"loss": 1.6653,
"step": 56700
},
{
"epoch": 2.68,
"learning_rate": 7.318438602856225e-07,
"loss": 1.6596,
"step": 56800
},
{
"epoch": 2.69,
"learning_rate": 7.270658082539581e-07,
"loss": 1.706,
"step": 56900
},
{
"epoch": 2.69,
"learning_rate": 7.222984103096469e-07,
"loss": 1.718,
"step": 57000
},
{
"epoch": 2.7,
"learning_rate": 7.175417321662698e-07,
"loss": 1.6861,
"step": 57100
},
{
"epoch": 2.7,
"learning_rate": 7.127958393896484e-07,
"loss": 1.668,
"step": 57200
},
{
"epoch": 2.71,
"learning_rate": 7.080607973969376e-07,
"loss": 1.7527,
"step": 57300
},
{
"epoch": 2.71,
"learning_rate": 7.033366714557257e-07,
"loss": 1.7254,
"step": 57400
},
{
"epoch": 2.72,
"learning_rate": 6.986235266831368e-07,
"loss": 1.5732,
"step": 57500
},
{
"epoch": 2.72,
"learning_rate": 6.93921428044928e-07,
"loss": 1.6163,
"step": 57600
},
{
"epoch": 2.73,
"learning_rate": 6.892304403545984e-07,
"loss": 1.7492,
"step": 57700
},
{
"epoch": 2.73,
"learning_rate": 6.845506282724956e-07,
"loss": 1.7095,
"step": 57800
},
{
"epoch": 2.74,
"learning_rate": 6.798820563049212e-07,
"loss": 1.7914,
"step": 57900
},
{
"epoch": 2.74,
"learning_rate": 6.75224788803245e-07,
"loss": 1.6378,
"step": 58000
},
{
"epoch": 2.75,
"learning_rate": 6.70578889963015e-07,
"loss": 1.6761,
"step": 58100
},
{
"epoch": 2.75,
"eval_loss": 1.2901337146759033,
"eval_runtime": 158.1238,
"eval_samples_per_second": 5.875,
"eval_steps_per_second": 5.875,
"step": 58190
},
{
"epoch": 2.75,
"learning_rate": 6.659444238230763e-07,
"loss": 1.6017,
"step": 58200
},
{
"epoch": 2.76,
"learning_rate": 6.613214542646845e-07,
"loss": 1.5221,
"step": 58300
},
{
"epoch": 2.76,
"learning_rate": 6.567100450106276e-07,
"loss": 1.7276,
"step": 58400
},
{
"epoch": 2.77,
"learning_rate": 6.521102596243459e-07,
"loss": 1.5169,
"step": 58500
},
{
"epoch": 2.77,
"learning_rate": 6.475221615090591e-07,
"loss": 1.7469,
"step": 58600
},
{
"epoch": 2.77,
"learning_rate": 6.429458139068882e-07,
"loss": 1.646,
"step": 58700
},
{
"epoch": 2.78,
"learning_rate": 6.383812798979856e-07,
"loss": 1.6483,
"step": 58800
},
{
"epoch": 2.78,
"learning_rate": 6.338286223996673e-07,
"loss": 1.5527,
"step": 58900
},
{
"epoch": 2.79,
"learning_rate": 6.29287904165543e-07,
"loss": 1.6215,
"step": 59000
},
{
"epoch": 2.79,
"learning_rate": 6.247591877846517e-07,
"loss": 1.6239,
"step": 59100
},
{
"epoch": 2.8,
"learning_rate": 6.202425356805997e-07,
"loss": 1.6994,
"step": 59200
},
{
"epoch": 2.8,
"learning_rate": 6.157380101107016e-07,
"loss": 1.5472,
"step": 59300
},
{
"epoch": 2.81,
"learning_rate": 6.112456731651181e-07,
"loss": 1.589,
"step": 59400
},
{
"epoch": 2.81,
"learning_rate": 6.067655867660037e-07,
"loss": 1.6836,
"step": 59500
},
{
"epoch": 2.82,
"learning_rate": 6.022978126666509e-07,
"loss": 1.6906,
"step": 59600
},
{
"epoch": 2.82,
"learning_rate": 5.978424124506421e-07,
"loss": 1.7639,
"step": 59700
},
{
"epoch": 2.83,
"learning_rate": 5.933994475309969e-07,
"loss": 1.6307,
"step": 59800
},
{
"epoch": 2.83,
"learning_rate": 5.889689791493279e-07,
"loss": 1.6508,
"step": 59900
},
{
"epoch": 2.84,
"learning_rate": 5.84551068374996e-07,
"loss": 1.6107,
"step": 60000
},
{
"epoch": 2.84,
"learning_rate": 5.801457761042689e-07,
"loss": 1.6451,
"step": 60100
},
{
"epoch": 2.85,
"learning_rate": 5.757531630594812e-07,
"loss": 1.6345,
"step": 60200
},
{
"epoch": 2.85,
"learning_rate": 5.71373289788197e-07,
"loss": 1.5496,
"step": 60300
},
{
"epoch": 2.85,
"learning_rate": 5.670062166623781e-07,
"loss": 1.6161,
"step": 60400
},
{
"epoch": 2.86,
"learning_rate": 5.626520038775476e-07,
"loss": 1.618,
"step": 60500
},
{
"epoch": 2.86,
"learning_rate": 5.583107114519624e-07,
"loss": 1.5446,
"step": 60600
},
{
"epoch": 2.87,
"learning_rate": 5.539823992257877e-07,
"loss": 1.6561,
"step": 60700
},
{
"epoch": 2.87,
"learning_rate": 5.496671268602682e-07,
"loss": 1.7354,
"step": 60800
},
{
"epoch": 2.88,
"learning_rate": 5.453649538369088e-07,
"loss": 1.5153,
"step": 60900
},
{
"epoch": 2.88,
"learning_rate": 5.410759394566529e-07,
"loss": 1.6056,
"step": 61000
},
{
"epoch": 2.89,
"learning_rate": 5.368001428390672e-07,
"loss": 1.674,
"step": 61100
},
{
"epoch": 2.89,
"learning_rate": 5.325376229215244e-07,
"loss": 1.6993,
"step": 61200
},
{
"epoch": 2.9,
"learning_rate": 5.282884384583917e-07,
"loss": 1.6882,
"step": 61300
},
{
"epoch": 2.9,
"learning_rate": 5.240526480202211e-07,
"loss": 1.5872,
"step": 61400
},
{
"epoch": 2.91,
"learning_rate": 5.198303099929429e-07,
"loss": 1.6554,
"step": 61500
},
{
"epoch": 2.91,
"learning_rate": 5.156214825770591e-07,
"loss": 1.6168,
"step": 61600
},
{
"epoch": 2.92,
"learning_rate": 5.114262237868423e-07,
"loss": 1.5752,
"step": 61700
},
{
"epoch": 2.92,
"learning_rate": 5.072445914495355e-07,
"loss": 1.655,
"step": 61800
},
{
"epoch": 2.93,
"learning_rate": 5.030766432045565e-07,
"loss": 1.6429,
"step": 61900
},
{
"epoch": 2.93,
"learning_rate": 4.989224365027009e-07,
"loss": 1.6156,
"step": 62000
},
{
"epoch": 2.94,
"learning_rate": 4.947820286053518e-07,
"loss": 1.6634,
"step": 62100
},
{
"epoch": 2.94,
"learning_rate": 4.906554765836916e-07,
"loss": 1.7337,
"step": 62200
},
{
"epoch": 2.94,
"learning_rate": 4.865428373179121e-07,
"loss": 1.6085,
"step": 62300
},
{
"epoch": 2.95,
"learning_rate": 4.824441674964334e-07,
"loss": 1.6445,
"step": 62400
},
{
"epoch": 2.95,
"learning_rate": 4.783595236151211e-07,
"loss": 1.7347,
"step": 62500
},
{
"epoch": 2.96,
"learning_rate": 4.7428896197650816e-07,
"loss": 1.5851,
"step": 62600
},
{
"epoch": 2.96,
"learning_rate": 4.702325386890184e-07,
"loss": 1.6059,
"step": 62700
},
{
"epoch": 2.97,
"learning_rate": 4.661903096661929e-07,
"loss": 1.6562,
"step": 62800
},
{
"epoch": 2.97,
"learning_rate": 4.6216233062592107e-07,
"loss": 1.6983,
"step": 62900
},
{
"epoch": 2.98,
"learning_rate": 4.581486570896701e-07,
"loss": 1.7001,
"step": 63000
},
{
"epoch": 2.98,
"learning_rate": 4.541493443817206e-07,
"loss": 1.5994,
"step": 63100
},
{
"epoch": 2.99,
"learning_rate": 4.501644476284045e-07,
"loss": 1.6582,
"step": 63200
},
{
"epoch": 2.99,
"learning_rate": 4.4619402175734606e-07,
"loss": 1.7147,
"step": 63300
},
{
"epoch": 3.0,
"learning_rate": 4.4223812149670195e-07,
"loss": 1.6312,
"step": 63400
},
{
"epoch": 3.0,
"eval_loss": 1.287863850593567,
"eval_runtime": 157.981,
"eval_samples_per_second": 5.88,
"eval_steps_per_second": 5.88,
"step": 63480
},
{
"epoch": 3.0,
"learning_rate": 4.3829680137440883e-07,
"loss": 1.5559,
"step": 63500
},
{
"epoch": 3.01,
"learning_rate": 4.343701157174329e-07,
"loss": 1.6739,
"step": 63600
},
{
"epoch": 3.01,
"learning_rate": 4.3045811865101767e-07,
"loss": 1.4717,
"step": 63700
},
{
"epoch": 3.02,
"learning_rate": 4.265608640979411e-07,
"loss": 1.6384,
"step": 63800
},
{
"epoch": 3.02,
"learning_rate": 4.226784057777699e-07,
"loss": 1.5138,
"step": 63900
},
{
"epoch": 3.03,
"learning_rate": 4.1881079720612204e-07,
"loss": 1.5968,
"step": 64000
},
{
"epoch": 3.03,
"learning_rate": 4.149580916939255e-07,
"loss": 1.5826,
"step": 64100
},
{
"epoch": 3.03,
"learning_rate": 4.1112034234668615e-07,
"loss": 1.7272,
"step": 64200
},
{
"epoch": 3.04,
"learning_rate": 4.0729760206375404e-07,
"loss": 1.722,
"step": 64300
},
{
"epoch": 3.04,
"learning_rate": 4.0348992353759657e-07,
"loss": 1.6016,
"step": 64400
},
{
"epoch": 3.05,
"learning_rate": 3.9969735925306884e-07,
"loss": 1.5948,
"step": 64500
},
{
"epoch": 3.05,
"learning_rate": 3.95919961486693e-07,
"loss": 1.4295,
"step": 64600
},
{
"epoch": 3.06,
"learning_rate": 3.9215778230593563e-07,
"loss": 1.6671,
"step": 64700
},
{
"epoch": 3.06,
"learning_rate": 3.8841087356849295e-07,
"loss": 1.6863,
"step": 64800
},
{
"epoch": 3.07,
"learning_rate": 3.846792869215725e-07,
"loss": 1.7321,
"step": 64900
},
{
"epoch": 3.07,
"learning_rate": 3.8096307380118334e-07,
"loss": 1.6549,
"step": 65000
},
{
"epoch": 3.08,
"learning_rate": 3.7726228543142645e-07,
"loss": 1.672,
"step": 65100
},
{
"epoch": 3.08,
"learning_rate": 3.7357697282378916e-07,
"loss": 1.5972,
"step": 65200
},
{
"epoch": 3.09,
"learning_rate": 3.6990718677644103e-07,
"loss": 1.6149,
"step": 65300
},
{
"epoch": 3.09,
"learning_rate": 3.662529778735354e-07,
"loss": 1.6483,
"step": 65400
},
{
"epoch": 3.1,
"learning_rate": 3.6261439648450973e-07,
"loss": 1.5785,
"step": 65500
},
{
"epoch": 3.1,
"learning_rate": 3.5899149276339345e-07,
"loss": 1.7107,
"step": 65600
},
{
"epoch": 3.11,
"learning_rate": 3.553843166481148e-07,
"loss": 1.6453,
"step": 65700
},
{
"epoch": 3.11,
"learning_rate": 3.517929178598151e-07,
"loss": 1.7473,
"step": 65800
},
{
"epoch": 3.11,
"learning_rate": 3.4821734590216027e-07,
"loss": 1.6577,
"step": 65900
},
{
"epoch": 3.12,
"learning_rate": 3.4465765006066065e-07,
"loss": 1.6899,
"step": 66000
},
{
"epoch": 3.12,
"learning_rate": 3.4111387940199014e-07,
"loss": 1.5638,
"step": 66100
},
{
"epoch": 3.13,
"learning_rate": 3.3758608277331257e-07,
"loss": 1.7071,
"step": 66200
},
{
"epoch": 3.13,
"learning_rate": 3.3407430880160433e-07,
"loss": 1.4997,
"step": 66300
},
{
"epoch": 3.14,
"learning_rate": 3.3057860589298746e-07,
"loss": 1.4916,
"step": 66400
},
{
"epoch": 3.14,
"learning_rate": 3.2709902223206136e-07,
"loss": 1.7187,
"step": 66500
},
{
"epoch": 3.15,
"learning_rate": 3.2363560578123807e-07,
"loss": 1.6423,
"step": 66600
},
{
"epoch": 3.15,
"learning_rate": 3.2018840428008176e-07,
"loss": 1.6532,
"step": 66700
},
{
"epoch": 3.16,
"learning_rate": 3.167574652446497e-07,
"loss": 1.7428,
"step": 66800
},
{
"epoch": 3.16,
"learning_rate": 3.133428359668401e-07,
"loss": 1.6502,
"step": 66900
},
{
"epoch": 3.17,
"learning_rate": 3.099445635137365e-07,
"loss": 1.6433,
"step": 67000
},
{
"epoch": 3.17,
"learning_rate": 3.0656269472696136e-07,
"loss": 1.8004,
"step": 67100
},
{
"epoch": 3.18,
"learning_rate": 3.031972762220291e-07,
"loss": 1.5027,
"step": 67200
},
{
"epoch": 3.18,
"learning_rate": 2.998483543877065e-07,
"loss": 1.7288,
"step": 67300
},
{
"epoch": 3.19,
"learning_rate": 2.965159753853681e-07,
"loss": 1.7243,
"step": 67400
},
{
"epoch": 3.19,
"learning_rate": 2.93200185148364e-07,
"loss": 1.7439,
"step": 67500
},
{
"epoch": 3.2,
"learning_rate": 2.8990102938138685e-07,
"loss": 1.6872,
"step": 67600
},
{
"epoch": 3.2,
"learning_rate": 2.866185535598389e-07,
"loss": 1.588,
"step": 67700
},
{
"epoch": 3.2,
"learning_rate": 2.83352802929207e-07,
"loss": 1.6774,
"step": 67800
},
{
"epoch": 3.21,
"learning_rate": 2.801038225044403e-07,
"loss": 1.663,
"step": 67900
},
{
"epoch": 3.21,
"learning_rate": 2.7687165706932636e-07,
"loss": 1.6019,
"step": 68000
},
{
"epoch": 3.22,
"learning_rate": 2.7365635117587673e-07,
"loss": 1.6143,
"step": 68100
},
{
"epoch": 3.22,
"learning_rate": 2.704579491437113e-07,
"loss": 1.7017,
"step": 68200
},
{
"epoch": 3.23,
"learning_rate": 2.672764950594491e-07,
"loss": 1.5511,
"step": 68300
},
{
"epoch": 3.23,
"learning_rate": 2.641120327760981e-07,
"loss": 1.6588,
"step": 68400
},
{
"epoch": 3.24,
"learning_rate": 2.609646059124529e-07,
"loss": 1.6361,
"step": 68500
},
{
"epoch": 3.24,
"learning_rate": 2.578342578524922e-07,
"loss": 1.6632,
"step": 68600
},
{
"epoch": 3.25,
"learning_rate": 2.54721031744782e-07,
"loss": 1.7003,
"step": 68700
},
{
"epoch": 3.25,
"eval_loss": 1.2819619178771973,
"eval_runtime": 159.0772,
"eval_samples_per_second": 5.84,
"eval_steps_per_second": 5.84,
"step": 68770
},
{
"epoch": 3.25,
"learning_rate": 2.516249705018797e-07,
"loss": 1.6595,
"step": 68800
},
{
"epoch": 3.26,
"learning_rate": 2.485461167997429e-07,
"loss": 1.5922,
"step": 68900
},
{
"epoch": 3.26,
"learning_rate": 2.4548451307714115e-07,
"loss": 1.6189,
"step": 69000
},
{
"epoch": 3.27,
"learning_rate": 2.4244020153507233e-07,
"loss": 1.697,
"step": 69100
},
{
"epoch": 3.27,
"learning_rate": 2.394132241361782e-07,
"loss": 1.5139,
"step": 69200
},
{
"epoch": 3.28,
"learning_rate": 2.364036226041679e-07,
"loss": 1.5705,
"step": 69300
},
{
"epoch": 3.28,
"learning_rate": 2.334114384232437e-07,
"loss": 1.7863,
"step": 69400
},
{
"epoch": 3.28,
"learning_rate": 2.3043671283752649e-07,
"loss": 1.7356,
"step": 69500
},
{
"epoch": 3.29,
"learning_rate": 2.274794868504891e-07,
"loss": 1.7676,
"step": 69600
},
{
"epoch": 3.29,
"learning_rate": 2.2453980122439088e-07,
"loss": 1.6916,
"step": 69700
},
{
"epoch": 3.3,
"learning_rate": 2.2161769647971637e-07,
"loss": 1.6342,
"step": 69800
},
{
"epoch": 3.3,
"learning_rate": 2.1871321289461466e-07,
"loss": 1.6444,
"step": 69900
},
{
"epoch": 3.31,
"learning_rate": 2.158263905043462e-07,
"loss": 1.5611,
"step": 70000
}
],
"logging_steps": 100,
"max_steps": 84628,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 5000,
"total_flos": 4.146961444680499e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}