diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5926 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.99809427010545, + "eval_steps": 500, + "global_step": 9835, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01, + "learning_rate": 4.999987245581571e-05, + "loss": 1.7374, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9999489824564244e-05, + "loss": 1.5943, + "step": 20 + }, + { + "epoch": 0.02, + "learning_rate": 4.9998852110149786e-05, + "loss": 1.4529, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 4.999795931907928e-05, + "loss": 1.4299, + "step": 40 + }, + { + "epoch": 0.03, + "learning_rate": 4.999681146046236e-05, + "loss": 1.2767, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 4.9995408546011235e-05, + "loss": 1.2893, + "step": 60 + }, + { + "epoch": 0.04, + "learning_rate": 4.9993750590040575e-05, + "loss": 1.297, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 4.9991837609467425e-05, + "loss": 1.1932, + "step": 80 + }, + { + "epoch": 0.05, + "learning_rate": 4.998966962381092e-05, + "loss": 1.1411, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.998724665519219e-05, + "loss": 1.1558, + "step": 100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9984568728334075e-05, + "loss": 1.1515, + "step": 110 + }, + { + "epoch": 0.06, + "learning_rate": 4.998163587056089e-05, + "loss": 1.0365, + "step": 120 + }, + { + "epoch": 0.07, + "learning_rate": 4.997844811179817e-05, + "loss": 1.0264, + "step": 130 + }, + { + "epoch": 0.07, + "learning_rate": 4.9975005484572305e-05, + "loss": 1.0283, + "step": 140 + }, + { + "epoch": 0.08, + "learning_rate": 4.997130802401027e-05, + "loss": 1.0356, + "step": 150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9967355767839225e-05, + "loss": 1.0245, + "step": 160 + }, + { + "epoch": 0.09, + "learning_rate": 4.996314875638616e-05, + "loss": 1.0557, + "step": 170 + }, + { + "epoch": 0.09, + "learning_rate": 4.995868703257745e-05, + "loss": 1.0417, + "step": 180 + }, + { + "epoch": 0.1, + "learning_rate": 4.995397064193846e-05, + "loss": 1.0008, + "step": 190 + }, + { + "epoch": 0.1, + "learning_rate": 4.9948999632593055e-05, + "loss": 1.035, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 4.994377405526308e-05, + "loss": 1.0543, + "step": 210 + }, + { + "epoch": 0.11, + "learning_rate": 4.9938293963267914e-05, + "loss": 1.0384, + "step": 220 + }, + { + "epoch": 0.12, + "learning_rate": 4.993255941252385e-05, + "loss": 1.0359, + "step": 230 + }, + { + "epoch": 0.12, + "learning_rate": 4.9926570461543586e-05, + "loss": 1.0484, + "step": 240 + }, + { + "epoch": 0.13, + "learning_rate": 4.992032717143559e-05, + "loss": 0.9157, + "step": 250 + }, + { + "epoch": 0.13, + "learning_rate": 4.9913829605903486e-05, + "loss": 1.0029, + "step": 260 + }, + { + "epoch": 0.14, + "learning_rate": 4.990707783124541e-05, + "loss": 1.0332, + "step": 270 + }, + { + "epoch": 0.14, + "learning_rate": 4.990007191635334e-05, + "loss": 0.9525, + "step": 280 + }, + { + "epoch": 0.15, + "learning_rate": 4.989281193271236e-05, + "loss": 0.9969, + "step": 290 + }, + { + "epoch": 0.15, + "learning_rate": 4.9885297954399964e-05, + "loss": 0.9313, + "step": 300 + }, + { + "epoch": 0.16, + "learning_rate": 4.98775300580853e-05, + "loss": 0.9715, + "step": 310 + }, + { + "epoch": 0.16, + "learning_rate": 4.986950832302836e-05, + "loss": 0.9459, + "step": 320 + }, + { + "epoch": 0.17, + "learning_rate": 4.9861232831079194e-05, + "loss": 0.9614, + "step": 330 + }, + { + "epoch": 0.17, + "learning_rate": 4.985270366667708e-05, + "loss": 0.9995, + "step": 340 + }, + { + "epoch": 0.18, + "learning_rate": 4.9843920916849645e-05, + "loss": 0.9779, + "step": 350 + }, + { + "epoch": 0.18, + "learning_rate": 4.9834884671211976e-05, + "loss": 0.926, + "step": 360 + }, + { + "epoch": 0.19, + "learning_rate": 4.982559502196572e-05, + "loss": 1.0427, + "step": 370 + }, + { + "epoch": 0.19, + "learning_rate": 4.981605206389814e-05, + "loss": 0.9495, + "step": 380 + }, + { + "epoch": 0.2, + "learning_rate": 4.9806255894381135e-05, + "loss": 0.9446, + "step": 390 + }, + { + "epoch": 0.2, + "learning_rate": 4.979620661337026e-05, + "loss": 0.9732, + "step": 400 + }, + { + "epoch": 0.21, + "learning_rate": 4.978590432340371e-05, + "loss": 0.8346, + "step": 410 + }, + { + "epoch": 0.21, + "learning_rate": 4.9775349129601243e-05, + "loss": 0.9642, + "step": 420 + }, + { + "epoch": 0.22, + "learning_rate": 4.9764541139663176e-05, + "loss": 0.8718, + "step": 430 + }, + { + "epoch": 0.22, + "learning_rate": 4.975348046386917e-05, + "loss": 0.9406, + "step": 440 + }, + { + "epoch": 0.23, + "learning_rate": 4.974216721507725e-05, + "loss": 0.8534, + "step": 450 + }, + { + "epoch": 0.23, + "learning_rate": 4.973060150872253e-05, + "loss": 0.9735, + "step": 460 + }, + { + "epoch": 0.24, + "learning_rate": 4.971878346281609e-05, + "loss": 0.9225, + "step": 470 + }, + { + "epoch": 0.24, + "learning_rate": 4.970671319794378e-05, + "loss": 0.8771, + "step": 480 + }, + { + "epoch": 0.25, + "learning_rate": 4.969439083726496e-05, + "loss": 0.9068, + "step": 490 + }, + { + "epoch": 0.25, + "learning_rate": 4.968181650651127e-05, + "loss": 0.9524, + "step": 500 + }, + { + "epoch": 0.26, + "learning_rate": 4.966899033398533e-05, + "loss": 0.8811, + "step": 510 + }, + { + "epoch": 0.26, + "learning_rate": 4.965591245055944e-05, + "loss": 0.9009, + "step": 520 + }, + { + "epoch": 0.27, + "learning_rate": 4.964258298967423e-05, + "loss": 0.8791, + "step": 530 + }, + { + "epoch": 0.27, + "learning_rate": 4.962900208733734e-05, + "loss": 0.9129, + "step": 540 + }, + { + "epoch": 0.28, + "learning_rate": 4.9615169882121945e-05, + "loss": 0.9973, + "step": 550 + }, + { + "epoch": 0.28, + "learning_rate": 4.960108651516545e-05, + "loss": 1.0256, + "step": 560 + }, + { + "epoch": 0.29, + "learning_rate": 4.958675213016798e-05, + "loss": 0.8638, + "step": 570 + }, + { + "epoch": 0.29, + "learning_rate": 4.9572166873390925e-05, + "loss": 0.8928, + "step": 580 + }, + { + "epoch": 0.3, + "learning_rate": 4.955733089365546e-05, + "loss": 0.8579, + "step": 590 + }, + { + "epoch": 0.3, + "learning_rate": 4.9542244342341026e-05, + "loss": 0.9767, + "step": 600 + }, + { + "epoch": 0.31, + "learning_rate": 4.9526907373383766e-05, + "loss": 0.8605, + "step": 610 + }, + { + "epoch": 0.32, + "learning_rate": 4.951132014327498e-05, + "loss": 0.883, + "step": 620 + }, + { + "epoch": 0.32, + "learning_rate": 4.949548281105951e-05, + "loss": 0.9282, + "step": 630 + }, + { + "epoch": 0.33, + "learning_rate": 4.947939553833412e-05, + "loss": 0.8909, + "step": 640 + }, + { + "epoch": 0.33, + "learning_rate": 4.9463058489245874e-05, + "loss": 0.8618, + "step": 650 + }, + { + "epoch": 0.34, + "learning_rate": 4.9446471830490396e-05, + "loss": 0.8674, + "step": 660 + }, + { + "epoch": 0.34, + "learning_rate": 4.942963573131025e-05, + "loss": 0.9487, + "step": 670 + }, + { + "epoch": 0.35, + "learning_rate": 4.941255036349316e-05, + "loss": 0.959, + "step": 680 + }, + { + "epoch": 0.35, + "learning_rate": 4.9395215901370265e-05, + "loss": 0.9217, + "step": 690 + }, + { + "epoch": 0.36, + "learning_rate": 4.937763252181434e-05, + "loss": 0.9214, + "step": 700 + }, + { + "epoch": 0.36, + "learning_rate": 4.935980040423803e-05, + "loss": 0.9023, + "step": 710 + }, + { + "epoch": 0.37, + "learning_rate": 4.934171973059196e-05, + "loss": 0.863, + "step": 720 + }, + { + "epoch": 0.37, + "learning_rate": 4.9323390685362915e-05, + "loss": 0.9249, + "step": 730 + }, + { + "epoch": 0.38, + "learning_rate": 4.930481345557193e-05, + "loss": 0.9269, + "step": 740 + }, + { + "epoch": 0.38, + "learning_rate": 4.928598823077243e-05, + "loss": 0.8938, + "step": 750 + }, + { + "epoch": 0.39, + "learning_rate": 4.926691520304824e-05, + "loss": 0.9187, + "step": 760 + }, + { + "epoch": 0.39, + "learning_rate": 4.924759456701167e-05, + "loss": 0.8909, + "step": 770 + }, + { + "epoch": 0.4, + "learning_rate": 4.922802651980149e-05, + "loss": 0.8665, + "step": 780 + }, + { + "epoch": 0.4, + "learning_rate": 4.920821126108096e-05, + "loss": 0.9484, + "step": 790 + }, + { + "epoch": 0.41, + "learning_rate": 4.9188148993035754e-05, + "loss": 0.9234, + "step": 800 + }, + { + "epoch": 0.41, + "learning_rate": 4.916783992037193e-05, + "loss": 0.9564, + "step": 810 + }, + { + "epoch": 0.42, + "learning_rate": 4.914728425031379e-05, + "loss": 0.8569, + "step": 820 + }, + { + "epoch": 0.42, + "learning_rate": 4.912648219260188e-05, + "loss": 0.972, + "step": 830 + }, + { + "epoch": 0.43, + "learning_rate": 4.910543395949067e-05, + "loss": 0.7543, + "step": 840 + }, + { + "epoch": 0.43, + "learning_rate": 4.908413976574655e-05, + "loss": 0.8996, + "step": 850 + }, + { + "epoch": 0.44, + "learning_rate": 4.9062599828645574e-05, + "loss": 0.7431, + "step": 860 + }, + { + "epoch": 0.44, + "learning_rate": 4.9040814367971236e-05, + "loss": 0.8302, + "step": 870 + }, + { + "epoch": 0.45, + "learning_rate": 4.901878360601223e-05, + "loss": 0.9348, + "step": 880 + }, + { + "epoch": 0.45, + "learning_rate": 4.899650776756023e-05, + "loss": 0.8658, + "step": 890 + }, + { + "epoch": 0.46, + "learning_rate": 4.897398707990749e-05, + "loss": 0.8688, + "step": 900 + }, + { + "epoch": 0.46, + "learning_rate": 4.895122177284465e-05, + "loss": 0.8237, + "step": 910 + }, + { + "epoch": 0.47, + "learning_rate": 4.8928212078658315e-05, + "loss": 0.8714, + "step": 920 + }, + { + "epoch": 0.47, + "learning_rate": 4.8904958232128687e-05, + "loss": 0.8695, + "step": 930 + }, + { + "epoch": 0.48, + "learning_rate": 4.888146047052721e-05, + "loss": 0.8811, + "step": 940 + }, + { + "epoch": 0.48, + "learning_rate": 4.88577190336141e-05, + "loss": 0.954, + "step": 950 + }, + { + "epoch": 0.49, + "learning_rate": 4.883373416363593e-05, + "loss": 0.9335, + "step": 960 + }, + { + "epoch": 0.49, + "learning_rate": 4.8809506105323164e-05, + "loss": 0.864, + "step": 970 + }, + { + "epoch": 0.5, + "learning_rate": 4.878503510588765e-05, + "loss": 0.9355, + "step": 980 + }, + { + "epoch": 0.5, + "learning_rate": 4.876032141502004e-05, + "loss": 0.885, + "step": 990 + }, + { + "epoch": 0.51, + "learning_rate": 4.8735365284887374e-05, + "loss": 0.8106, + "step": 1000 + }, + { + "epoch": 0.51, + "learning_rate": 4.8710166970130376e-05, + "loss": 0.7904, + "step": 1010 + }, + { + "epoch": 0.52, + "learning_rate": 4.8684726727860944e-05, + "loss": 0.8767, + "step": 1020 + }, + { + "epoch": 0.52, + "learning_rate": 4.865904481765945e-05, + "loss": 0.9206, + "step": 1030 + }, + { + "epoch": 0.53, + "learning_rate": 4.863312150157216e-05, + "loss": 0.905, + "step": 1040 + }, + { + "epoch": 0.53, + "learning_rate": 4.8606957044108556e-05, + "loss": 0.8438, + "step": 1050 + }, + { + "epoch": 0.54, + "learning_rate": 4.858055171223856e-05, + "loss": 0.8928, + "step": 1060 + }, + { + "epoch": 0.54, + "learning_rate": 4.855390577538991e-05, + "loss": 0.8943, + "step": 1070 + }, + { + "epoch": 0.55, + "learning_rate": 4.8527019505445346e-05, + "loss": 0.8802, + "step": 1080 + }, + { + "epoch": 0.55, + "learning_rate": 4.849989317673984e-05, + "loss": 0.8525, + "step": 1090 + }, + { + "epoch": 0.56, + "learning_rate": 4.847252706605786e-05, + "loss": 0.8939, + "step": 1100 + }, + { + "epoch": 0.56, + "learning_rate": 4.844492145263044e-05, + "loss": 0.8652, + "step": 1110 + }, + { + "epoch": 0.57, + "learning_rate": 4.8417076618132426e-05, + "loss": 0.8308, + "step": 1120 + }, + { + "epoch": 0.57, + "learning_rate": 4.838899284667956e-05, + "loss": 0.8173, + "step": 1130 + }, + { + "epoch": 0.58, + "learning_rate": 4.836067042482557e-05, + "loss": 0.9149, + "step": 1140 + }, + { + "epoch": 0.58, + "learning_rate": 4.833210964155928e-05, + "loss": 0.7346, + "step": 1150 + }, + { + "epoch": 0.59, + "learning_rate": 4.8303310788301624e-05, + "loss": 1.0617, + "step": 1160 + }, + { + "epoch": 0.59, + "learning_rate": 4.827427415890271e-05, + "loss": 0.8963, + "step": 1170 + }, + { + "epoch": 0.6, + "learning_rate": 4.82450000496388e-05, + "loss": 0.9368, + "step": 1180 + }, + { + "epoch": 0.6, + "learning_rate": 4.821548875920927e-05, + "loss": 0.9157, + "step": 1190 + }, + { + "epoch": 0.61, + "learning_rate": 4.818574058873361e-05, + "loss": 0.7684, + "step": 1200 + }, + { + "epoch": 0.61, + "learning_rate": 4.8155755841748296e-05, + "loss": 0.8846, + "step": 1210 + }, + { + "epoch": 0.62, + "learning_rate": 4.8125534824203754e-05, + "loss": 0.9657, + "step": 1220 + }, + { + "epoch": 0.63, + "learning_rate": 4.8095077844461176e-05, + "loss": 0.6925, + "step": 1230 + }, + { + "epoch": 0.63, + "learning_rate": 4.8064385213289414e-05, + "loss": 0.8198, + "step": 1240 + }, + { + "epoch": 0.64, + "learning_rate": 4.8033457243861804e-05, + "loss": 0.8938, + "step": 1250 + }, + { + "epoch": 0.64, + "learning_rate": 4.800229425175294e-05, + "loss": 0.7988, + "step": 1260 + }, + { + "epoch": 0.65, + "learning_rate": 4.7970896554935506e-05, + "loss": 0.933, + "step": 1270 + }, + { + "epoch": 0.65, + "learning_rate": 4.7939264473776995e-05, + "loss": 0.8274, + "step": 1280 + }, + { + "epoch": 0.66, + "learning_rate": 4.790739833103644e-05, + "loss": 0.946, + "step": 1290 + }, + { + "epoch": 0.66, + "learning_rate": 4.787529845186114e-05, + "loss": 0.8698, + "step": 1300 + }, + { + "epoch": 0.67, + "learning_rate": 4.784296516378333e-05, + "loss": 0.9597, + "step": 1310 + }, + { + "epoch": 0.67, + "learning_rate": 4.7810398796716825e-05, + "loss": 0.7929, + "step": 1320 + }, + { + "epoch": 0.68, + "learning_rate": 4.777759968295369e-05, + "loss": 0.9, + "step": 1330 + }, + { + "epoch": 0.68, + "learning_rate": 4.774456815716083e-05, + "loss": 0.7947, + "step": 1340 + }, + { + "epoch": 0.69, + "learning_rate": 4.7711304556376555e-05, + "loss": 0.948, + "step": 1350 + }, + { + "epoch": 0.69, + "learning_rate": 4.767780922000718e-05, + "loss": 0.8153, + "step": 1360 + }, + { + "epoch": 0.7, + "learning_rate": 4.7644082489823525e-05, + "loss": 0.9613, + "step": 1370 + }, + { + "epoch": 0.7, + "learning_rate": 4.761012470995746e-05, + "loss": 0.861, + "step": 1380 + }, + { + "epoch": 0.71, + "learning_rate": 4.7575936226898366e-05, + "loss": 0.8061, + "step": 1390 + }, + { + "epoch": 0.71, + "learning_rate": 4.754151738948962e-05, + "loss": 0.8082, + "step": 1400 + }, + { + "epoch": 0.72, + "learning_rate": 4.750686854892503e-05, + "loss": 0.8568, + "step": 1410 + }, + { + "epoch": 0.72, + "learning_rate": 4.747199005874524e-05, + "loss": 0.8792, + "step": 1420 + }, + { + "epoch": 0.73, + "learning_rate": 4.7436882274834135e-05, + "loss": 0.9441, + "step": 1430 + }, + { + "epoch": 0.73, + "learning_rate": 4.7401545555415204e-05, + "loss": 0.8254, + "step": 1440 + }, + { + "epoch": 0.74, + "learning_rate": 4.73659802610479e-05, + "loss": 0.9127, + "step": 1450 + }, + { + "epoch": 0.74, + "learning_rate": 4.733018675462394e-05, + "loss": 0.8423, + "step": 1460 + }, + { + "epoch": 0.75, + "learning_rate": 4.729416540136361e-05, + "loss": 0.8832, + "step": 1470 + }, + { + "epoch": 0.75, + "learning_rate": 4.725791656881203e-05, + "loss": 0.8718, + "step": 1480 + }, + { + "epoch": 0.76, + "learning_rate": 4.722144062683543e-05, + "loss": 0.7659, + "step": 1490 + }, + { + "epoch": 0.76, + "learning_rate": 4.7184737947617354e-05, + "loss": 0.793, + "step": 1500 + }, + { + "epoch": 0.77, + "learning_rate": 4.714780890565485e-05, + "loss": 0.8891, + "step": 1510 + }, + { + "epoch": 0.77, + "learning_rate": 4.71106538777547e-05, + "loss": 1.041, + "step": 1520 + }, + { + "epoch": 0.78, + "learning_rate": 4.707327324302951e-05, + "loss": 0.8006, + "step": 1530 + }, + { + "epoch": 0.78, + "learning_rate": 4.703566738289389e-05, + "loss": 0.9304, + "step": 1540 + }, + { + "epoch": 0.79, + "learning_rate": 4.699783668106054e-05, + "loss": 0.9484, + "step": 1550 + }, + { + "epoch": 0.79, + "learning_rate": 4.695978152353634e-05, + "loss": 0.8737, + "step": 1560 + }, + { + "epoch": 0.8, + "learning_rate": 4.69215022986184e-05, + "loss": 0.8265, + "step": 1570 + }, + { + "epoch": 0.8, + "learning_rate": 4.688299939689015e-05, + "loss": 0.9525, + "step": 1580 + }, + { + "epoch": 0.81, + "learning_rate": 4.684427321121726e-05, + "loss": 0.8764, + "step": 1590 + }, + { + "epoch": 0.81, + "learning_rate": 4.6805324136743714e-05, + "loss": 0.9115, + "step": 1600 + }, + { + "epoch": 0.82, + "learning_rate": 4.676615257088776e-05, + "loss": 0.8334, + "step": 1610 + }, + { + "epoch": 0.82, + "learning_rate": 4.672675891333782e-05, + "loss": 0.8515, + "step": 1620 + }, + { + "epoch": 0.83, + "learning_rate": 4.668714356604845e-05, + "loss": 0.9023, + "step": 1630 + }, + { + "epoch": 0.83, + "learning_rate": 4.664730693323622e-05, + "loss": 0.8983, + "step": 1640 + }, + { + "epoch": 0.84, + "learning_rate": 4.660724942137561e-05, + "loss": 0.8063, + "step": 1650 + }, + { + "epoch": 0.84, + "learning_rate": 4.656697143919482e-05, + "loss": 0.91, + "step": 1660 + }, + { + "epoch": 0.85, + "learning_rate": 4.6526473397671644e-05, + "loss": 0.8909, + "step": 1670 + }, + { + "epoch": 0.85, + "learning_rate": 4.6485755710029256e-05, + "loss": 0.8485, + "step": 1680 + }, + { + "epoch": 0.86, + "learning_rate": 4.644481879173199e-05, + "loss": 0.793, + "step": 1690 + }, + { + "epoch": 0.86, + "learning_rate": 4.640366306048113e-05, + "loss": 0.9396, + "step": 1700 + }, + { + "epoch": 0.87, + "learning_rate": 4.63622889362106e-05, + "loss": 0.8538, + "step": 1710 + }, + { + "epoch": 0.87, + "learning_rate": 4.63206968410827e-05, + "loss": 0.8986, + "step": 1720 + }, + { + "epoch": 0.88, + "learning_rate": 4.627888719948385e-05, + "loss": 0.9277, + "step": 1730 + }, + { + "epoch": 0.88, + "learning_rate": 4.623686043802016e-05, + "loss": 0.8013, + "step": 1740 + }, + { + "epoch": 0.89, + "learning_rate": 4.619461698551315e-05, + "loss": 0.8565, + "step": 1750 + }, + { + "epoch": 0.89, + "learning_rate": 4.6152157272995355e-05, + "loss": 0.7764, + "step": 1760 + }, + { + "epoch": 0.9, + "learning_rate": 4.610948173370594e-05, + "loss": 0.8674, + "step": 1770 + }, + { + "epoch": 0.9, + "learning_rate": 4.606659080308624e-05, + "loss": 0.7947, + "step": 1780 + }, + { + "epoch": 0.91, + "learning_rate": 4.6023484918775364e-05, + "loss": 0.8766, + "step": 1790 + }, + { + "epoch": 0.91, + "learning_rate": 4.598016452060569e-05, + "loss": 0.8197, + "step": 1800 + }, + { + "epoch": 0.92, + "learning_rate": 4.593663005059841e-05, + "loss": 0.9353, + "step": 1810 + }, + { + "epoch": 0.92, + "learning_rate": 4.589288195295901e-05, + "loss": 0.8794, + "step": 1820 + }, + { + "epoch": 0.93, + "learning_rate": 4.584892067407272e-05, + "loss": 0.844, + "step": 1830 + }, + { + "epoch": 0.94, + "learning_rate": 4.580474666249997e-05, + "loss": 0.8291, + "step": 1840 + }, + { + "epoch": 0.94, + "learning_rate": 4.576036036897182e-05, + "loss": 0.7954, + "step": 1850 + }, + { + "epoch": 0.95, + "learning_rate": 4.571576224638536e-05, + "loss": 0.9331, + "step": 1860 + }, + { + "epoch": 0.95, + "learning_rate": 4.56709527497991e-05, + "loss": 0.8131, + "step": 1870 + }, + { + "epoch": 0.96, + "learning_rate": 4.562593233642828e-05, + "loss": 0.8134, + "step": 1880 + }, + { + "epoch": 0.96, + "learning_rate": 4.5580701465640254e-05, + "loss": 0.8629, + "step": 1890 + }, + { + "epoch": 0.97, + "learning_rate": 4.553526059894978e-05, + "loss": 0.8637, + "step": 1900 + }, + { + "epoch": 0.97, + "learning_rate": 4.548961020001432e-05, + "loss": 0.8737, + "step": 1910 + }, + { + "epoch": 0.98, + "learning_rate": 4.544375073462932e-05, + "loss": 0.8404, + "step": 1920 + }, + { + "epoch": 0.98, + "learning_rate": 4.539768267072341e-05, + "loss": 0.9458, + "step": 1930 + }, + { + "epoch": 0.99, + "learning_rate": 4.535140647835369e-05, + "loss": 0.8777, + "step": 1940 + }, + { + "epoch": 0.99, + "learning_rate": 4.5304922629700896e-05, + "loss": 0.8794, + "step": 1950 + }, + { + "epoch": 1.0, + "learning_rate": 4.525823159906459e-05, + "loss": 0.8848, + "step": 1960 + }, + { + "epoch": 1.0, + "learning_rate": 4.521133386285833e-05, + "loss": 0.8398, + "step": 1970 + }, + { + "epoch": 1.01, + "learning_rate": 4.5164229899604796e-05, + "loss": 0.8368, + "step": 1980 + }, + { + "epoch": 1.01, + "learning_rate": 4.51169201899309e-05, + "loss": 0.7742, + "step": 1990 + }, + { + "epoch": 1.02, + "learning_rate": 4.506940521656293e-05, + "loss": 0.875, + "step": 2000 + }, + { + "epoch": 1.02, + "learning_rate": 4.502168546432155e-05, + "loss": 0.8689, + "step": 2010 + }, + { + "epoch": 1.03, + "learning_rate": 4.497376142011693e-05, + "loss": 0.8329, + "step": 2020 + }, + { + "epoch": 1.03, + "learning_rate": 4.492563357294369e-05, + "loss": 0.804, + "step": 2030 + }, + { + "epoch": 1.04, + "learning_rate": 4.487730241387602e-05, + "loss": 0.7985, + "step": 2040 + }, + { + "epoch": 1.04, + "learning_rate": 4.482876843606257e-05, + "loss": 0.8019, + "step": 2050 + }, + { + "epoch": 1.05, + "learning_rate": 4.478003213472146e-05, + "loss": 0.7925, + "step": 2060 + }, + { + "epoch": 1.05, + "learning_rate": 4.473109400713525e-05, + "loss": 0.8566, + "step": 2070 + }, + { + "epoch": 1.06, + "learning_rate": 4.468195455264581e-05, + "loss": 0.7679, + "step": 2080 + }, + { + "epoch": 1.06, + "learning_rate": 4.463261427264928e-05, + "loss": 0.7556, + "step": 2090 + }, + { + "epoch": 1.07, + "learning_rate": 4.458307367059092e-05, + "loss": 0.7138, + "step": 2100 + }, + { + "epoch": 1.07, + "learning_rate": 4.4533333251959985e-05, + "loss": 0.893, + "step": 2110 + }, + { + "epoch": 1.08, + "learning_rate": 4.448339352428456e-05, + "loss": 0.9398, + "step": 2120 + }, + { + "epoch": 1.08, + "learning_rate": 4.4433254997126394e-05, + "loss": 0.9158, + "step": 2130 + }, + { + "epoch": 1.09, + "learning_rate": 4.438291818207569e-05, + "loss": 0.8109, + "step": 2140 + }, + { + "epoch": 1.09, + "learning_rate": 4.4332383592745894e-05, + "loss": 0.9455, + "step": 2150 + }, + { + "epoch": 1.1, + "learning_rate": 4.4281651744768436e-05, + "loss": 0.8938, + "step": 2160 + }, + { + "epoch": 1.1, + "learning_rate": 4.42307231557875e-05, + "loss": 0.7618, + "step": 2170 + }, + { + "epoch": 1.11, + "learning_rate": 4.4179598345454704e-05, + "loss": 0.8194, + "step": 2180 + }, + { + "epoch": 1.11, + "learning_rate": 4.4128277835423825e-05, + "loss": 0.8848, + "step": 2190 + }, + { + "epoch": 1.12, + "learning_rate": 4.407676214934548e-05, + "loss": 0.7657, + "step": 2200 + }, + { + "epoch": 1.12, + "learning_rate": 4.402505181286175e-05, + "loss": 0.8897, + "step": 2210 + }, + { + "epoch": 1.13, + "learning_rate": 4.3973147353600866e-05, + "loss": 0.8431, + "step": 2220 + }, + { + "epoch": 1.13, + "learning_rate": 4.392104930117177e-05, + "loss": 0.8565, + "step": 2230 + }, + { + "epoch": 1.14, + "learning_rate": 4.386875818715874e-05, + "loss": 0.8426, + "step": 2240 + }, + { + "epoch": 1.14, + "learning_rate": 4.3816274545116e-05, + "loss": 0.8096, + "step": 2250 + }, + { + "epoch": 1.15, + "learning_rate": 4.37635989105622e-05, + "loss": 0.8651, + "step": 2260 + }, + { + "epoch": 1.15, + "learning_rate": 4.3710731820975e-05, + "loss": 0.8658, + "step": 2270 + }, + { + "epoch": 1.16, + "learning_rate": 4.36576738157856e-05, + "loss": 0.8642, + "step": 2280 + }, + { + "epoch": 1.16, + "learning_rate": 4.3604425436373194e-05, + "loss": 0.8334, + "step": 2290 + }, + { + "epoch": 1.17, + "learning_rate": 4.355098722605946e-05, + "loss": 0.929, + "step": 2300 + }, + { + "epoch": 1.17, + "learning_rate": 4.349735973010305e-05, + "loss": 0.8318, + "step": 2310 + }, + { + "epoch": 1.18, + "learning_rate": 4.344354349569398e-05, + "loss": 0.7576, + "step": 2320 + }, + { + "epoch": 1.18, + "learning_rate": 4.3389539071948065e-05, + "loss": 0.7787, + "step": 2330 + }, + { + "epoch": 1.19, + "learning_rate": 4.3335347009901314e-05, + "loss": 0.7632, + "step": 2340 + }, + { + "epoch": 1.19, + "learning_rate": 4.328096786250432e-05, + "loss": 0.7624, + "step": 2350 + }, + { + "epoch": 1.2, + "learning_rate": 4.32264021846166e-05, + "loss": 0.8955, + "step": 2360 + }, + { + "epoch": 1.2, + "learning_rate": 4.317165053300095e-05, + "loss": 0.9177, + "step": 2370 + }, + { + "epoch": 1.21, + "learning_rate": 4.311671346631774e-05, + "loss": 0.9347, + "step": 2380 + }, + { + "epoch": 1.21, + "learning_rate": 4.306159154511925e-05, + "loss": 0.8386, + "step": 2390 + }, + { + "epoch": 1.22, + "learning_rate": 4.300628533184391e-05, + "loss": 0.7803, + "step": 2400 + }, + { + "epoch": 1.22, + "learning_rate": 4.295079539081058e-05, + "loss": 0.8833, + "step": 2410 + }, + { + "epoch": 1.23, + "learning_rate": 4.289512228821282e-05, + "loss": 0.896, + "step": 2420 + }, + { + "epoch": 1.23, + "learning_rate": 4.283926659211306e-05, + "loss": 0.8046, + "step": 2430 + }, + { + "epoch": 1.24, + "learning_rate": 4.278322887243683e-05, + "loss": 0.8341, + "step": 2440 + }, + { + "epoch": 1.25, + "learning_rate": 4.272700970096696e-05, + "loss": 0.8365, + "step": 2450 + }, + { + "epoch": 1.25, + "learning_rate": 4.26706096513377e-05, + "loss": 0.8236, + "step": 2460 + }, + { + "epoch": 1.26, + "learning_rate": 4.2614029299028944e-05, + "loss": 0.9149, + "step": 2470 + }, + { + "epoch": 1.26, + "learning_rate": 4.2557269221360265e-05, + "loss": 0.8772, + "step": 2480 + }, + { + "epoch": 1.27, + "learning_rate": 4.250032999748508e-05, + "loss": 0.8428, + "step": 2490 + }, + { + "epoch": 1.27, + "learning_rate": 4.2443212208384755e-05, + "loss": 0.7791, + "step": 2500 + }, + { + "epoch": 1.28, + "learning_rate": 4.238591643686263e-05, + "loss": 0.894, + "step": 2510 + }, + { + "epoch": 1.28, + "learning_rate": 4.23284432675381e-05, + "loss": 0.8412, + "step": 2520 + }, + { + "epoch": 1.29, + "learning_rate": 4.2270793286840636e-05, + "loss": 0.7827, + "step": 2530 + }, + { + "epoch": 1.29, + "learning_rate": 4.2212967083003835e-05, + "loss": 0.8868, + "step": 2540 + }, + { + "epoch": 1.3, + "learning_rate": 4.215496524605936e-05, + "loss": 0.8999, + "step": 2550 + }, + { + "epoch": 1.3, + "learning_rate": 4.209678836783098e-05, + "loss": 0.8319, + "step": 2560 + }, + { + "epoch": 1.31, + "learning_rate": 4.2038437041928505e-05, + "loss": 0.8147, + "step": 2570 + }, + { + "epoch": 1.31, + "learning_rate": 4.1979911863741686e-05, + "loss": 0.8202, + "step": 2580 + }, + { + "epoch": 1.32, + "learning_rate": 4.192121343043424e-05, + "loss": 0.8346, + "step": 2590 + }, + { + "epoch": 1.32, + "learning_rate": 4.1862342340937655e-05, + "loss": 0.8413, + "step": 2600 + }, + { + "epoch": 1.33, + "learning_rate": 4.1803299195945145e-05, + "loss": 0.8867, + "step": 2610 + }, + { + "epoch": 1.33, + "learning_rate": 4.174408459790549e-05, + "loss": 0.789, + "step": 2620 + }, + { + "epoch": 1.34, + "learning_rate": 4.1684699151016896e-05, + "loss": 0.7649, + "step": 2630 + }, + { + "epoch": 1.34, + "learning_rate": 4.162514346122083e-05, + "loss": 0.8685, + "step": 2640 + }, + { + "epoch": 1.35, + "learning_rate": 4.156541813619585e-05, + "loss": 0.7793, + "step": 2650 + }, + { + "epoch": 1.35, + "learning_rate": 4.150552378535137e-05, + "loss": 0.871, + "step": 2660 + }, + { + "epoch": 1.36, + "learning_rate": 4.144546101982151e-05, + "loss": 0.8534, + "step": 2670 + }, + { + "epoch": 1.36, + "learning_rate": 4.1385230452458756e-05, + "loss": 0.8658, + "step": 2680 + }, + { + "epoch": 1.37, + "learning_rate": 4.132483269782781e-05, + "loss": 0.7654, + "step": 2690 + }, + { + "epoch": 1.37, + "learning_rate": 4.126426837219925e-05, + "loss": 0.788, + "step": 2700 + }, + { + "epoch": 1.38, + "learning_rate": 4.120353809354328e-05, + "loss": 0.7899, + "step": 2710 + }, + { + "epoch": 1.38, + "learning_rate": 4.114264248152342e-05, + "loss": 0.8984, + "step": 2720 + }, + { + "epoch": 1.39, + "learning_rate": 4.108158215749014e-05, + "loss": 0.9037, + "step": 2730 + }, + { + "epoch": 1.39, + "learning_rate": 4.10203577444746e-05, + "loss": 0.8417, + "step": 2740 + }, + { + "epoch": 1.4, + "learning_rate": 4.095896986718221e-05, + "loss": 0.8844, + "step": 2750 + }, + { + "epoch": 1.4, + "learning_rate": 4.089741915198632e-05, + "loss": 0.8565, + "step": 2760 + }, + { + "epoch": 1.41, + "learning_rate": 4.0835706226921776e-05, + "loss": 0.8313, + "step": 2770 + }, + { + "epoch": 1.41, + "learning_rate": 4.077383172167857e-05, + "loss": 0.8312, + "step": 2780 + }, + { + "epoch": 1.42, + "learning_rate": 4.0711796267595355e-05, + "loss": 0.8551, + "step": 2790 + }, + { + "epoch": 1.42, + "learning_rate": 4.064960049765304e-05, + "loss": 0.8134, + "step": 2800 + }, + { + "epoch": 1.43, + "learning_rate": 4.058724504646834e-05, + "loss": 0.8246, + "step": 2810 + }, + { + "epoch": 1.43, + "learning_rate": 4.052473055028726e-05, + "loss": 0.7552, + "step": 2820 + }, + { + "epoch": 1.44, + "learning_rate": 4.046205764697862e-05, + "loss": 0.8374, + "step": 2830 + }, + { + "epoch": 1.44, + "learning_rate": 4.0399226976027583e-05, + "loss": 0.8721, + "step": 2840 + }, + { + "epoch": 1.45, + "learning_rate": 4.0336239178529075e-05, + "loss": 0.756, + "step": 2850 + }, + { + "epoch": 1.45, + "learning_rate": 4.0273094897181285e-05, + "loss": 0.7646, + "step": 2860 + }, + { + "epoch": 1.46, + "learning_rate": 4.020979477627907e-05, + "loss": 0.8254, + "step": 2870 + }, + { + "epoch": 1.46, + "learning_rate": 4.014633946170742e-05, + "loss": 0.843, + "step": 2880 + }, + { + "epoch": 1.47, + "learning_rate": 4.0082729600934844e-05, + "loss": 0.8923, + "step": 2890 + }, + { + "epoch": 1.47, + "learning_rate": 4.001896584300675e-05, + "loss": 0.8476, + "step": 2900 + }, + { + "epoch": 1.48, + "learning_rate": 3.995504883853888e-05, + "loss": 0.8202, + "step": 2910 + }, + { + "epoch": 1.48, + "learning_rate": 3.98909792397106e-05, + "loss": 0.8049, + "step": 2920 + }, + { + "epoch": 1.49, + "learning_rate": 3.9826757700258284e-05, + "loss": 0.7977, + "step": 2930 + }, + { + "epoch": 1.49, + "learning_rate": 3.976238487546864e-05, + "loss": 0.8494, + "step": 2940 + }, + { + "epoch": 1.5, + "learning_rate": 3.9697861422172034e-05, + "loss": 0.871, + "step": 2950 + }, + { + "epoch": 1.5, + "learning_rate": 3.963318799873575e-05, + "loss": 0.9323, + "step": 2960 + }, + { + "epoch": 1.51, + "learning_rate": 3.956836526505733e-05, + "loss": 0.912, + "step": 2970 + }, + { + "epoch": 1.51, + "learning_rate": 3.9503393882557766e-05, + "loss": 0.851, + "step": 2980 + }, + { + "epoch": 1.52, + "learning_rate": 3.943827451417483e-05, + "loss": 0.78, + "step": 2990 + }, + { + "epoch": 1.52, + "learning_rate": 3.937300782435625e-05, + "loss": 0.7798, + "step": 3000 + }, + { + "epoch": 1.53, + "learning_rate": 3.930759447905298e-05, + "loss": 0.8496, + "step": 3010 + }, + { + "epoch": 1.53, + "learning_rate": 3.9242035145712344e-05, + "loss": 0.8427, + "step": 3020 + }, + { + "epoch": 1.54, + "learning_rate": 3.9176330493271285e-05, + "loss": 0.8619, + "step": 3030 + }, + { + "epoch": 1.54, + "learning_rate": 3.9110481192149504e-05, + "loss": 0.7663, + "step": 3040 + }, + { + "epoch": 1.55, + "learning_rate": 3.9044487914242646e-05, + "loss": 0.7478, + "step": 3050 + }, + { + "epoch": 1.56, + "learning_rate": 3.897835133291539e-05, + "loss": 0.8048, + "step": 3060 + }, + { + "epoch": 1.56, + "learning_rate": 3.891207212299467e-05, + "loss": 0.8875, + "step": 3070 + }, + { + "epoch": 1.57, + "learning_rate": 3.884565096076269e-05, + "loss": 0.8649, + "step": 3080 + }, + { + "epoch": 1.57, + "learning_rate": 3.877908852395008e-05, + "loss": 0.8313, + "step": 3090 + }, + { + "epoch": 1.58, + "learning_rate": 3.8712385491729e-05, + "loss": 0.8779, + "step": 3100 + }, + { + "epoch": 1.58, + "learning_rate": 3.864554254470613e-05, + "loss": 0.7845, + "step": 3110 + }, + { + "epoch": 1.59, + "learning_rate": 3.857856036491582e-05, + "loss": 0.8581, + "step": 3120 + }, + { + "epoch": 1.59, + "learning_rate": 3.851143963581306e-05, + "loss": 0.8512, + "step": 3130 + }, + { + "epoch": 1.6, + "learning_rate": 3.844418104226656e-05, + "loss": 0.7689, + "step": 3140 + }, + { + "epoch": 1.6, + "learning_rate": 3.837678527055168e-05, + "loss": 0.8677, + "step": 3150 + }, + { + "epoch": 1.61, + "learning_rate": 3.830925300834356e-05, + "loss": 0.7601, + "step": 3160 + }, + { + "epoch": 1.61, + "learning_rate": 3.824158494470996e-05, + "loss": 0.8637, + "step": 3170 + }, + { + "epoch": 1.62, + "learning_rate": 3.817378177010431e-05, + "loss": 0.7152, + "step": 3180 + }, + { + "epoch": 1.62, + "learning_rate": 3.8105844176358674e-05, + "loss": 0.9339, + "step": 3190 + }, + { + "epoch": 1.63, + "learning_rate": 3.803777285667665e-05, + "loss": 0.8261, + "step": 3200 + }, + { + "epoch": 1.63, + "learning_rate": 3.7969568505626305e-05, + "loss": 0.896, + "step": 3210 + }, + { + "epoch": 1.64, + "learning_rate": 3.7901231819133105e-05, + "loss": 0.9026, + "step": 3220 + }, + { + "epoch": 1.64, + "learning_rate": 3.783276349447281e-05, + "loss": 0.8242, + "step": 3230 + }, + { + "epoch": 1.65, + "learning_rate": 3.7764164230264357e-05, + "loss": 0.8292, + "step": 3240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7695434726462704e-05, + "loss": 0.9249, + "step": 3250 + }, + { + "epoch": 1.66, + "learning_rate": 3.762657568435174e-05, + "loss": 0.9214, + "step": 3260 + }, + { + "epoch": 1.66, + "learning_rate": 3.7557587806537094e-05, + "loss": 0.8414, + "step": 3270 + }, + { + "epoch": 1.67, + "learning_rate": 3.748847179693897e-05, + "loss": 0.7875, + "step": 3280 + }, + { + "epoch": 1.67, + "learning_rate": 3.741922836078499e-05, + "loss": 0.7981, + "step": 3290 + }, + { + "epoch": 1.68, + "learning_rate": 3.734985820460293e-05, + "loss": 0.8205, + "step": 3300 + }, + { + "epoch": 1.68, + "learning_rate": 3.728036203621361e-05, + "loss": 0.8429, + "step": 3310 + }, + { + "epoch": 1.69, + "learning_rate": 3.72107405647236e-05, + "loss": 0.8432, + "step": 3320 + }, + { + "epoch": 1.69, + "learning_rate": 3.7140994500517995e-05, + "loss": 0.86, + "step": 3330 + }, + { + "epoch": 1.7, + "learning_rate": 3.707112455525318e-05, + "loss": 0.7461, + "step": 3340 + }, + { + "epoch": 1.7, + "learning_rate": 3.7001131441849586e-05, + "loss": 0.8739, + "step": 3350 + }, + { + "epoch": 1.71, + "learning_rate": 3.693101587448436e-05, + "loss": 0.8064, + "step": 3360 + }, + { + "epoch": 1.71, + "learning_rate": 3.6860778568584145e-05, + "loss": 0.8171, + "step": 3370 + }, + { + "epoch": 1.72, + "learning_rate": 3.6790420240817715e-05, + "loss": 0.7549, + "step": 3380 + }, + { + "epoch": 1.72, + "learning_rate": 3.671994160908872e-05, + "loss": 0.8102, + "step": 3390 + }, + { + "epoch": 1.73, + "learning_rate": 3.6649343392528335e-05, + "loss": 0.8086, + "step": 3400 + }, + { + "epoch": 1.73, + "learning_rate": 3.657862631148791e-05, + "loss": 0.9243, + "step": 3410 + }, + { + "epoch": 1.74, + "learning_rate": 3.650779108753163e-05, + "loss": 0.8874, + "step": 3420 + }, + { + "epoch": 1.74, + "learning_rate": 3.6436838443429175e-05, + "loss": 0.7962, + "step": 3430 + }, + { + "epoch": 1.75, + "learning_rate": 3.636576910314831e-05, + "loss": 0.7621, + "step": 3440 + }, + { + "epoch": 1.75, + "learning_rate": 3.6294583791847514e-05, + "loss": 0.8126, + "step": 3450 + }, + { + "epoch": 1.76, + "learning_rate": 3.622328323586859e-05, + "loss": 0.8358, + "step": 3460 + }, + { + "epoch": 1.76, + "learning_rate": 3.615186816272925e-05, + "loss": 0.8677, + "step": 3470 + }, + { + "epoch": 1.77, + "learning_rate": 3.608033930111564e-05, + "loss": 0.8286, + "step": 3480 + }, + { + "epoch": 1.77, + "learning_rate": 3.600869738087501e-05, + "loss": 0.8292, + "step": 3490 + }, + { + "epoch": 1.78, + "learning_rate": 3.5936943133008183e-05, + "loss": 0.8448, + "step": 3500 + }, + { + "epoch": 1.78, + "learning_rate": 3.5865077289662114e-05, + "loss": 0.7162, + "step": 3510 + }, + { + "epoch": 1.79, + "learning_rate": 3.5793100584122426e-05, + "loss": 0.7949, + "step": 3520 + }, + { + "epoch": 1.79, + "learning_rate": 3.572101375080594e-05, + "loss": 0.8263, + "step": 3530 + }, + { + "epoch": 1.8, + "learning_rate": 3.564881752525317e-05, + "loss": 0.9174, + "step": 3540 + }, + { + "epoch": 1.8, + "learning_rate": 3.5576512644120804e-05, + "loss": 0.8188, + "step": 3550 + }, + { + "epoch": 1.81, + "learning_rate": 3.550409984517421e-05, + "loss": 0.81, + "step": 3560 + }, + { + "epoch": 1.81, + "learning_rate": 3.5431579867279905e-05, + "loss": 0.8592, + "step": 3570 + }, + { + "epoch": 1.82, + "learning_rate": 3.5358953450397995e-05, + "loss": 0.8419, + "step": 3580 + }, + { + "epoch": 1.82, + "learning_rate": 3.528622133557465e-05, + "loss": 0.7349, + "step": 3590 + }, + { + "epoch": 1.83, + "learning_rate": 3.521338426493453e-05, + "loss": 0.8005, + "step": 3600 + }, + { + "epoch": 1.83, + "learning_rate": 3.514044298167322e-05, + "loss": 0.7567, + "step": 3610 + }, + { + "epoch": 1.84, + "learning_rate": 3.506739823004963e-05, + "loss": 0.7951, + "step": 3620 + }, + { + "epoch": 1.84, + "learning_rate": 3.4994250755378434e-05, + "loss": 0.8423, + "step": 3630 + }, + { + "epoch": 1.85, + "learning_rate": 3.492100130402242e-05, + "loss": 0.844, + "step": 3640 + }, + { + "epoch": 1.85, + "learning_rate": 3.4847650623384914e-05, + "loss": 0.8515, + "step": 3650 + }, + { + "epoch": 1.86, + "learning_rate": 3.477419946190213e-05, + "loss": 0.906, + "step": 3660 + }, + { + "epoch": 1.87, + "learning_rate": 3.470064856903555e-05, + "loss": 0.8309, + "step": 3670 + }, + { + "epoch": 1.87, + "learning_rate": 3.462699869526427e-05, + "loss": 0.8666, + "step": 3680 + }, + { + "epoch": 1.88, + "learning_rate": 3.455325059207732e-05, + "loss": 0.7585, + "step": 3690 + }, + { + "epoch": 1.88, + "learning_rate": 3.4479405011966056e-05, + "loss": 0.8133, + "step": 3700 + }, + { + "epoch": 1.89, + "learning_rate": 3.440546270841639e-05, + "loss": 0.9355, + "step": 3710 + }, + { + "epoch": 1.89, + "learning_rate": 3.4331424435901214e-05, + "loss": 0.7332, + "step": 3720 + }, + { + "epoch": 1.9, + "learning_rate": 3.4257290949872614e-05, + "loss": 0.8603, + "step": 3730 + }, + { + "epoch": 1.9, + "learning_rate": 3.418306300675416e-05, + "loss": 0.8269, + "step": 3740 + }, + { + "epoch": 1.91, + "learning_rate": 3.410874136393327e-05, + "loss": 0.799, + "step": 3750 + }, + { + "epoch": 1.91, + "learning_rate": 3.403432677975341e-05, + "loss": 0.8898, + "step": 3760 + }, + { + "epoch": 1.92, + "learning_rate": 3.395982001350637e-05, + "loss": 0.7441, + "step": 3770 + }, + { + "epoch": 1.92, + "learning_rate": 3.3885221825424537e-05, + "loss": 0.8466, + "step": 3780 + }, + { + "epoch": 1.93, + "learning_rate": 3.381053297667309e-05, + "loss": 0.8273, + "step": 3790 + }, + { + "epoch": 1.93, + "learning_rate": 3.3735754229342326e-05, + "loss": 0.8397, + "step": 3800 + }, + { + "epoch": 1.94, + "learning_rate": 3.3660886346439765e-05, + "loss": 0.8455, + "step": 3810 + }, + { + "epoch": 1.94, + "learning_rate": 3.358593009188247e-05, + "loss": 0.8254, + "step": 3820 + }, + { + "epoch": 1.95, + "learning_rate": 3.351088623048918e-05, + "loss": 0.8374, + "step": 3830 + }, + { + "epoch": 1.95, + "learning_rate": 3.3435755527972536e-05, + "loss": 0.781, + "step": 3840 + }, + { + "epoch": 1.96, + "learning_rate": 3.336053875093128e-05, + "loss": 0.8414, + "step": 3850 + }, + { + "epoch": 1.96, + "learning_rate": 3.32852366668424e-05, + "loss": 0.7875, + "step": 3860 + }, + { + "epoch": 1.97, + "learning_rate": 3.320985004405334e-05, + "loss": 0.7889, + "step": 3870 + }, + { + "epoch": 1.97, + "learning_rate": 3.3134379651774114e-05, + "loss": 0.894, + "step": 3880 + }, + { + "epoch": 1.98, + "learning_rate": 3.30588262600695e-05, + "loss": 0.8475, + "step": 3890 + }, + { + "epoch": 1.98, + "learning_rate": 3.298319063985116e-05, + "loss": 0.8024, + "step": 3900 + }, + { + "epoch": 1.99, + "learning_rate": 3.2907473562869754e-05, + "loss": 0.8467, + "step": 3910 + }, + { + "epoch": 1.99, + "learning_rate": 3.283167580170712e-05, + "loss": 0.7829, + "step": 3920 + }, + { + "epoch": 2.0, + "learning_rate": 3.275579812976835e-05, + "loss": 0.8466, + "step": 3930 + }, + { + "epoch": 2.0, + "learning_rate": 3.2679841321273895e-05, + "loss": 0.7958, + "step": 3940 + }, + { + "epoch": 2.01, + "learning_rate": 3.260380615125171e-05, + "loss": 0.7956, + "step": 3950 + }, + { + "epoch": 2.01, + "learning_rate": 3.252769339552927e-05, + "loss": 0.8578, + "step": 3960 + }, + { + "epoch": 2.02, + "learning_rate": 3.245150383072573e-05, + "loss": 0.8806, + "step": 3970 + }, + { + "epoch": 2.02, + "learning_rate": 3.2375238234243965e-05, + "loss": 0.8477, + "step": 3980 + }, + { + "epoch": 2.03, + "learning_rate": 3.229889738426264e-05, + "loss": 0.7173, + "step": 3990 + }, + { + "epoch": 2.03, + "learning_rate": 3.222248205972827e-05, + "loss": 0.8259, + "step": 4000 + }, + { + "epoch": 2.04, + "learning_rate": 3.2145993040347264e-05, + "loss": 0.7454, + "step": 4010 + }, + { + "epoch": 2.04, + "learning_rate": 3.2069431106577995e-05, + "loss": 0.8054, + "step": 4020 + }, + { + "epoch": 2.05, + "learning_rate": 3.199279703962282e-05, + "loss": 0.7146, + "step": 4030 + }, + { + "epoch": 2.05, + "learning_rate": 3.1916091621420104e-05, + "loss": 0.8322, + "step": 4040 + }, + { + "epoch": 2.06, + "learning_rate": 3.183931563463624e-05, + "loss": 0.7718, + "step": 4050 + }, + { + "epoch": 2.06, + "learning_rate": 3.176246986265767e-05, + "loss": 0.9118, + "step": 4060 + }, + { + "epoch": 2.07, + "learning_rate": 3.1685555089582906e-05, + "loss": 0.8052, + "step": 4070 + }, + { + "epoch": 2.07, + "learning_rate": 3.1608572100214526e-05, + "loss": 0.8209, + "step": 4080 + }, + { + "epoch": 2.08, + "learning_rate": 3.15315216800511e-05, + "loss": 0.7682, + "step": 4090 + }, + { + "epoch": 2.08, + "learning_rate": 3.145440461527929e-05, + "loss": 0.8159, + "step": 4100 + }, + { + "epoch": 2.09, + "learning_rate": 3.137722169276574e-05, + "loss": 0.8396, + "step": 4110 + }, + { + "epoch": 2.09, + "learning_rate": 3.129997370004909e-05, + "loss": 0.7799, + "step": 4120 + }, + { + "epoch": 2.1, + "learning_rate": 3.122266142533191e-05, + "loss": 0.8488, + "step": 4130 + }, + { + "epoch": 2.1, + "learning_rate": 3.114528565747268e-05, + "loss": 0.7617, + "step": 4140 + }, + { + "epoch": 2.11, + "learning_rate": 3.1067847185977735e-05, + "loss": 0.8345, + "step": 4150 + }, + { + "epoch": 2.11, + "learning_rate": 3.099034680099321e-05, + "loss": 0.7212, + "step": 4160 + }, + { + "epoch": 2.12, + "learning_rate": 3.091278529329698e-05, + "loss": 0.8031, + "step": 4170 + }, + { + "epoch": 2.12, + "learning_rate": 3.0835163454290574e-05, + "loss": 0.8333, + "step": 4180 + }, + { + "epoch": 2.13, + "learning_rate": 3.075748207599114e-05, + "loss": 0.7761, + "step": 4190 + }, + { + "epoch": 2.13, + "learning_rate": 3.06797419510233e-05, + "loss": 0.7531, + "step": 4200 + }, + { + "epoch": 2.14, + "learning_rate": 3.060194387261114e-05, + "loss": 0.8292, + "step": 4210 + }, + { + "epoch": 2.14, + "learning_rate": 3.0524088634570035e-05, + "loss": 0.82, + "step": 4220 + }, + { + "epoch": 2.15, + "learning_rate": 3.0446177031298627e-05, + "loss": 0.8561, + "step": 4230 + }, + { + "epoch": 2.15, + "learning_rate": 3.036820985777067e-05, + "loss": 0.9112, + "step": 4240 + }, + { + "epoch": 2.16, + "learning_rate": 3.0290187909526914e-05, + "loss": 0.8364, + "step": 4250 + }, + { + "epoch": 2.16, + "learning_rate": 3.0212111982667024e-05, + "loss": 0.7643, + "step": 4260 + }, + { + "epoch": 2.17, + "learning_rate": 3.013398287384144e-05, + "loss": 0.8117, + "step": 4270 + }, + { + "epoch": 2.18, + "learning_rate": 3.0055801380243224e-05, + "loss": 0.8721, + "step": 4280 + }, + { + "epoch": 2.18, + "learning_rate": 2.9977568299599973e-05, + "loss": 0.76, + "step": 4290 + }, + { + "epoch": 2.19, + "learning_rate": 2.989928443016564e-05, + "loss": 0.7813, + "step": 4300 + }, + { + "epoch": 2.19, + "learning_rate": 2.9820950570712414e-05, + "loss": 0.8918, + "step": 4310 + }, + { + "epoch": 2.2, + "learning_rate": 2.9742567520522534e-05, + "loss": 0.8043, + "step": 4320 + }, + { + "epoch": 2.2, + "learning_rate": 2.966413607938019e-05, + "loss": 0.8443, + "step": 4330 + }, + { + "epoch": 2.21, + "learning_rate": 2.9585657047563315e-05, + "loss": 0.7935, + "step": 4340 + }, + { + "epoch": 2.21, + "learning_rate": 2.9507131225835432e-05, + "loss": 0.7864, + "step": 4350 + }, + { + "epoch": 2.22, + "learning_rate": 2.9428559415437496e-05, + "loss": 0.8375, + "step": 4360 + }, + { + "epoch": 2.22, + "learning_rate": 2.93499424180797e-05, + "loss": 0.8113, + "step": 4370 + }, + { + "epoch": 2.23, + "learning_rate": 2.9271281035933313e-05, + "loss": 0.7886, + "step": 4380 + }, + { + "epoch": 2.23, + "learning_rate": 2.9192576071622473e-05, + "loss": 0.9166, + "step": 4390 + }, + { + "epoch": 2.24, + "learning_rate": 2.9113828328216027e-05, + "loss": 0.8631, + "step": 4400 + }, + { + "epoch": 2.24, + "learning_rate": 2.9035038609219306e-05, + "loss": 0.861, + "step": 4410 + }, + { + "epoch": 2.25, + "learning_rate": 2.8956207718565942e-05, + "loss": 0.8465, + "step": 4420 + }, + { + "epoch": 2.25, + "learning_rate": 2.8877336460609673e-05, + "loss": 0.7999, + "step": 4430 + }, + { + "epoch": 2.26, + "learning_rate": 2.879842564011612e-05, + "loss": 0.8585, + "step": 4440 + }, + { + "epoch": 2.26, + "learning_rate": 2.871947606225458e-05, + "loss": 0.885, + "step": 4450 + }, + { + "epoch": 2.27, + "learning_rate": 2.8640488532589803e-05, + "loss": 0.736, + "step": 4460 + }, + { + "epoch": 2.27, + "learning_rate": 2.8561463857073804e-05, + "loss": 0.7454, + "step": 4470 + }, + { + "epoch": 2.28, + "learning_rate": 2.8482402842037614e-05, + "loss": 0.8043, + "step": 4480 + }, + { + "epoch": 2.28, + "learning_rate": 2.8403306294183026e-05, + "loss": 0.74, + "step": 4490 + }, + { + "epoch": 2.29, + "learning_rate": 2.8324175020574424e-05, + "loss": 0.8533, + "step": 4500 + }, + { + "epoch": 2.29, + "learning_rate": 2.8245009828630502e-05, + "loss": 0.735, + "step": 4510 + }, + { + "epoch": 2.3, + "learning_rate": 2.816581152611606e-05, + "loss": 0.867, + "step": 4520 + }, + { + "epoch": 2.3, + "learning_rate": 2.808658092113372e-05, + "loss": 0.8848, + "step": 4530 + }, + { + "epoch": 2.31, + "learning_rate": 2.8007318822115713e-05, + "loss": 0.7563, + "step": 4540 + }, + { + "epoch": 2.31, + "learning_rate": 2.792802603781562e-05, + "loss": 0.762, + "step": 4550 + }, + { + "epoch": 2.32, + "learning_rate": 2.7848703377300118e-05, + "loss": 0.7755, + "step": 4560 + }, + { + "epoch": 2.32, + "learning_rate": 2.776935164994074e-05, + "loss": 0.8489, + "step": 4570 + }, + { + "epoch": 2.33, + "learning_rate": 2.7689971665405578e-05, + "loss": 0.7536, + "step": 4580 + }, + { + "epoch": 2.33, + "learning_rate": 2.761056423365107e-05, + "loss": 0.7741, + "step": 4590 + }, + { + "epoch": 2.34, + "learning_rate": 2.7531130164913703e-05, + "loss": 0.7624, + "step": 4600 + }, + { + "epoch": 2.34, + "learning_rate": 2.7451670269701767e-05, + "loss": 0.8003, + "step": 4610 + }, + { + "epoch": 2.35, + "learning_rate": 2.737218535878705e-05, + "loss": 0.8823, + "step": 4620 + }, + { + "epoch": 2.35, + "learning_rate": 2.7292676243196608e-05, + "loss": 0.8301, + "step": 4630 + }, + { + "epoch": 2.36, + "learning_rate": 2.7213143734204462e-05, + "loss": 0.8486, + "step": 4640 + }, + { + "epoch": 2.36, + "learning_rate": 2.7133588643323334e-05, + "loss": 0.7807, + "step": 4650 + }, + { + "epoch": 2.37, + "learning_rate": 2.7054011782296356e-05, + "loss": 0.8104, + "step": 4660 + }, + { + "epoch": 2.37, + "learning_rate": 2.6974413963088797e-05, + "loss": 0.847, + "step": 4670 + }, + { + "epoch": 2.38, + "learning_rate": 2.6894795997879762e-05, + "loss": 0.8375, + "step": 4680 + }, + { + "epoch": 2.38, + "learning_rate": 2.6815158699053932e-05, + "loss": 0.834, + "step": 4690 + }, + { + "epoch": 2.39, + "learning_rate": 2.6735502879193264e-05, + "loss": 0.7997, + "step": 4700 + }, + { + "epoch": 2.39, + "learning_rate": 2.665582935106866e-05, + "loss": 0.7941, + "step": 4710 + }, + { + "epoch": 2.4, + "learning_rate": 2.6576138927631742e-05, + "loss": 0.8244, + "step": 4720 + }, + { + "epoch": 2.4, + "learning_rate": 2.6496432422006522e-05, + "loss": 0.8158, + "step": 4730 + }, + { + "epoch": 2.41, + "learning_rate": 2.641671064748109e-05, + "loss": 0.8289, + "step": 4740 + }, + { + "epoch": 2.41, + "learning_rate": 2.633697441749935e-05, + "loss": 0.8029, + "step": 4750 + }, + { + "epoch": 2.42, + "learning_rate": 2.6257224545652688e-05, + "loss": 0.8135, + "step": 4760 + }, + { + "epoch": 2.42, + "learning_rate": 2.6177461845671685e-05, + "loss": 0.8097, + "step": 4770 + }, + { + "epoch": 2.43, + "learning_rate": 2.6097687131417843e-05, + "loss": 0.8128, + "step": 4780 + }, + { + "epoch": 2.43, + "learning_rate": 2.6017901216875217e-05, + "loss": 0.8145, + "step": 4790 + }, + { + "epoch": 2.44, + "learning_rate": 2.5938104916142155e-05, + "loss": 0.7725, + "step": 4800 + }, + { + "epoch": 2.44, + "learning_rate": 2.585829904342299e-05, + "loss": 0.8902, + "step": 4810 + }, + { + "epoch": 2.45, + "learning_rate": 2.577848441301971e-05, + "loss": 0.8069, + "step": 4820 + }, + { + "epoch": 2.45, + "learning_rate": 2.569866183932368e-05, + "loss": 0.781, + "step": 4830 + }, + { + "epoch": 2.46, + "learning_rate": 2.5618832136807297e-05, + "loss": 0.7496, + "step": 4840 + }, + { + "epoch": 2.46, + "learning_rate": 2.553899612001571e-05, + "loss": 0.8554, + "step": 4850 + }, + { + "epoch": 2.47, + "learning_rate": 2.5459154603558483e-05, + "loss": 0.8187, + "step": 4860 + }, + { + "epoch": 2.47, + "learning_rate": 2.5379308402101303e-05, + "loss": 0.7848, + "step": 4870 + }, + { + "epoch": 2.48, + "learning_rate": 2.529945833035767e-05, + "loss": 0.7408, + "step": 4880 + }, + { + "epoch": 2.49, + "learning_rate": 2.521960520308056e-05, + "loss": 0.7655, + "step": 4890 + }, + { + "epoch": 2.49, + "learning_rate": 2.5139749835054123e-05, + "loss": 0.7614, + "step": 4900 + }, + { + "epoch": 2.5, + "learning_rate": 2.5059893041085392e-05, + "loss": 0.7382, + "step": 4910 + }, + { + "epoch": 2.5, + "learning_rate": 2.4980035635995943e-05, + "loss": 0.7321, + "step": 4920 + }, + { + "epoch": 2.51, + "learning_rate": 2.4900178434613566e-05, + "loss": 0.7464, + "step": 4930 + }, + { + "epoch": 2.51, + "learning_rate": 2.4820322251764e-05, + "loss": 0.7925, + "step": 4940 + }, + { + "epoch": 2.52, + "learning_rate": 2.4740467902262583e-05, + "loss": 0.8016, + "step": 4950 + }, + { + "epoch": 2.52, + "learning_rate": 2.466061620090594e-05, + "loss": 0.8147, + "step": 4960 + }, + { + "epoch": 2.53, + "learning_rate": 2.4580767962463687e-05, + "loss": 0.8129, + "step": 4970 + }, + { + "epoch": 2.53, + "learning_rate": 2.4500924001670088e-05, + "loss": 0.8099, + "step": 4980 + }, + { + "epoch": 2.54, + "learning_rate": 2.4421085133215787e-05, + "loss": 0.8304, + "step": 4990 + }, + { + "epoch": 2.54, + "learning_rate": 2.4341252171739436e-05, + "loss": 0.9241, + "step": 5000 + }, + { + "epoch": 2.55, + "learning_rate": 2.4261425931819437e-05, + "loss": 0.7888, + "step": 5010 + }, + { + "epoch": 2.55, + "learning_rate": 2.4181607227965604e-05, + "loss": 0.8431, + "step": 5020 + }, + { + "epoch": 2.56, + "learning_rate": 2.4101796874610855e-05, + "loss": 0.7654, + "step": 5030 + }, + { + "epoch": 2.56, + "learning_rate": 2.40219956861029e-05, + "loss": 0.8724, + "step": 5040 + }, + { + "epoch": 2.57, + "learning_rate": 2.3942204476695943e-05, + "loss": 0.9028, + "step": 5050 + }, + { + "epoch": 2.57, + "learning_rate": 2.3862424060542357e-05, + "loss": 0.7866, + "step": 5060 + }, + { + "epoch": 2.58, + "learning_rate": 2.3782655251684394e-05, + "loss": 0.8155, + "step": 5070 + }, + { + "epoch": 2.58, + "learning_rate": 2.3702898864045876e-05, + "loss": 0.766, + "step": 5080 + }, + { + "epoch": 2.59, + "learning_rate": 2.362315571142385e-05, + "loss": 0.7741, + "step": 5090 + }, + { + "epoch": 2.59, + "learning_rate": 2.3543426607480364e-05, + "loss": 0.8394, + "step": 5100 + }, + { + "epoch": 2.6, + "learning_rate": 2.346371236573409e-05, + "loss": 0.8572, + "step": 5110 + }, + { + "epoch": 2.6, + "learning_rate": 2.3384013799552072e-05, + "loss": 0.8239, + "step": 5120 + }, + { + "epoch": 2.61, + "learning_rate": 2.3304331722141393e-05, + "loss": 0.7008, + "step": 5130 + }, + { + "epoch": 2.61, + "learning_rate": 2.32246669465409e-05, + "loss": 0.7752, + "step": 5140 + }, + { + "epoch": 2.62, + "learning_rate": 2.3145020285612894e-05, + "loss": 0.7641, + "step": 5150 + }, + { + "epoch": 2.62, + "learning_rate": 2.3065392552034857e-05, + "loss": 0.8388, + "step": 5160 + }, + { + "epoch": 2.63, + "learning_rate": 2.298578455829114e-05, + "loss": 0.8176, + "step": 5170 + }, + { + "epoch": 2.63, + "learning_rate": 2.2906197116664653e-05, + "loss": 0.7676, + "step": 5180 + }, + { + "epoch": 2.64, + "learning_rate": 2.282663103922863e-05, + "loss": 0.7121, + "step": 5190 + }, + { + "epoch": 2.64, + "learning_rate": 2.2747087137838307e-05, + "loss": 0.7567, + "step": 5200 + }, + { + "epoch": 2.65, + "learning_rate": 2.2667566224122648e-05, + "loss": 0.8355, + "step": 5210 + }, + { + "epoch": 2.65, + "learning_rate": 2.2588069109476057e-05, + "loss": 0.7708, + "step": 5220 + }, + { + "epoch": 2.66, + "learning_rate": 2.2508596605050107e-05, + "loss": 0.8587, + "step": 5230 + }, + { + "epoch": 2.66, + "learning_rate": 2.2429149521745254e-05, + "loss": 0.7971, + "step": 5240 + }, + { + "epoch": 2.67, + "learning_rate": 2.2349728670202582e-05, + "loss": 0.7568, + "step": 5250 + }, + { + "epoch": 2.67, + "learning_rate": 2.2270334860795497e-05, + "loss": 0.7911, + "step": 5260 + }, + { + "epoch": 2.68, + "learning_rate": 2.2190968903621498e-05, + "loss": 0.8176, + "step": 5270 + }, + { + "epoch": 2.68, + "learning_rate": 2.2111631608493885e-05, + "loss": 0.8239, + "step": 5280 + }, + { + "epoch": 2.69, + "learning_rate": 2.2032323784933505e-05, + "loss": 0.8844, + "step": 5290 + }, + { + "epoch": 2.69, + "learning_rate": 2.1953046242160493e-05, + "loss": 0.7868, + "step": 5300 + }, + { + "epoch": 2.7, + "learning_rate": 2.187379978908601e-05, + "loss": 0.8765, + "step": 5310 + }, + { + "epoch": 2.7, + "learning_rate": 2.1794585234303993e-05, + "loss": 0.7886, + "step": 5320 + }, + { + "epoch": 2.71, + "learning_rate": 2.1715403386082907e-05, + "loss": 0.861, + "step": 5330 + }, + { + "epoch": 2.71, + "learning_rate": 2.1636255052357497e-05, + "loss": 0.8678, + "step": 5340 + }, + { + "epoch": 2.72, + "learning_rate": 2.1557141040720515e-05, + "loss": 0.8169, + "step": 5350 + }, + { + "epoch": 2.72, + "learning_rate": 2.147806215841454e-05, + "loss": 0.7597, + "step": 5360 + }, + { + "epoch": 2.73, + "learning_rate": 2.1399019212323697e-05, + "loss": 0.8513, + "step": 5370 + }, + { + "epoch": 2.73, + "learning_rate": 2.1320013008965432e-05, + "loss": 0.7976, + "step": 5380 + }, + { + "epoch": 2.74, + "learning_rate": 2.124104435448228e-05, + "loss": 0.7306, + "step": 5390 + }, + { + "epoch": 2.74, + "learning_rate": 2.1162114054633663e-05, + "loss": 0.8193, + "step": 5400 + }, + { + "epoch": 2.75, + "learning_rate": 2.1083222914787623e-05, + "loss": 0.8915, + "step": 5410 + }, + { + "epoch": 2.75, + "learning_rate": 2.1004371739912654e-05, + "loss": 0.7684, + "step": 5420 + }, + { + "epoch": 2.76, + "learning_rate": 2.0925561334569464e-05, + "loss": 0.7708, + "step": 5430 + }, + { + "epoch": 2.76, + "learning_rate": 2.0846792502902753e-05, + "loss": 0.7513, + "step": 5440 + }, + { + "epoch": 2.77, + "learning_rate": 2.0768066048633033e-05, + "loss": 0.7225, + "step": 5450 + }, + { + "epoch": 2.77, + "learning_rate": 2.0689382775048418e-05, + "loss": 0.7696, + "step": 5460 + }, + { + "epoch": 2.78, + "learning_rate": 2.061074348499642e-05, + "loss": 0.751, + "step": 5470 + }, + { + "epoch": 2.78, + "learning_rate": 2.0532148980875768e-05, + "loss": 0.7263, + "step": 5480 + }, + { + "epoch": 2.79, + "learning_rate": 2.045360006462822e-05, + "loss": 0.8106, + "step": 5490 + }, + { + "epoch": 2.8, + "learning_rate": 2.037509753773037e-05, + "loss": 0.7924, + "step": 5500 + }, + { + "epoch": 2.8, + "learning_rate": 2.0296642201185473e-05, + "loss": 0.8711, + "step": 5510 + }, + { + "epoch": 2.81, + "learning_rate": 2.02182348555153e-05, + "loss": 0.8576, + "step": 5520 + }, + { + "epoch": 2.81, + "learning_rate": 2.0139876300751904e-05, + "loss": 0.8587, + "step": 5530 + }, + { + "epoch": 2.82, + "learning_rate": 2.0061567336429527e-05, + "loss": 0.8752, + "step": 5540 + }, + { + "epoch": 2.82, + "learning_rate": 1.9983308761576407e-05, + "loss": 0.8727, + "step": 5550 + }, + { + "epoch": 2.83, + "learning_rate": 1.990510137470664e-05, + "loss": 0.7785, + "step": 5560 + }, + { + "epoch": 2.83, + "learning_rate": 1.9826945973812005e-05, + "loss": 0.7669, + "step": 5570 + }, + { + "epoch": 2.84, + "learning_rate": 1.9748843356353856e-05, + "loss": 0.8083, + "step": 5580 + }, + { + "epoch": 2.84, + "learning_rate": 1.9670794319254963e-05, + "loss": 0.813, + "step": 5590 + }, + { + "epoch": 2.85, + "learning_rate": 1.9592799658891385e-05, + "loss": 0.815, + "step": 5600 + }, + { + "epoch": 2.85, + "learning_rate": 1.951486017108436e-05, + "loss": 0.9141, + "step": 5610 + }, + { + "epoch": 2.86, + "learning_rate": 1.9436976651092144e-05, + "loss": 0.7419, + "step": 5620 + }, + { + "epoch": 2.86, + "learning_rate": 1.9359149893601944e-05, + "loss": 0.8557, + "step": 5630 + }, + { + "epoch": 2.87, + "learning_rate": 1.9281380692721786e-05, + "loss": 0.871, + "step": 5640 + }, + { + "epoch": 2.87, + "learning_rate": 1.9203669841972416e-05, + "loss": 0.8396, + "step": 5650 + }, + { + "epoch": 2.88, + "learning_rate": 1.9126018134279193e-05, + "loss": 0.8437, + "step": 5660 + }, + { + "epoch": 2.88, + "learning_rate": 1.904842636196402e-05, + "loss": 0.7932, + "step": 5670 + }, + { + "epoch": 2.89, + "learning_rate": 1.8970895316737238e-05, + "loss": 0.6945, + "step": 5680 + }, + { + "epoch": 2.89, + "learning_rate": 1.8893425789689575e-05, + "loss": 0.738, + "step": 5690 + }, + { + "epoch": 2.9, + "learning_rate": 1.8816018571284017e-05, + "loss": 0.7109, + "step": 5700 + }, + { + "epoch": 2.9, + "learning_rate": 1.8738674451347818e-05, + "loss": 0.8739, + "step": 5710 + }, + { + "epoch": 2.91, + "learning_rate": 1.866139421906439e-05, + "loss": 0.8342, + "step": 5720 + }, + { + "epoch": 2.91, + "learning_rate": 1.858417866296528e-05, + "loss": 0.7657, + "step": 5730 + }, + { + "epoch": 2.92, + "learning_rate": 1.850702857092208e-05, + "loss": 0.8464, + "step": 5740 + }, + { + "epoch": 2.92, + "learning_rate": 1.8429944730138448e-05, + "loss": 0.7853, + "step": 5750 + }, + { + "epoch": 2.93, + "learning_rate": 1.8352927927142026e-05, + "loss": 0.7642, + "step": 5760 + }, + { + "epoch": 2.93, + "learning_rate": 1.8275978947776436e-05, + "loss": 0.8316, + "step": 5770 + }, + { + "epoch": 2.94, + "learning_rate": 1.819909857719328e-05, + "loss": 0.8784, + "step": 5780 + }, + { + "epoch": 2.94, + "learning_rate": 1.8122287599844066e-05, + "loss": 0.7567, + "step": 5790 + }, + { + "epoch": 2.95, + "learning_rate": 1.8045546799472286e-05, + "loss": 0.7579, + "step": 5800 + }, + { + "epoch": 2.95, + "learning_rate": 1.796887695910535e-05, + "loss": 0.7957, + "step": 5810 + }, + { + "epoch": 2.96, + "learning_rate": 1.7892278861046648e-05, + "loss": 0.7458, + "step": 5820 + }, + { + "epoch": 2.96, + "learning_rate": 1.7815753286867533e-05, + "loss": 0.7479, + "step": 5830 + }, + { + "epoch": 2.97, + "learning_rate": 1.7739301017399355e-05, + "loss": 0.7617, + "step": 5840 + }, + { + "epoch": 2.97, + "learning_rate": 1.7662922832725514e-05, + "loss": 0.8818, + "step": 5850 + }, + { + "epoch": 2.98, + "learning_rate": 1.7586619512173458e-05, + "loss": 0.8075, + "step": 5860 + }, + { + "epoch": 2.98, + "learning_rate": 1.751039183430678e-05, + "loss": 0.786, + "step": 5870 + }, + { + "epoch": 2.99, + "learning_rate": 1.7434240576917226e-05, + "loss": 0.8369, + "step": 5880 + }, + { + "epoch": 2.99, + "learning_rate": 1.735816651701681e-05, + "loss": 0.8883, + "step": 5890 + }, + { + "epoch": 3.0, + "learning_rate": 1.7282170430829837e-05, + "loss": 0.6625, + "step": 5900 + }, + { + "epoch": 3.0, + "learning_rate": 1.7206253093785012e-05, + "loss": 0.838, + "step": 5910 + }, + { + "epoch": 3.01, + "learning_rate": 1.713041528050753e-05, + "loss": 0.6941, + "step": 5920 + }, + { + "epoch": 3.01, + "learning_rate": 1.705465776481114e-05, + "loss": 0.7716, + "step": 5930 + }, + { + "epoch": 3.02, + "learning_rate": 1.6978981319690298e-05, + "loss": 0.8099, + "step": 5940 + }, + { + "epoch": 3.02, + "learning_rate": 1.6903386717312236e-05, + "loss": 0.7046, + "step": 5950 + }, + { + "epoch": 3.03, + "learning_rate": 1.682787472900912e-05, + "loss": 0.8008, + "step": 5960 + }, + { + "epoch": 3.03, + "learning_rate": 1.6752446125270117e-05, + "loss": 0.8587, + "step": 5970 + }, + { + "epoch": 3.04, + "learning_rate": 1.6677101675733625e-05, + "loss": 0.8083, + "step": 5980 + }, + { + "epoch": 3.04, + "learning_rate": 1.6601842149179347e-05, + "loss": 0.7906, + "step": 5990 + }, + { + "epoch": 3.05, + "learning_rate": 1.6526668313520478e-05, + "loss": 0.7089, + "step": 6000 + }, + { + "epoch": 3.05, + "learning_rate": 1.6451580935795863e-05, + "loss": 0.8412, + "step": 6010 + }, + { + "epoch": 3.06, + "learning_rate": 1.637658078216217e-05, + "loss": 0.7232, + "step": 6020 + }, + { + "epoch": 3.06, + "learning_rate": 1.6301668617886072e-05, + "loss": 0.8772, + "step": 6030 + }, + { + "epoch": 3.07, + "learning_rate": 1.622684520733644e-05, + "loss": 0.8135, + "step": 6040 + }, + { + "epoch": 3.07, + "learning_rate": 1.615211131397654e-05, + "loss": 0.7315, + "step": 6050 + }, + { + "epoch": 3.08, + "learning_rate": 1.6077467700356256e-05, + "loss": 0.8048, + "step": 6060 + }, + { + "epoch": 3.08, + "learning_rate": 1.6002915128104284e-05, + "loss": 0.684, + "step": 6070 + }, + { + "epoch": 3.09, + "learning_rate": 1.592845435792039e-05, + "loss": 0.7887, + "step": 6080 + }, + { + "epoch": 3.09, + "learning_rate": 1.585408614956763e-05, + "loss": 0.7469, + "step": 6090 + }, + { + "epoch": 3.1, + "learning_rate": 1.5779811261864604e-05, + "loss": 0.7575, + "step": 6100 + }, + { + "epoch": 3.11, + "learning_rate": 1.5705630452677707e-05, + "loss": 0.7354, + "step": 6110 + }, + { + "epoch": 3.11, + "learning_rate": 1.56315444789134e-05, + "loss": 0.7734, + "step": 6120 + }, + { + "epoch": 3.12, + "learning_rate": 1.555755409651049e-05, + "loss": 0.804, + "step": 6130 + }, + { + "epoch": 3.12, + "learning_rate": 1.5483660060432432e-05, + "loss": 0.7408, + "step": 6140 + }, + { + "epoch": 3.13, + "learning_rate": 1.5409863124659562e-05, + "loss": 0.819, + "step": 6150 + }, + { + "epoch": 3.13, + "learning_rate": 1.5336164042181494e-05, + "loss": 0.7686, + "step": 6160 + }, + { + "epoch": 3.14, + "learning_rate": 1.5262563564989374e-05, + "loss": 0.8118, + "step": 6170 + }, + { + "epoch": 3.14, + "learning_rate": 1.5189062444068225e-05, + "loss": 0.7973, + "step": 6180 + }, + { + "epoch": 3.15, + "learning_rate": 1.5115661429389294e-05, + "loss": 0.7622, + "step": 6190 + }, + { + "epoch": 3.15, + "learning_rate": 1.5042361269902383e-05, + "loss": 0.875, + "step": 6200 + }, + { + "epoch": 3.16, + "learning_rate": 1.4969162713528212e-05, + "loss": 0.7767, + "step": 6210 + }, + { + "epoch": 3.16, + "learning_rate": 1.4896066507150804e-05, + "loss": 0.7193, + "step": 6220 + }, + { + "epoch": 3.17, + "learning_rate": 1.482307339660983e-05, + "loss": 0.7978, + "step": 6230 + }, + { + "epoch": 3.17, + "learning_rate": 1.4750184126693028e-05, + "loss": 0.7754, + "step": 6240 + }, + { + "epoch": 3.18, + "learning_rate": 1.4677399441128603e-05, + "loss": 0.8816, + "step": 6250 + }, + { + "epoch": 3.18, + "learning_rate": 1.4604720082577622e-05, + "loss": 0.8264, + "step": 6260 + }, + { + "epoch": 3.19, + "learning_rate": 1.4532146792626449e-05, + "loss": 0.7528, + "step": 6270 + }, + { + "epoch": 3.19, + "learning_rate": 1.4459680311779159e-05, + "loss": 0.7898, + "step": 6280 + }, + { + "epoch": 3.2, + "learning_rate": 1.438732137945001e-05, + "loss": 0.8136, + "step": 6290 + }, + { + "epoch": 3.2, + "learning_rate": 1.4315070733955888e-05, + "loss": 0.7874, + "step": 6300 + }, + { + "epoch": 3.21, + "learning_rate": 1.4242929112508769e-05, + "loss": 0.7637, + "step": 6310 + }, + { + "epoch": 3.21, + "learning_rate": 1.417089725120817e-05, + "loss": 0.8007, + "step": 6320 + }, + { + "epoch": 3.22, + "learning_rate": 1.4098975885033713e-05, + "loss": 0.7594, + "step": 6330 + }, + { + "epoch": 3.22, + "learning_rate": 1.4027165747837548e-05, + "loss": 0.7395, + "step": 6340 + }, + { + "epoch": 3.23, + "learning_rate": 1.3955467572336905e-05, + "loss": 0.7882, + "step": 6350 + }, + { + "epoch": 3.23, + "learning_rate": 1.3883882090106611e-05, + "loss": 0.7563, + "step": 6360 + }, + { + "epoch": 3.24, + "learning_rate": 1.381241003157162e-05, + "loss": 0.7223, + "step": 6370 + }, + { + "epoch": 3.24, + "learning_rate": 1.3741052125999564e-05, + "loss": 0.7144, + "step": 6380 + }, + { + "epoch": 3.25, + "learning_rate": 1.3669809101493314e-05, + "loss": 0.7936, + "step": 6390 + }, + { + "epoch": 3.25, + "learning_rate": 1.3605789203581502e-05, + "loss": 0.7189, + "step": 6400 + }, + { + "epoch": 3.26, + "learning_rate": 1.3534766454819494e-05, + "loss": 0.7533, + "step": 6410 + }, + { + "epoch": 3.26, + "learning_rate": 1.3463860691966308e-05, + "loss": 0.7467, + "step": 6420 + }, + { + "epoch": 3.27, + "learning_rate": 1.3393072638511351e-05, + "loss": 0.7655, + "step": 6430 + }, + { + "epoch": 3.27, + "learning_rate": 1.332240301674299e-05, + "loss": 0.7911, + "step": 6440 + }, + { + "epoch": 3.28, + "learning_rate": 1.3251852547741161e-05, + "loss": 0.8208, + "step": 6450 + }, + { + "epoch": 3.28, + "learning_rate": 1.3181421951370035e-05, + "loss": 0.8012, + "step": 6460 + }, + { + "epoch": 3.29, + "learning_rate": 1.311111194627064e-05, + "loss": 0.8289, + "step": 6470 + }, + { + "epoch": 3.29, + "learning_rate": 1.304092324985356e-05, + "loss": 0.7399, + "step": 6480 + }, + { + "epoch": 3.3, + "learning_rate": 1.2970856578291598e-05, + "loss": 0.7232, + "step": 6490 + }, + { + "epoch": 3.3, + "learning_rate": 1.290091264651247e-05, + "loss": 0.8301, + "step": 6500 + }, + { + "epoch": 3.31, + "learning_rate": 1.2831092168191517e-05, + "loss": 0.7985, + "step": 6510 + }, + { + "epoch": 3.31, + "learning_rate": 1.2761395855744408e-05, + "loss": 0.7452, + "step": 6520 + }, + { + "epoch": 3.32, + "learning_rate": 1.2691824420319895e-05, + "loss": 0.8126, + "step": 6530 + }, + { + "epoch": 3.32, + "learning_rate": 1.2622378571792535e-05, + "loss": 0.8346, + "step": 6540 + }, + { + "epoch": 3.33, + "learning_rate": 1.2553059018755454e-05, + "loss": 0.7807, + "step": 6550 + }, + { + "epoch": 3.33, + "learning_rate": 1.2483866468513125e-05, + "loss": 0.8407, + "step": 6560 + }, + { + "epoch": 3.34, + "learning_rate": 1.2414801627074144e-05, + "loss": 0.8276, + "step": 6570 + }, + { + "epoch": 3.34, + "learning_rate": 1.2345865199144e-05, + "loss": 0.7426, + "step": 6580 + }, + { + "epoch": 3.35, + "learning_rate": 1.2277057888117944e-05, + "loss": 0.7461, + "step": 6590 + }, + { + "epoch": 3.35, + "learning_rate": 1.220838039607376e-05, + "loss": 0.7221, + "step": 6600 + }, + { + "epoch": 3.36, + "learning_rate": 1.2139833423764626e-05, + "loss": 0.8026, + "step": 6610 + }, + { + "epoch": 3.36, + "learning_rate": 1.2071417670611959e-05, + "loss": 0.7952, + "step": 6620 + }, + { + "epoch": 3.37, + "learning_rate": 1.2003133834698268e-05, + "loss": 0.7638, + "step": 6630 + }, + { + "epoch": 3.37, + "learning_rate": 1.1934982612760049e-05, + "loss": 0.7576, + "step": 6640 + }, + { + "epoch": 3.38, + "learning_rate": 1.186696470018066e-05, + "loss": 0.7676, + "step": 6650 + }, + { + "epoch": 3.38, + "learning_rate": 1.1799080790983246e-05, + "loss": 0.7482, + "step": 6660 + }, + { + "epoch": 3.39, + "learning_rate": 1.1731331577823617e-05, + "loss": 0.8294, + "step": 6670 + }, + { + "epoch": 3.39, + "learning_rate": 1.1663717751983228e-05, + "loss": 0.8052, + "step": 6680 + }, + { + "epoch": 3.4, + "learning_rate": 1.15962400033621e-05, + "loss": 0.844, + "step": 6690 + }, + { + "epoch": 3.4, + "learning_rate": 1.152889902047179e-05, + "loss": 0.7958, + "step": 6700 + }, + { + "epoch": 3.41, + "learning_rate": 1.1461695490428352e-05, + "loss": 0.7427, + "step": 6710 + }, + { + "epoch": 3.42, + "learning_rate": 1.1394630098945342e-05, + "loss": 0.7311, + "step": 6720 + }, + { + "epoch": 3.42, + "learning_rate": 1.1327703530326811e-05, + "loss": 0.8388, + "step": 6730 + }, + { + "epoch": 3.43, + "learning_rate": 1.1260916467460336e-05, + "loss": 0.768, + "step": 6740 + }, + { + "epoch": 3.43, + "learning_rate": 1.1194269591810018e-05, + "loss": 0.7228, + "step": 6750 + }, + { + "epoch": 3.44, + "learning_rate": 1.1127763583409576e-05, + "loss": 0.8205, + "step": 6760 + }, + { + "epoch": 3.44, + "learning_rate": 1.1061399120855375e-05, + "loss": 0.6815, + "step": 6770 + }, + { + "epoch": 3.45, + "learning_rate": 1.0995176881299515e-05, + "loss": 0.7285, + "step": 6780 + }, + { + "epoch": 3.45, + "learning_rate": 1.092909754044292e-05, + "loss": 0.7713, + "step": 6790 + }, + { + "epoch": 3.46, + "learning_rate": 1.086316177252844e-05, + "loss": 0.8213, + "step": 6800 + }, + { + "epoch": 3.46, + "learning_rate": 1.0797370250333975e-05, + "loss": 0.8498, + "step": 6810 + }, + { + "epoch": 3.47, + "learning_rate": 1.0731723645165603e-05, + "loss": 0.6787, + "step": 6820 + }, + { + "epoch": 3.47, + "learning_rate": 1.0666222626850752e-05, + "loss": 0.8175, + "step": 6830 + }, + { + "epoch": 3.48, + "learning_rate": 1.0600867863731321e-05, + "loss": 0.8446, + "step": 6840 + }, + { + "epoch": 3.48, + "learning_rate": 1.0535660022656915e-05, + "loss": 0.7999, + "step": 6850 + }, + { + "epoch": 3.49, + "learning_rate": 1.0470599768978004e-05, + "loss": 0.7724, + "step": 6860 + }, + { + "epoch": 3.49, + "learning_rate": 1.040568776653915e-05, + "loss": 0.8615, + "step": 6870 + }, + { + "epoch": 3.5, + "learning_rate": 1.0340924677672223e-05, + "loss": 0.8512, + "step": 6880 + }, + { + "epoch": 3.5, + "learning_rate": 1.0276311163189647e-05, + "loss": 0.8195, + "step": 6890 + }, + { + "epoch": 3.51, + "learning_rate": 1.021184788237767e-05, + "loss": 0.7919, + "step": 6900 + }, + { + "epoch": 3.51, + "learning_rate": 1.0147535492989613e-05, + "loss": 0.736, + "step": 6910 + }, + { + "epoch": 3.52, + "learning_rate": 1.0083374651239172e-05, + "loss": 0.8283, + "step": 6920 + }, + { + "epoch": 3.52, + "learning_rate": 1.0019366011793732e-05, + "loss": 0.814, + "step": 6930 + }, + { + "epoch": 3.53, + "learning_rate": 9.955510227767665e-06, + "loss": 0.8149, + "step": 6940 + }, + { + "epoch": 3.53, + "learning_rate": 9.891807950715682e-06, + "loss": 0.7876, + "step": 6950 + }, + { + "epoch": 3.54, + "learning_rate": 9.82825983062618e-06, + "loss": 0.6661, + "step": 6960 + }, + { + "epoch": 3.54, + "learning_rate": 9.764866515914611e-06, + "loss": 0.7429, + "step": 6970 + }, + { + "epoch": 3.55, + "learning_rate": 9.701628653416867e-06, + "loss": 0.7564, + "step": 6980 + }, + { + "epoch": 3.55, + "learning_rate": 9.638546888382672e-06, + "loss": 0.8258, + "step": 6990 + }, + { + "epoch": 3.56, + "learning_rate": 9.575621864469006e-06, + "loss": 0.848, + "step": 7000 + }, + { + "epoch": 3.56, + "learning_rate": 9.512854223733547e-06, + "loss": 0.7468, + "step": 7010 + }, + { + "epoch": 3.57, + "learning_rate": 9.450244606628082e-06, + "loss": 0.7268, + "step": 7020 + }, + { + "epoch": 3.57, + "learning_rate": 9.38779365199202e-06, + "loss": 0.896, + "step": 7030 + }, + { + "epoch": 3.58, + "learning_rate": 9.325501997045847e-06, + "loss": 0.8309, + "step": 7040 + }, + { + "epoch": 3.58, + "learning_rate": 9.263370277384631e-06, + "loss": 0.7935, + "step": 7050 + }, + { + "epoch": 3.59, + "learning_rate": 9.20139912697153e-06, + "loss": 0.7681, + "step": 7060 + }, + { + "epoch": 3.59, + "learning_rate": 9.139589178131333e-06, + "loss": 0.7819, + "step": 7070 + }, + { + "epoch": 3.6, + "learning_rate": 9.077941061543996e-06, + "loss": 0.7642, + "step": 7080 + }, + { + "epoch": 3.6, + "learning_rate": 9.016455406238222e-06, + "loss": 0.6481, + "step": 7090 + }, + { + "epoch": 3.61, + "learning_rate": 8.955132839585037e-06, + "loss": 0.7378, + "step": 7100 + }, + { + "epoch": 3.61, + "learning_rate": 8.893973987291369e-06, + "loss": 0.8364, + "step": 7110 + }, + { + "epoch": 3.62, + "learning_rate": 8.832979473393693e-06, + "loss": 0.7551, + "step": 7120 + }, + { + "epoch": 3.62, + "learning_rate": 8.772149920251654e-06, + "loss": 0.8637, + "step": 7130 + }, + { + "epoch": 3.63, + "learning_rate": 8.711485948541715e-06, + "loss": 0.7805, + "step": 7140 + }, + { + "epoch": 3.63, + "learning_rate": 8.650988177250812e-06, + "loss": 0.8183, + "step": 7150 + }, + { + "epoch": 3.64, + "learning_rate": 8.590657223670059e-06, + "loss": 0.8564, + "step": 7160 + }, + { + "epoch": 3.64, + "learning_rate": 8.53049370338844e-06, + "loss": 0.8067, + "step": 7170 + }, + { + "epoch": 3.65, + "learning_rate": 8.470498230286523e-06, + "loss": 0.7626, + "step": 7180 + }, + { + "epoch": 3.65, + "learning_rate": 8.410671416530205e-06, + "loss": 0.8154, + "step": 7190 + }, + { + "epoch": 3.66, + "learning_rate": 8.351013872564447e-06, + "loss": 0.7763, + "step": 7200 + }, + { + "epoch": 3.66, + "learning_rate": 8.291526207107084e-06, + "loss": 0.7609, + "step": 7210 + }, + { + "epoch": 3.67, + "learning_rate": 8.232209027142571e-06, + "loss": 0.7759, + "step": 7220 + }, + { + "epoch": 3.67, + "learning_rate": 8.173062937915812e-06, + "loss": 0.8389, + "step": 7230 + }, + { + "epoch": 3.68, + "learning_rate": 8.114088542925993e-06, + "loss": 0.7771, + "step": 7240 + }, + { + "epoch": 3.68, + "learning_rate": 8.055286443920395e-06, + "loss": 0.7084, + "step": 7250 + }, + { + "epoch": 3.69, + "learning_rate": 7.99665724088828e-06, + "loss": 0.8226, + "step": 7260 + }, + { + "epoch": 3.69, + "learning_rate": 7.938201532054768e-06, + "loss": 0.7129, + "step": 7270 + }, + { + "epoch": 3.7, + "learning_rate": 7.879919913874695e-06, + "loss": 0.7918, + "step": 7280 + }, + { + "epoch": 3.7, + "learning_rate": 7.821812981026588e-06, + "loss": 0.8319, + "step": 7290 + }, + { + "epoch": 3.71, + "learning_rate": 7.763881326406552e-06, + "loss": 0.7772, + "step": 7300 + }, + { + "epoch": 3.71, + "learning_rate": 7.706125541122244e-06, + "loss": 0.8371, + "step": 7310 + }, + { + "epoch": 3.72, + "learning_rate": 7.64854621448682e-06, + "loss": 0.7229, + "step": 7320 + }, + { + "epoch": 3.73, + "learning_rate": 7.591143934012942e-06, + "loss": 0.8227, + "step": 7330 + }, + { + "epoch": 3.73, + "learning_rate": 7.5339192854067736e-06, + "loss": 0.8326, + "step": 7340 + }, + { + "epoch": 3.74, + "learning_rate": 7.476872852562003e-06, + "loss": 0.7516, + "step": 7350 + }, + { + "epoch": 3.74, + "learning_rate": 7.420005217553891e-06, + "loss": 0.7943, + "step": 7360 + }, + { + "epoch": 3.75, + "learning_rate": 7.3633169606333265e-06, + "loss": 0.8194, + "step": 7370 + }, + { + "epoch": 3.75, + "learning_rate": 7.306808660220909e-06, + "loss": 0.8268, + "step": 7380 + }, + { + "epoch": 3.76, + "learning_rate": 7.250480892901046e-06, + "loss": 0.7529, + "step": 7390 + }, + { + "epoch": 3.76, + "learning_rate": 7.194334233416069e-06, + "loss": 0.7817, + "step": 7400 + }, + { + "epoch": 3.77, + "learning_rate": 7.138369254660365e-06, + "loss": 0.8199, + "step": 7410 + }, + { + "epoch": 3.77, + "learning_rate": 7.082586527674542e-06, + "loss": 0.8484, + "step": 7420 + }, + { + "epoch": 3.78, + "learning_rate": 7.0269866216395915e-06, + "loss": 0.7921, + "step": 7430 + }, + { + "epoch": 3.78, + "learning_rate": 6.971570103871089e-06, + "loss": 0.8236, + "step": 7440 + }, + { + "epoch": 3.79, + "learning_rate": 6.9163375398134024e-06, + "loss": 0.8259, + "step": 7450 + }, + { + "epoch": 3.79, + "learning_rate": 6.8612894930339065e-06, + "loss": 0.8038, + "step": 7460 + }, + { + "epoch": 3.8, + "learning_rate": 6.806426525217266e-06, + "loss": 0.8309, + "step": 7470 + }, + { + "epoch": 3.8, + "learning_rate": 6.751749196159679e-06, + "loss": 0.8441, + "step": 7480 + }, + { + "epoch": 3.81, + "learning_rate": 6.69725806376317e-06, + "loss": 0.7264, + "step": 7490 + }, + { + "epoch": 3.81, + "learning_rate": 6.6429536840299035e-06, + "loss": 0.8848, + "step": 7500 + }, + { + "epoch": 3.82, + "learning_rate": 6.588836611056507e-06, + "loss": 0.8101, + "step": 7510 + }, + { + "epoch": 3.82, + "learning_rate": 6.534907397028409e-06, + "loss": 0.724, + "step": 7520 + }, + { + "epoch": 3.83, + "learning_rate": 6.481166592214225e-06, + "loss": 0.8091, + "step": 7530 + }, + { + "epoch": 3.83, + "learning_rate": 6.427614744960126e-06, + "loss": 0.7131, + "step": 7540 + }, + { + "epoch": 3.84, + "learning_rate": 6.374252401684233e-06, + "loss": 0.7833, + "step": 7550 + }, + { + "epoch": 3.84, + "learning_rate": 6.32108010687108e-06, + "loss": 0.8134, + "step": 7560 + }, + { + "epoch": 3.85, + "learning_rate": 6.268098403066022e-06, + "loss": 0.7773, + "step": 7570 + }, + { + "epoch": 3.85, + "learning_rate": 6.2153078308697125e-06, + "loss": 0.7609, + "step": 7580 + }, + { + "epoch": 3.86, + "learning_rate": 6.162708928932592e-06, + "loss": 0.8762, + "step": 7590 + }, + { + "epoch": 3.86, + "learning_rate": 6.110302233949383e-06, + "loss": 0.7745, + "step": 7600 + }, + { + "epoch": 3.87, + "learning_rate": 6.05808828065362e-06, + "loss": 0.7402, + "step": 7610 + }, + { + "epoch": 3.87, + "learning_rate": 6.006067601812187e-06, + "loss": 0.7638, + "step": 7620 + }, + { + "epoch": 3.88, + "learning_rate": 5.954240728219898e-06, + "loss": 0.7565, + "step": 7630 + }, + { + "epoch": 3.88, + "learning_rate": 5.902608188694039e-06, + "loss": 0.7291, + "step": 7640 + }, + { + "epoch": 3.89, + "learning_rate": 5.8511705100690314e-06, + "loss": 0.7956, + "step": 7650 + }, + { + "epoch": 3.89, + "learning_rate": 5.79992821719102e-06, + "loss": 0.7756, + "step": 7660 + }, + { + "epoch": 3.9, + "learning_rate": 5.7488818329125114e-06, + "loss": 0.8543, + "step": 7670 + }, + { + "epoch": 3.9, + "learning_rate": 5.698031878087071e-06, + "loss": 0.6868, + "step": 7680 + }, + { + "epoch": 3.91, + "learning_rate": 5.647378871563971e-06, + "loss": 0.7887, + "step": 7690 + }, + { + "epoch": 3.91, + "learning_rate": 5.59692333018293e-06, + "loss": 0.7896, + "step": 7700 + }, + { + "epoch": 3.92, + "learning_rate": 5.546665768768814e-06, + "loss": 0.8122, + "step": 7710 + }, + { + "epoch": 3.92, + "learning_rate": 5.496606700126397e-06, + "loss": 0.8105, + "step": 7720 + }, + { + "epoch": 3.93, + "learning_rate": 5.4467466350351245e-06, + "loss": 0.8576, + "step": 7730 + }, + { + "epoch": 3.93, + "learning_rate": 5.3970860822439045e-06, + "loss": 0.8091, + "step": 7740 + }, + { + "epoch": 3.94, + "learning_rate": 5.347625548465915e-06, + "loss": 0.7318, + "step": 7750 + }, + { + "epoch": 3.94, + "learning_rate": 5.298365538373426e-06, + "loss": 0.9173, + "step": 7760 + }, + { + "epoch": 3.95, + "learning_rate": 5.2493065545926644e-06, + "loss": 0.8607, + "step": 7770 + }, + { + "epoch": 3.95, + "learning_rate": 5.200449097698676e-06, + "loss": 0.8263, + "step": 7780 + }, + { + "epoch": 3.96, + "learning_rate": 5.151793666210217e-06, + "loss": 0.8193, + "step": 7790 + }, + { + "epoch": 3.96, + "learning_rate": 5.103340756584685e-06, + "loss": 0.7089, + "step": 7800 + }, + { + "epoch": 3.97, + "learning_rate": 5.055090863213008e-06, + "loss": 0.7857, + "step": 7810 + }, + { + "epoch": 3.97, + "learning_rate": 5.007044478414658e-06, + "loss": 0.7785, + "step": 7820 + }, + { + "epoch": 3.98, + "learning_rate": 4.9592020924325936e-06, + "loss": 0.8344, + "step": 7830 + }, + { + "epoch": 3.98, + "learning_rate": 4.911564193428259e-06, + "loss": 0.8568, + "step": 7840 + }, + { + "epoch": 3.99, + "learning_rate": 4.864131267476615e-06, + "loss": 0.8104, + "step": 7850 + }, + { + "epoch": 3.99, + "learning_rate": 4.816903798561168e-06, + "loss": 0.7698, + "step": 7860 + }, + { + "epoch": 4.0, + "learning_rate": 4.769882268569037e-06, + "loss": 0.7878, + "step": 7870 + }, + { + "epoch": 4.0, + "learning_rate": 4.723067157286038e-06, + "loss": 0.8072, + "step": 7880 + }, + { + "epoch": 4.01, + "learning_rate": 4.6764589423917895e-06, + "loss": 0.7909, + "step": 7890 + }, + { + "epoch": 4.01, + "learning_rate": 4.630058099454823e-06, + "loss": 0.8484, + "step": 7900 + }, + { + "epoch": 4.02, + "learning_rate": 4.583865101927756e-06, + "loss": 0.8064, + "step": 7910 + }, + { + "epoch": 4.02, + "learning_rate": 4.537880421142443e-06, + "loss": 0.7695, + "step": 7920 + }, + { + "epoch": 4.03, + "learning_rate": 4.492104526305174e-06, + "loss": 0.7681, + "step": 7930 + }, + { + "epoch": 4.04, + "learning_rate": 4.44653788449188e-06, + "loss": 0.7337, + "step": 7940 + }, + { + "epoch": 4.04, + "learning_rate": 4.401180960643375e-06, + "loss": 0.7758, + "step": 7950 + }, + { + "epoch": 4.05, + "learning_rate": 4.3560342175606064e-06, + "loss": 0.7611, + "step": 7960 + }, + { + "epoch": 4.05, + "learning_rate": 4.311098115899936e-06, + "loss": 0.762, + "step": 7970 + }, + { + "epoch": 4.06, + "learning_rate": 4.266373114168445e-06, + "loss": 0.7921, + "step": 7980 + }, + { + "epoch": 4.06, + "learning_rate": 4.22185966871923e-06, + "loss": 0.7286, + "step": 7990 + }, + { + "epoch": 4.07, + "learning_rate": 4.177558233746787e-06, + "loss": 0.7583, + "step": 8000 + }, + { + "epoch": 4.07, + "learning_rate": 4.133469261282341e-06, + "loss": 0.8479, + "step": 8010 + }, + { + "epoch": 4.08, + "learning_rate": 4.089593201189259e-06, + "loss": 0.8339, + "step": 8020 + }, + { + "epoch": 4.08, + "learning_rate": 4.045930501158443e-06, + "loss": 0.7481, + "step": 8030 + }, + { + "epoch": 4.09, + "learning_rate": 4.00248160670377e-06, + "loss": 0.8175, + "step": 8040 + }, + { + "epoch": 4.09, + "learning_rate": 3.959246961157545e-06, + "loss": 0.7694, + "step": 8050 + }, + { + "epoch": 4.1, + "learning_rate": 3.916227005665976e-06, + "loss": 0.7131, + "step": 8060 + }, + { + "epoch": 4.1, + "learning_rate": 3.873422179184677e-06, + "loss": 0.7284, + "step": 8070 + }, + { + "epoch": 4.11, + "learning_rate": 3.83083291847417e-06, + "loss": 0.8848, + "step": 8080 + }, + { + "epoch": 4.11, + "learning_rate": 3.7884596580954668e-06, + "loss": 0.7927, + "step": 8090 + }, + { + "epoch": 4.12, + "learning_rate": 3.7463028304055987e-06, + "loss": 0.884, + "step": 8100 + }, + { + "epoch": 4.12, + "learning_rate": 3.704362865553221e-06, + "loss": 0.7089, + "step": 8110 + }, + { + "epoch": 4.13, + "learning_rate": 3.662640191474223e-06, + "loss": 0.8192, + "step": 8120 + }, + { + "epoch": 4.13, + "learning_rate": 3.621135233887363e-06, + "loss": 0.8491, + "step": 8130 + }, + { + "epoch": 4.14, + "learning_rate": 3.5798484162899105e-06, + "loss": 0.8009, + "step": 8140 + }, + { + "epoch": 4.14, + "learning_rate": 3.5387801599533475e-06, + "loss": 0.8679, + "step": 8150 + }, + { + "epoch": 4.15, + "learning_rate": 3.4979308839190565e-06, + "loss": 0.7229, + "step": 8160 + }, + { + "epoch": 4.15, + "learning_rate": 3.4573010049940403e-06, + "loss": 0.8634, + "step": 8170 + }, + { + "epoch": 4.16, + "learning_rate": 3.4168909377466836e-06, + "loss": 0.8256, + "step": 8180 + }, + { + "epoch": 4.16, + "learning_rate": 3.3767010945025075e-06, + "loss": 0.7816, + "step": 8190 + }, + { + "epoch": 4.17, + "learning_rate": 3.3367318853399775e-06, + "loss": 0.7579, + "step": 8200 + }, + { + "epoch": 4.17, + "learning_rate": 3.296983718086308e-06, + "loss": 0.7629, + "step": 8210 + }, + { + "epoch": 4.18, + "learning_rate": 3.257456998313302e-06, + "loss": 0.8061, + "step": 8220 + }, + { + "epoch": 4.18, + "learning_rate": 3.2181521293332213e-06, + "loss": 0.8339, + "step": 8230 + }, + { + "epoch": 4.19, + "learning_rate": 3.1790695121946627e-06, + "loss": 0.677, + "step": 8240 + }, + { + "epoch": 4.19, + "learning_rate": 3.140209545678463e-06, + "loss": 0.7408, + "step": 8250 + }, + { + "epoch": 4.2, + "learning_rate": 3.101572626293642e-06, + "loss": 0.8345, + "step": 8260 + }, + { + "epoch": 4.2, + "learning_rate": 3.063159148273351e-06, + "loss": 0.7823, + "step": 8270 + }, + { + "epoch": 4.21, + "learning_rate": 3.024969503570843e-06, + "loss": 0.8105, + "step": 8280 + }, + { + "epoch": 4.21, + "learning_rate": 2.9870040818554934e-06, + "loss": 0.7489, + "step": 8290 + }, + { + "epoch": 4.22, + "learning_rate": 2.9492632705087926e-06, + "loss": 0.7394, + "step": 8300 + }, + { + "epoch": 4.22, + "learning_rate": 2.9117474546204283e-06, + "loss": 0.8176, + "step": 8310 + }, + { + "epoch": 4.23, + "learning_rate": 2.874457016984325e-06, + "loss": 0.7422, + "step": 8320 + }, + { + "epoch": 4.23, + "learning_rate": 2.8373923380947657e-06, + "loss": 0.7723, + "step": 8330 + }, + { + "epoch": 4.24, + "learning_rate": 2.80055379614248e-06, + "loss": 0.7783, + "step": 8340 + }, + { + "epoch": 4.24, + "learning_rate": 2.7639417670108165e-06, + "loss": 0.8144, + "step": 8350 + }, + { + "epoch": 4.25, + "learning_rate": 2.7275566242718846e-06, + "loss": 0.6772, + "step": 8360 + }, + { + "epoch": 4.25, + "learning_rate": 2.6913987391827545e-06, + "loss": 0.7854, + "step": 8370 + }, + { + "epoch": 4.26, + "learning_rate": 2.655468480681658e-06, + "loss": 0.8284, + "step": 8380 + }, + { + "epoch": 4.26, + "learning_rate": 2.6197662153842424e-06, + "loss": 0.8052, + "step": 8390 + }, + { + "epoch": 4.27, + "learning_rate": 2.584292307579808e-06, + "loss": 0.7989, + "step": 8400 + }, + { + "epoch": 4.27, + "learning_rate": 2.549047119227607e-06, + "loss": 0.6619, + "step": 8410 + }, + { + "epoch": 4.28, + "learning_rate": 2.5140310099531494e-06, + "loss": 0.7794, + "step": 8420 + }, + { + "epoch": 4.28, + "learning_rate": 2.479244337044509e-06, + "loss": 0.6983, + "step": 8430 + }, + { + "epoch": 4.29, + "learning_rate": 2.4446874554487216e-06, + "loss": 0.8443, + "step": 8440 + }, + { + "epoch": 4.29, + "learning_rate": 2.4103607177681233e-06, + "loss": 0.7472, + "step": 8450 + }, + { + "epoch": 4.3, + "learning_rate": 2.3762644742567786e-06, + "loss": 0.7612, + "step": 8460 + }, + { + "epoch": 4.3, + "learning_rate": 2.342399072816895e-06, + "loss": 0.7782, + "step": 8470 + }, + { + "epoch": 4.31, + "learning_rate": 2.308764858995266e-06, + "loss": 0.7171, + "step": 8480 + }, + { + "epoch": 4.31, + "learning_rate": 2.275362175979767e-06, + "loss": 0.777, + "step": 8490 + }, + { + "epoch": 4.32, + "learning_rate": 2.2421913645958304e-06, + "loss": 0.7401, + "step": 8500 + }, + { + "epoch": 4.32, + "learning_rate": 2.209252763302988e-06, + "loss": 0.6972, + "step": 8510 + }, + { + "epoch": 4.33, + "learning_rate": 2.176546708191396e-06, + "loss": 0.7424, + "step": 8520 + }, + { + "epoch": 4.33, + "learning_rate": 2.1440735329784273e-06, + "loss": 0.7971, + "step": 8530 + }, + { + "epoch": 4.34, + "learning_rate": 2.1118335690052533e-06, + "loss": 0.8577, + "step": 8540 + }, + { + "epoch": 4.35, + "learning_rate": 2.079827145233465e-06, + "loss": 0.8388, + "step": 8550 + }, + { + "epoch": 4.35, + "learning_rate": 2.048054588241721e-06, + "loss": 0.7897, + "step": 8560 + }, + { + "epoch": 4.36, + "learning_rate": 2.0165162222224087e-06, + "loss": 0.7454, + "step": 8570 + }, + { + "epoch": 4.36, + "learning_rate": 1.985212368978345e-06, + "loss": 0.6974, + "step": 8580 + }, + { + "epoch": 4.37, + "learning_rate": 1.954143347919482e-06, + "loss": 0.8017, + "step": 8590 + }, + { + "epoch": 4.37, + "learning_rate": 1.923309476059654e-06, + "loss": 0.6967, + "step": 8600 + }, + { + "epoch": 4.38, + "learning_rate": 1.8927110680133448e-06, + "loss": 0.7471, + "step": 8610 + }, + { + "epoch": 4.38, + "learning_rate": 1.8623484359924753e-06, + "loss": 0.7466, + "step": 8620 + }, + { + "epoch": 4.39, + "learning_rate": 1.832221889803215e-06, + "loss": 0.7727, + "step": 8630 + }, + { + "epoch": 4.39, + "learning_rate": 1.8023317368428272e-06, + "loss": 0.6746, + "step": 8640 + }, + { + "epoch": 4.4, + "learning_rate": 1.772678282096521e-06, + "loss": 0.7834, + "step": 8650 + }, + { + "epoch": 4.4, + "learning_rate": 1.7432618281343571e-06, + "loss": 0.854, + "step": 8660 + }, + { + "epoch": 4.41, + "learning_rate": 1.7140826751081417e-06, + "loss": 0.7994, + "step": 8670 + }, + { + "epoch": 4.41, + "learning_rate": 1.685141120748379e-06, + "loss": 0.8229, + "step": 8680 + }, + { + "epoch": 4.42, + "learning_rate": 1.6564374603612293e-06, + "loss": 0.7353, + "step": 8690 + }, + { + "epoch": 4.42, + "learning_rate": 1.6279719868254772e-06, + "loss": 0.7952, + "step": 8700 + }, + { + "epoch": 4.43, + "learning_rate": 1.5997449905895773e-06, + "loss": 0.866, + "step": 8710 + }, + { + "epoch": 4.43, + "learning_rate": 1.5717567596686661e-06, + "loss": 0.8125, + "step": 8720 + }, + { + "epoch": 4.44, + "learning_rate": 1.5440075796416292e-06, + "loss": 0.8302, + "step": 8730 + }, + { + "epoch": 4.44, + "learning_rate": 1.5164977336481896e-06, + "loss": 0.7714, + "step": 8740 + }, + { + "epoch": 4.45, + "learning_rate": 1.4892275023860176e-06, + "loss": 0.841, + "step": 8750 + }, + { + "epoch": 4.45, + "learning_rate": 1.4621971641078646e-06, + "loss": 0.8062, + "step": 8760 + }, + { + "epoch": 4.46, + "learning_rate": 1.4354069946187292e-06, + "loss": 0.839, + "step": 8770 + }, + { + "epoch": 4.46, + "learning_rate": 1.408857267273031e-06, + "loss": 0.7927, + "step": 8780 + }, + { + "epoch": 4.47, + "learning_rate": 1.3825482529718382e-06, + "loss": 0.7557, + "step": 8790 + }, + { + "epoch": 4.47, + "learning_rate": 1.3564802201600919e-06, + "loss": 0.7451, + "step": 8800 + }, + { + "epoch": 4.48, + "learning_rate": 1.3306534348238697e-06, + "loss": 0.7812, + "step": 8810 + }, + { + "epoch": 4.48, + "learning_rate": 1.305068160487674e-06, + "loss": 0.7787, + "step": 8820 + }, + { + "epoch": 4.49, + "learning_rate": 1.2797246582117422e-06, + "loss": 0.712, + "step": 8830 + }, + { + "epoch": 4.49, + "learning_rate": 1.2546231865893794e-06, + "loss": 0.8208, + "step": 8840 + }, + { + "epoch": 4.5, + "learning_rate": 1.2297640017443213e-06, + "loss": 0.7492, + "step": 8850 + }, + { + "epoch": 4.5, + "learning_rate": 1.2051473573281292e-06, + "loss": 0.8182, + "step": 8860 + }, + { + "epoch": 4.51, + "learning_rate": 1.180773504517585e-06, + "loss": 0.7812, + "step": 8870 + }, + { + "epoch": 4.51, + "learning_rate": 1.1566426920121415e-06, + "loss": 0.7459, + "step": 8880 + }, + { + "epoch": 4.52, + "learning_rate": 1.132755166031385e-06, + "loss": 0.8221, + "step": 8890 + }, + { + "epoch": 4.52, + "learning_rate": 1.1091111703125157e-06, + "loss": 0.6909, + "step": 8900 + }, + { + "epoch": 4.53, + "learning_rate": 1.0857109461078679e-06, + "loss": 0.8285, + "step": 8910 + }, + { + "epoch": 4.53, + "learning_rate": 1.0625547321824385e-06, + "loss": 0.8397, + "step": 8920 + }, + { + "epoch": 4.54, + "learning_rate": 1.0396427648114632e-06, + "loss": 0.765, + "step": 8930 + }, + { + "epoch": 4.54, + "learning_rate": 1.0169752777779984e-06, + "loss": 0.795, + "step": 8940 + }, + { + "epoch": 4.55, + "learning_rate": 9.945525023705327e-07, + "loss": 0.6929, + "step": 8950 + }, + { + "epoch": 4.55, + "learning_rate": 9.723746673806377e-07, + "loss": 0.8618, + "step": 8960 + }, + { + "epoch": 4.56, + "learning_rate": 9.50441999100618e-07, + "loss": 0.7577, + "step": 8970 + }, + { + "epoch": 4.56, + "learning_rate": 9.287547213212206e-07, + "loss": 0.8186, + "step": 8980 + }, + { + "epoch": 4.57, + "learning_rate": 9.07313055329334e-07, + "loss": 0.8268, + "step": 8990 + }, + { + "epoch": 4.57, + "learning_rate": 8.861172199057466e-07, + "loss": 0.8767, + "step": 9000 + }, + { + "epoch": 4.58, + "learning_rate": 8.651674313228997e-07, + "loss": 0.716, + "step": 9010 + }, + { + "epoch": 4.58, + "learning_rate": 8.444639033426904e-07, + "loss": 0.8054, + "step": 9020 + }, + { + "epoch": 4.59, + "learning_rate": 8.240068472142815e-07, + "loss": 0.7585, + "step": 9030 + }, + { + "epoch": 4.59, + "learning_rate": 8.037964716719609e-07, + "loss": 0.8554, + "step": 9040 + }, + { + "epoch": 4.6, + "learning_rate": 7.838329829329943e-07, + "loss": 0.7113, + "step": 9050 + }, + { + "epoch": 4.6, + "learning_rate": 7.641165846955345e-07, + "loss": 0.8091, + "step": 9060 + }, + { + "epoch": 4.61, + "learning_rate": 7.446474781365314e-07, + "loss": 0.7636, + "step": 9070 + }, + { + "epoch": 4.61, + "learning_rate": 7.254258619096982e-07, + "loss": 0.7905, + "step": 9080 + }, + { + "epoch": 4.62, + "learning_rate": 7.083381736919054e-07, + "loss": 0.796, + "step": 9090 + }, + { + "epoch": 4.62, + "learning_rate": 6.895873273412512e-07, + "loss": 0.7762, + "step": 9100 + }, + { + "epoch": 4.63, + "learning_rate": 6.710845331309279e-07, + "loss": 0.7687, + "step": 9110 + }, + { + "epoch": 4.63, + "learning_rate": 6.528299798548327e-07, + "loss": 0.769, + "step": 9120 + }, + { + "epoch": 4.64, + "learning_rate": 6.34823853773936e-07, + "loss": 0.8484, + "step": 9130 + }, + { + "epoch": 4.64, + "learning_rate": 6.170663386143721e-07, + "loss": 0.7223, + "step": 9140 + }, + { + "epoch": 4.65, + "learning_rate": 5.995576155655657e-07, + "loss": 0.8462, + "step": 9150 + }, + { + "epoch": 4.66, + "learning_rate": 5.822978632783748e-07, + "loss": 0.7518, + "step": 9160 + }, + { + "epoch": 4.66, + "learning_rate": 5.652872578632867e-07, + "loss": 0.7555, + "step": 9170 + }, + { + "epoch": 4.67, + "learning_rate": 5.485259728886055e-07, + "loss": 0.7159, + "step": 9180 + }, + { + "epoch": 4.67, + "learning_rate": 5.320141793786815e-07, + "loss": 0.8041, + "step": 9190 + }, + { + "epoch": 4.68, + "learning_rate": 5.157520458121734e-07, + "loss": 0.7807, + "step": 9200 + }, + { + "epoch": 4.68, + "learning_rate": 4.997397381203278e-07, + "loss": 0.7471, + "step": 9210 + }, + { + "epoch": 4.69, + "learning_rate": 4.839774196852831e-07, + "loss": 0.6955, + "step": 9220 + }, + { + "epoch": 4.69, + "learning_rate": 4.6846525133840135e-07, + "loss": 0.8771, + "step": 9230 + }, + { + "epoch": 4.7, + "learning_rate": 4.532033913586281e-07, + "loss": 0.7871, + "step": 9240 + }, + { + "epoch": 4.7, + "learning_rate": 4.3819199547089073e-07, + "loss": 0.794, + "step": 9250 + }, + { + "epoch": 4.71, + "learning_rate": 4.234312168444804e-07, + "loss": 0.7949, + "step": 9260 + }, + { + "epoch": 4.71, + "learning_rate": 4.0892120609151706e-07, + "loss": 0.8159, + "step": 9270 + }, + { + "epoch": 4.72, + "learning_rate": 3.946621112654009e-07, + "loss": 0.7887, + "step": 9280 + }, + { + "epoch": 4.72, + "learning_rate": 3.806540778593021e-07, + "loss": 0.7609, + "step": 9290 + }, + { + "epoch": 4.73, + "learning_rate": 3.668972488046762e-07, + "loss": 0.7927, + "step": 9300 + }, + { + "epoch": 4.73, + "learning_rate": 3.5339176446980424e-07, + "loss": 0.7027, + "step": 9310 + }, + { + "epoch": 4.74, + "learning_rate": 3.4013776265836293e-07, + "loss": 0.7591, + "step": 9320 + }, + { + "epoch": 4.74, + "learning_rate": 3.271353786080261e-07, + "loss": 0.8672, + "step": 9330 + }, + { + "epoch": 4.75, + "learning_rate": 3.143847449890658e-07, + "loss": 0.7764, + "step": 9340 + }, + { + "epoch": 4.75, + "learning_rate": 3.018859919030198e-07, + "loss": 0.7795, + "step": 9350 + }, + { + "epoch": 4.76, + "learning_rate": 2.896392468813458e-07, + "loss": 0.8484, + "step": 9360 + }, + { + "epoch": 4.76, + "learning_rate": 2.7764463488413327e-07, + "loss": 0.7502, + "step": 9370 + }, + { + "epoch": 4.77, + "learning_rate": 2.659022782988241e-07, + "loss": 0.7886, + "step": 9380 + }, + { + "epoch": 4.77, + "learning_rate": 2.5441229693895786e-07, + "loss": 0.7578, + "step": 9390 + }, + { + "epoch": 4.78, + "learning_rate": 2.431748080429619e-07, + "loss": 0.7947, + "step": 9400 + }, + { + "epoch": 4.78, + "learning_rate": 2.32189926272941e-07, + "loss": 0.7695, + "step": 9410 + }, + { + "epoch": 4.79, + "learning_rate": 2.2145776371352288e-07, + "loss": 0.8766, + "step": 9420 + }, + { + "epoch": 4.79, + "learning_rate": 2.109784298707007e-07, + "loss": 0.9572, + "step": 9430 + }, + { + "epoch": 4.8, + "learning_rate": 2.0075203167071733e-07, + "loss": 0.8072, + "step": 9440 + }, + { + "epoch": 4.8, + "learning_rate": 1.9077867345898282e-07, + "loss": 0.7861, + "step": 9450 + }, + { + "epoch": 4.81, + "learning_rate": 1.8105845699900592e-07, + "loss": 0.813, + "step": 9460 + }, + { + "epoch": 4.81, + "learning_rate": 1.7159148147135596e-07, + "loss": 0.714, + "step": 9470 + }, + { + "epoch": 4.82, + "learning_rate": 1.623778434726414e-07, + "loss": 0.8831, + "step": 9480 + }, + { + "epoch": 4.82, + "learning_rate": 1.5341763701453848e-07, + "loss": 0.7104, + "step": 9490 + }, + { + "epoch": 4.83, + "learning_rate": 1.4471095352282804e-07, + "loss": 0.7877, + "step": 9500 + }, + { + "epoch": 4.83, + "learning_rate": 1.362578818364546e-07, + "loss": 0.7484, + "step": 9510 + }, + { + "epoch": 4.84, + "learning_rate": 1.280585082066299e-07, + "loss": 0.6747, + "step": 9520 + }, + { + "epoch": 4.84, + "learning_rate": 1.2011291629594746e-07, + "loss": 0.7271, + "step": 9530 + }, + { + "epoch": 4.85, + "learning_rate": 1.1242118717753047e-07, + "loss": 0.8372, + "step": 9540 + }, + { + "epoch": 4.85, + "learning_rate": 1.0498339933420476e-07, + "loss": 0.8522, + "step": 9550 + }, + { + "epoch": 4.86, + "learning_rate": 9.779962865769654e-08, + "loss": 0.8283, + "step": 9560 + }, + { + "epoch": 4.86, + "learning_rate": 9.086994844786089e-08, + "loss": 0.7474, + "step": 9570 + }, + { + "epoch": 4.87, + "learning_rate": 8.419442941192679e-08, + "loss": 0.7889, + "step": 9580 + }, + { + "epoch": 4.87, + "learning_rate": 7.77731396637893e-08, + "loss": 0.7646, + "step": 9590 + }, + { + "epoch": 4.88, + "learning_rate": 7.160614472329907e-08, + "loss": 0.8362, + "step": 9600 + }, + { + "epoch": 4.88, + "learning_rate": 6.569350751560177e-08, + "loss": 0.7019, + "step": 9610 + }, + { + "epoch": 4.89, + "learning_rate": 6.003528837049966e-08, + "loss": 0.7858, + "step": 9620 + }, + { + "epoch": 4.89, + "learning_rate": 5.46315450218271e-08, + "loss": 0.7506, + "step": 9630 + }, + { + "epoch": 4.9, + "learning_rate": 4.9482332606867746e-08, + "loss": 0.7883, + "step": 9640 + }, + { + "epoch": 4.9, + "learning_rate": 4.458770366578824e-08, + "loss": 0.7803, + "step": 9650 + }, + { + "epoch": 4.91, + "learning_rate": 3.994770814110538e-08, + "loss": 0.772, + "step": 9660 + }, + { + "epoch": 4.91, + "learning_rate": 3.5562393377172595e-08, + "loss": 0.8655, + "step": 9670 + }, + { + "epoch": 4.92, + "learning_rate": 3.1431804119705366e-08, + "loss": 0.7657, + "step": 9680 + }, + { + "epoch": 4.92, + "learning_rate": 2.7555982515312107e-08, + "loss": 0.8572, + "step": 9690 + }, + { + "epoch": 4.93, + "learning_rate": 2.3934968111075095e-08, + "loss": 0.8006, + "step": 9700 + }, + { + "epoch": 4.93, + "learning_rate": 2.0568797854139678e-08, + "loss": 0.749, + "step": 9710 + }, + { + "epoch": 4.94, + "learning_rate": 1.745750609133956e-08, + "loss": 0.7909, + "step": 9720 + }, + { + "epoch": 4.94, + "learning_rate": 1.4601124568849878e-08, + "loss": 0.8854, + "step": 9730 + }, + { + "epoch": 4.95, + "learning_rate": 1.1999682431859672e-08, + "loss": 0.6958, + "step": 9740 + }, + { + "epoch": 4.95, + "learning_rate": 9.653206224272126e-09, + "loss": 0.7082, + "step": 9750 + }, + { + "epoch": 4.96, + "learning_rate": 7.561719888440899e-09, + "loss": 0.8519, + "step": 9760 + }, + { + "epoch": 4.97, + "learning_rate": 5.725244764917537e-09, + "loss": 0.7919, + "step": 9770 + }, + { + "epoch": 4.97, + "learning_rate": 4.143799592240538e-09, + "loss": 0.7442, + "step": 9780 + }, + { + "epoch": 4.98, + "learning_rate": 2.8174005067410637e-09, + "loss": 0.7768, + "step": 9790 + }, + { + "epoch": 4.98, + "learning_rate": 1.7460610423764011e-09, + "loss": 0.803, + "step": 9800 + }, + { + "epoch": 4.99, + "learning_rate": 9.297921305967405e-10, + "loss": 0.738, + "step": 9810 + }, + { + "epoch": 4.99, + "learning_rate": 3.686021002313744e-10, + "loss": 0.7374, + "step": 9820 + }, + { + "epoch": 5.0, + "learning_rate": 6.249667740265696e-11, + "loss": 0.7616, + "step": 9830 + }, + { + "epoch": 5.0, + "step": 9835, + "total_flos": 3.008290083981312e+18, + "train_loss": 0.8266432806266161, + "train_runtime": 62063.7747, + "train_samples_per_second": 2.536, + "train_steps_per_second": 0.158 + } + ], + "logging_steps": 10, + "max_steps": 9835, + "num_train_epochs": 5, + "save_steps": 1000, + "total_flos": 3.008290083981312e+18, + "trial_name": null, + "trial_params": null +}