{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.715047439818082, "global_step": 138500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 1.9934655897958653e-05, "loss": 2.1542, "step": 500 }, { "epoch": 0.02, "learning_rate": 1.9869311795917304e-05, "loss": 2.1432, "step": 1000 }, { "epoch": 0.03, "learning_rate": 1.980396769387595e-05, "loss": 2.1245, "step": 1500 }, { "epoch": 0.04, "learning_rate": 1.9738623591834603e-05, "loss": 2.1267, "step": 2000 }, { "epoch": 0.05, "learning_rate": 1.9673279489793254e-05, "loss": 2.1284, "step": 2500 }, { "epoch": 0.06, "learning_rate": 1.9607935387751905e-05, "loss": 2.1148, "step": 3000 }, { "epoch": 0.07, "learning_rate": 1.9542591285710553e-05, "loss": 2.1241, "step": 3500 }, { "epoch": 0.08, "learning_rate": 1.9477247183669204e-05, "loss": 2.1164, "step": 4000 }, { "epoch": 0.09, "learning_rate": 1.9411903081627855e-05, "loss": 2.0986, "step": 4500 }, { "epoch": 0.1, "learning_rate": 1.9346558979586506e-05, "loss": 2.1253, "step": 5000 }, { "epoch": 0.11, "learning_rate": 1.9281214877545154e-05, "loss": 2.1073, "step": 5500 }, { "epoch": 0.12, "learning_rate": 1.9215870775503805e-05, "loss": 2.1043, "step": 6000 }, { "epoch": 0.13, "learning_rate": 1.9150526673462456e-05, "loss": 2.0925, "step": 6500 }, { "epoch": 0.14, "learning_rate": 1.9085182571421107e-05, "loss": 2.0818, "step": 7000 }, { "epoch": 0.15, "learning_rate": 1.9019838469379755e-05, "loss": 2.0909, "step": 7500 }, { "epoch": 0.16, "learning_rate": 1.8954494367338406e-05, "loss": 2.0959, "step": 8000 }, { "epoch": 0.17, "learning_rate": 1.8889150265297057e-05, "loss": 2.081, "step": 8500 }, { "epoch": 0.18, "learning_rate": 1.8823806163255708e-05, "loss": 2.0949, "step": 9000 }, { "epoch": 0.19, "learning_rate": 1.8758462061214356e-05, "loss": 2.1156, "step": 9500 }, { "epoch": 0.2, "learning_rate": 1.8693117959173007e-05, "loss": 2.1004, "step": 10000 }, { "epoch": 0.21, "learning_rate": 1.8627773857131658e-05, "loss": 2.0927, "step": 10500 }, { "epoch": 0.22, "learning_rate": 1.856242975509031e-05, "loss": 2.0659, "step": 11000 }, { "epoch": 0.23, "learning_rate": 1.849708565304896e-05, "loss": 2.0836, "step": 11500 }, { "epoch": 0.24, "learning_rate": 1.8431741551007608e-05, "loss": 2.0682, "step": 12000 }, { "epoch": 0.25, "learning_rate": 1.836639744896626e-05, "loss": 2.0823, "step": 12500 }, { "epoch": 0.25, "learning_rate": 1.830105334692491e-05, "loss": 2.073, "step": 13000 }, { "epoch": 0.26, "learning_rate": 1.8235709244883558e-05, "loss": 2.0721, "step": 13500 }, { "epoch": 0.27, "learning_rate": 1.817036514284221e-05, "loss": 2.0648, "step": 14000 }, { "epoch": 0.28, "learning_rate": 1.810502104080086e-05, "loss": 2.0805, "step": 14500 }, { "epoch": 0.29, "learning_rate": 1.803967693875951e-05, "loss": 2.0868, "step": 15000 }, { "epoch": 0.3, "learning_rate": 1.797433283671816e-05, "loss": 2.0874, "step": 15500 }, { "epoch": 0.31, "learning_rate": 1.790898873467681e-05, "loss": 2.061, "step": 16000 }, { "epoch": 0.32, "learning_rate": 1.784364463263546e-05, "loss": 2.0772, "step": 16500 }, { "epoch": 0.33, "learning_rate": 1.7778300530594112e-05, "loss": 2.0781, "step": 17000 }, { "epoch": 0.34, "learning_rate": 1.771295642855276e-05, "loss": 2.0834, "step": 17500 }, { "epoch": 0.35, "learning_rate": 1.764761232651141e-05, "loss": 2.0503, "step": 18000 }, { "epoch": 0.36, "learning_rate": 1.7582268224470062e-05, "loss": 2.0431, "step": 18500 }, { "epoch": 0.37, "learning_rate": 1.751692412242871e-05, "loss": 2.066, "step": 19000 }, { "epoch": 0.38, "learning_rate": 1.745158002038736e-05, "loss": 2.0421, "step": 19500 }, { "epoch": 0.39, "learning_rate": 1.7386235918346012e-05, "loss": 2.0712, "step": 20000 }, { "epoch": 0.4, "learning_rate": 1.7320891816304663e-05, "loss": 2.0424, "step": 20500 }, { "epoch": 0.41, "learning_rate": 1.725554771426331e-05, "loss": 2.0361, "step": 21000 }, { "epoch": 0.42, "learning_rate": 1.7190203612221962e-05, "loss": 2.0444, "step": 21500 }, { "epoch": 0.43, "learning_rate": 1.7124859510180613e-05, "loss": 2.0548, "step": 22000 }, { "epoch": 0.44, "learning_rate": 1.705951540813926e-05, "loss": 2.0541, "step": 22500 }, { "epoch": 0.45, "learning_rate": 1.699417130609791e-05, "loss": 2.0404, "step": 23000 }, { "epoch": 0.46, "learning_rate": 1.6928827204056563e-05, "loss": 2.0484, "step": 23500 }, { "epoch": 0.47, "learning_rate": 1.6863483102015214e-05, "loss": 2.0469, "step": 24000 }, { "epoch": 0.48, "learning_rate": 1.679813899997386e-05, "loss": 2.0645, "step": 24500 }, { "epoch": 0.49, "learning_rate": 1.6732794897932513e-05, "loss": 2.0149, "step": 25000 }, { "epoch": 0.5, "learning_rate": 1.6667450795891164e-05, "loss": 2.0381, "step": 25500 }, { "epoch": 0.51, "learning_rate": 1.6602106693849815e-05, "loss": 2.0344, "step": 26000 }, { "epoch": 0.52, "learning_rate": 1.6536762591808463e-05, "loss": 2.0524, "step": 26500 }, { "epoch": 0.53, "learning_rate": 1.6471418489767114e-05, "loss": 2.0369, "step": 27000 }, { "epoch": 0.54, "learning_rate": 1.6406074387725765e-05, "loss": 2.0288, "step": 27500 }, { "epoch": 0.55, "learning_rate": 1.6340730285684416e-05, "loss": 2.0461, "step": 28000 }, { "epoch": 0.56, "learning_rate": 1.6275386183643064e-05, "loss": 2.0462, "step": 28500 }, { "epoch": 0.57, "learning_rate": 1.6210042081601715e-05, "loss": 2.0421, "step": 29000 }, { "epoch": 0.58, "learning_rate": 1.6144697979560366e-05, "loss": 2.0358, "step": 29500 }, { "epoch": 0.59, "learning_rate": 1.6079353877519017e-05, "loss": 2.028, "step": 30000 }, { "epoch": 0.6, "learning_rate": 1.6014009775477665e-05, "loss": 2.0281, "step": 30500 }, { "epoch": 0.61, "learning_rate": 1.5948665673436316e-05, "loss": 2.0444, "step": 31000 }, { "epoch": 0.62, "learning_rate": 1.5883321571394967e-05, "loss": 2.0144, "step": 31500 }, { "epoch": 0.63, "learning_rate": 1.5817977469353618e-05, "loss": 2.0389, "step": 32000 }, { "epoch": 0.64, "learning_rate": 1.5752633367312266e-05, "loss": 2.0288, "step": 32500 }, { "epoch": 0.65, "learning_rate": 1.5687289265270917e-05, "loss": 2.0196, "step": 33000 }, { "epoch": 0.66, "learning_rate": 1.5621945163229568e-05, "loss": 2.0079, "step": 33500 }, { "epoch": 0.67, "learning_rate": 1.555660106118822e-05, "loss": 2.0128, "step": 34000 }, { "epoch": 0.68, "learning_rate": 1.549125695914687e-05, "loss": 2.0139, "step": 34500 }, { "epoch": 0.69, "learning_rate": 1.5425912857105518e-05, "loss": 2.0123, "step": 35000 }, { "epoch": 0.7, "learning_rate": 1.536056875506417e-05, "loss": 2.0349, "step": 35500 }, { "epoch": 0.71, "learning_rate": 1.529522465302282e-05, "loss": 2.0122, "step": 36000 }, { "epoch": 0.72, "learning_rate": 1.5229880550981471e-05, "loss": 2.0399, "step": 36500 }, { "epoch": 0.73, "learning_rate": 1.5164536448940119e-05, "loss": 2.0599, "step": 37000 }, { "epoch": 0.74, "learning_rate": 1.509919234689877e-05, "loss": 2.0234, "step": 37500 }, { "epoch": 0.74, "learning_rate": 1.5033848244857421e-05, "loss": 2.0228, "step": 38000 }, { "epoch": 0.75, "learning_rate": 1.4968504142816072e-05, "loss": 2.0418, "step": 38500 }, { "epoch": 0.76, "learning_rate": 1.490316004077472e-05, "loss": 2.0168, "step": 39000 }, { "epoch": 0.77, "learning_rate": 1.4837815938733371e-05, "loss": 1.9802, "step": 39500 }, { "epoch": 0.78, "learning_rate": 1.4772471836692022e-05, "loss": 2.0072, "step": 40000 }, { "epoch": 0.79, "learning_rate": 1.4707127734650673e-05, "loss": 2.0049, "step": 40500 }, { "epoch": 0.8, "learning_rate": 1.464178363260932e-05, "loss": 2.0096, "step": 41000 }, { "epoch": 0.81, "learning_rate": 1.4576439530567972e-05, "loss": 2.0139, "step": 41500 }, { "epoch": 0.82, "learning_rate": 1.4511095428526623e-05, "loss": 2.0151, "step": 42000 }, { "epoch": 0.83, "learning_rate": 1.4445751326485274e-05, "loss": 2.0238, "step": 42500 }, { "epoch": 0.84, "learning_rate": 1.4380407224443922e-05, "loss": 1.9868, "step": 43000 }, { "epoch": 0.85, "learning_rate": 1.4315063122402573e-05, "loss": 1.9997, "step": 43500 }, { "epoch": 0.86, "learning_rate": 1.4249719020361224e-05, "loss": 2.0127, "step": 44000 }, { "epoch": 0.87, "learning_rate": 1.4184374918319873e-05, "loss": 2.0157, "step": 44500 }, { "epoch": 0.88, "learning_rate": 1.4119030816278523e-05, "loss": 2.0254, "step": 45000 }, { "epoch": 0.89, "learning_rate": 1.4053686714237174e-05, "loss": 1.9978, "step": 45500 }, { "epoch": 0.9, "learning_rate": 1.3988342612195825e-05, "loss": 2.005, "step": 46000 }, { "epoch": 0.91, "learning_rate": 1.3922998510154474e-05, "loss": 2.0188, "step": 46500 }, { "epoch": 0.92, "learning_rate": 1.3857654408113126e-05, "loss": 2.0277, "step": 47000 }, { "epoch": 0.93, "learning_rate": 1.3792310306071775e-05, "loss": 2.0019, "step": 47500 }, { "epoch": 0.94, "learning_rate": 1.3726966204030424e-05, "loss": 2.018, "step": 48000 }, { "epoch": 0.95, "learning_rate": 1.3661622101989076e-05, "loss": 2.0108, "step": 48500 }, { "epoch": 0.96, "learning_rate": 1.3596277999947727e-05, "loss": 2.0228, "step": 49000 }, { "epoch": 0.97, "learning_rate": 1.3530933897906376e-05, "loss": 2.0157, "step": 49500 }, { "epoch": 0.98, "learning_rate": 1.3465589795865025e-05, "loss": 1.9915, "step": 50000 }, { "epoch": 0.99, "learning_rate": 1.3400245693823677e-05, "loss": 1.998, "step": 50500 }, { "epoch": 1.0, "learning_rate": 1.3334901591782328e-05, "loss": 2.0098, "step": 51000 }, { "epoch": 1.0, "eval_gen_len": 18.7755, "eval_loss": 1.8032176494598389, "eval_rouge1": 34.7948, "eval_rouge2": 13.0415, "eval_rougeL": 28.2085, "eval_rougeLsum": 28.2037, "eval_runtime": 1582.0831, "eval_samples_per_second": 7.163, "eval_steps_per_second": 1.791, "step": 51012 }, { "epoch": 1.01, "learning_rate": 1.3269557489740977e-05, "loss": 1.9754, "step": 51500 }, { "epoch": 1.02, "learning_rate": 1.3204213387699626e-05, "loss": 1.9726, "step": 52000 }, { "epoch": 1.03, "learning_rate": 1.3138869285658278e-05, "loss": 1.9691, "step": 52500 }, { "epoch": 1.04, "learning_rate": 1.3073525183616929e-05, "loss": 1.9775, "step": 53000 }, { "epoch": 1.05, "learning_rate": 1.3008181081575576e-05, "loss": 1.9516, "step": 53500 }, { "epoch": 1.06, "learning_rate": 1.2942836979534227e-05, "loss": 1.9323, "step": 54000 }, { "epoch": 1.07, "learning_rate": 1.2877492877492879e-05, "loss": 1.9761, "step": 54500 }, { "epoch": 1.08, "learning_rate": 1.281214877545153e-05, "loss": 1.9441, "step": 55000 }, { "epoch": 1.09, "learning_rate": 1.2746804673410177e-05, "loss": 1.9574, "step": 55500 }, { "epoch": 1.1, "learning_rate": 1.2681460571368828e-05, "loss": 1.9569, "step": 56000 }, { "epoch": 1.11, "learning_rate": 1.261611646932748e-05, "loss": 1.9697, "step": 56500 }, { "epoch": 1.12, "learning_rate": 1.255077236728613e-05, "loss": 1.9628, "step": 57000 }, { "epoch": 1.13, "learning_rate": 1.2485428265244782e-05, "loss": 1.9505, "step": 57500 }, { "epoch": 1.14, "learning_rate": 1.242008416320343e-05, "loss": 1.9773, "step": 58000 }, { "epoch": 1.15, "learning_rate": 1.235474006116208e-05, "loss": 1.9245, "step": 58500 }, { "epoch": 1.16, "learning_rate": 1.2289395959120732e-05, "loss": 1.9719, "step": 59000 }, { "epoch": 1.17, "learning_rate": 1.2224051857079383e-05, "loss": 1.9322, "step": 59500 }, { "epoch": 1.18, "learning_rate": 1.215870775503803e-05, "loss": 1.97, "step": 60000 }, { "epoch": 1.19, "learning_rate": 1.2093363652996682e-05, "loss": 1.9543, "step": 60500 }, { "epoch": 1.2, "learning_rate": 1.2028019550955333e-05, "loss": 1.9503, "step": 61000 }, { "epoch": 1.21, "learning_rate": 1.1962675448913982e-05, "loss": 1.9534, "step": 61500 }, { "epoch": 1.22, "learning_rate": 1.1897331346872632e-05, "loss": 1.9626, "step": 62000 }, { "epoch": 1.23, "learning_rate": 1.1831987244831283e-05, "loss": 1.9583, "step": 62500 }, { "epoch": 1.24, "learning_rate": 1.1766643142789934e-05, "loss": 1.9495, "step": 63000 }, { "epoch": 1.24, "learning_rate": 1.1701299040748583e-05, "loss": 1.9411, "step": 63500 }, { "epoch": 1.25, "learning_rate": 1.1635954938707233e-05, "loss": 1.9631, "step": 64000 }, { "epoch": 1.26, "learning_rate": 1.1570610836665884e-05, "loss": 1.9241, "step": 64500 }, { "epoch": 1.27, "learning_rate": 1.1505266734624533e-05, "loss": 1.9573, "step": 65000 }, { "epoch": 1.28, "learning_rate": 1.1439922632583184e-05, "loss": 1.9407, "step": 65500 }, { "epoch": 1.29, "learning_rate": 1.1374578530541834e-05, "loss": 1.9618, "step": 66000 }, { "epoch": 1.3, "learning_rate": 1.1309234428500485e-05, "loss": 1.9594, "step": 66500 }, { "epoch": 1.31, "learning_rate": 1.1243890326459134e-05, "loss": 1.9566, "step": 67000 }, { "epoch": 1.32, "learning_rate": 1.1178546224417785e-05, "loss": 1.9457, "step": 67500 }, { "epoch": 1.33, "learning_rate": 1.1113202122376435e-05, "loss": 1.9306, "step": 68000 }, { "epoch": 1.34, "learning_rate": 1.1047858020335086e-05, "loss": 1.9522, "step": 68500 }, { "epoch": 1.35, "learning_rate": 1.0982513918293735e-05, "loss": 1.9488, "step": 69000 }, { "epoch": 1.36, "learning_rate": 1.0917169816252386e-05, "loss": 1.9522, "step": 69500 }, { "epoch": 1.37, "learning_rate": 1.0851825714211037e-05, "loss": 1.98, "step": 70000 }, { "epoch": 1.38, "learning_rate": 1.0786481612169685e-05, "loss": 1.9431, "step": 70500 }, { "epoch": 1.39, "learning_rate": 1.0721137510128336e-05, "loss": 1.9281, "step": 71000 }, { "epoch": 1.4, "learning_rate": 1.0655793408086987e-05, "loss": 1.9539, "step": 71500 }, { "epoch": 1.41, "learning_rate": 1.0590449306045638e-05, "loss": 1.9509, "step": 72000 }, { "epoch": 1.42, "learning_rate": 1.0525105204004286e-05, "loss": 1.9401, "step": 72500 }, { "epoch": 1.43, "learning_rate": 1.0459761101962937e-05, "loss": 1.9543, "step": 73000 }, { "epoch": 1.44, "learning_rate": 1.0394416999921588e-05, "loss": 1.9553, "step": 73500 }, { "epoch": 1.45, "learning_rate": 1.032907289788024e-05, "loss": 1.9253, "step": 74000 }, { "epoch": 1.46, "learning_rate": 1.0263728795838887e-05, "loss": 1.9452, "step": 74500 }, { "epoch": 1.47, "learning_rate": 1.0198384693797538e-05, "loss": 1.9564, "step": 75000 }, { "epoch": 1.48, "learning_rate": 1.013304059175619e-05, "loss": 1.9471, "step": 75500 }, { "epoch": 1.49, "learning_rate": 1.006769648971484e-05, "loss": 1.9489, "step": 76000 }, { "epoch": 1.5, "learning_rate": 1.0002352387673488e-05, "loss": 1.9376, "step": 76500 }, { "epoch": 1.51, "learning_rate": 9.93700828563214e-06, "loss": 1.9455, "step": 77000 }, { "epoch": 1.52, "learning_rate": 9.87166418359079e-06, "loss": 1.9483, "step": 77500 }, { "epoch": 1.53, "learning_rate": 9.80632008154944e-06, "loss": 1.9491, "step": 78000 }, { "epoch": 1.54, "learning_rate": 9.74097597950809e-06, "loss": 1.9437, "step": 78500 }, { "epoch": 1.55, "learning_rate": 9.67563187746674e-06, "loss": 1.9628, "step": 79000 }, { "epoch": 1.56, "learning_rate": 9.610287775425391e-06, "loss": 1.9353, "step": 79500 }, { "epoch": 1.57, "learning_rate": 9.54494367338404e-06, "loss": 1.931, "step": 80000 }, { "epoch": 1.58, "learning_rate": 9.479599571342692e-06, "loss": 1.9284, "step": 80500 }, { "epoch": 1.59, "learning_rate": 9.414255469301343e-06, "loss": 1.9464, "step": 81000 }, { "epoch": 1.6, "learning_rate": 9.348911367259992e-06, "loss": 1.9514, "step": 81500 }, { "epoch": 1.61, "learning_rate": 9.283567265218643e-06, "loss": 1.949, "step": 82000 }, { "epoch": 1.62, "learning_rate": 9.218223163177293e-06, "loss": 1.9372, "step": 82500 }, { "epoch": 1.63, "learning_rate": 9.152879061135942e-06, "loss": 1.9309, "step": 83000 }, { "epoch": 1.64, "learning_rate": 9.087534959094593e-06, "loss": 1.9237, "step": 83500 }, { "epoch": 1.65, "learning_rate": 9.022190857053243e-06, "loss": 1.9335, "step": 84000 }, { "epoch": 1.66, "learning_rate": 8.956846755011894e-06, "loss": 1.9301, "step": 84500 }, { "epoch": 1.67, "learning_rate": 8.891502652970543e-06, "loss": 1.926, "step": 85000 }, { "epoch": 1.68, "learning_rate": 8.826158550929194e-06, "loss": 1.9587, "step": 85500 }, { "epoch": 1.69, "learning_rate": 8.760814448887844e-06, "loss": 1.9266, "step": 86000 }, { "epoch": 1.7, "learning_rate": 8.695470346846495e-06, "loss": 1.9447, "step": 86500 }, { "epoch": 1.71, "learning_rate": 8.630126244805144e-06, "loss": 1.9143, "step": 87000 }, { "epoch": 1.72, "learning_rate": 8.564782142763794e-06, "loss": 1.9238, "step": 87500 }, { "epoch": 1.73, "learning_rate": 8.499438040722445e-06, "loss": 1.9292, "step": 88000 }, { "epoch": 1.73, "learning_rate": 8.434093938681094e-06, "loss": 1.9096, "step": 88500 }, { "epoch": 1.74, "learning_rate": 8.368749836639745e-06, "loss": 1.918, "step": 89000 }, { "epoch": 1.75, "learning_rate": 8.303405734598395e-06, "loss": 1.933, "step": 89500 }, { "epoch": 1.76, "learning_rate": 8.238061632557046e-06, "loss": 1.9313, "step": 90000 }, { "epoch": 1.77, "learning_rate": 8.172717530515695e-06, "loss": 1.9572, "step": 90500 }, { "epoch": 1.78, "learning_rate": 8.107373428474346e-06, "loss": 1.9296, "step": 91000 }, { "epoch": 1.79, "learning_rate": 8.042029326432996e-06, "loss": 1.922, "step": 91500 }, { "epoch": 1.8, "learning_rate": 7.976685224391647e-06, "loss": 1.9346, "step": 92000 }, { "epoch": 1.81, "learning_rate": 7.911341122350298e-06, "loss": 1.9138, "step": 92500 }, { "epoch": 1.82, "learning_rate": 7.845997020308947e-06, "loss": 1.9432, "step": 93000 }, { "epoch": 1.83, "learning_rate": 7.780652918267598e-06, "loss": 1.9335, "step": 93500 }, { "epoch": 1.84, "learning_rate": 7.715308816226248e-06, "loss": 1.9463, "step": 94000 }, { "epoch": 1.85, "learning_rate": 7.649964714184899e-06, "loss": 1.9164, "step": 94500 }, { "epoch": 1.86, "learning_rate": 7.584620612143548e-06, "loss": 1.9452, "step": 95000 }, { "epoch": 1.87, "learning_rate": 7.5192765101021995e-06, "loss": 1.947, "step": 95500 }, { "epoch": 1.88, "learning_rate": 7.453932408060849e-06, "loss": 1.9233, "step": 96000 }, { "epoch": 1.89, "learning_rate": 7.3885883060195e-06, "loss": 1.9377, "step": 96500 }, { "epoch": 1.9, "learning_rate": 7.323244203978149e-06, "loss": 1.9302, "step": 97000 }, { "epoch": 1.91, "learning_rate": 7.2579001019368e-06, "loss": 1.9431, "step": 97500 }, { "epoch": 1.92, "learning_rate": 7.19255599989545e-06, "loss": 1.9398, "step": 98000 }, { "epoch": 1.93, "learning_rate": 7.1272118978541e-06, "loss": 1.939, "step": 98500 }, { "epoch": 1.94, "learning_rate": 7.06186779581275e-06, "loss": 1.9193, "step": 99000 }, { "epoch": 1.95, "learning_rate": 6.996523693771401e-06, "loss": 1.9228, "step": 99500 }, { "epoch": 1.96, "learning_rate": 6.931179591730051e-06, "loss": 1.9107, "step": 100000 }, { "epoch": 1.97, "learning_rate": 6.865835489688701e-06, "loss": 1.9554, "step": 100500 }, { "epoch": 1.98, "learning_rate": 6.8004913876473514e-06, "loss": 1.9398, "step": 101000 }, { "epoch": 1.99, "learning_rate": 6.735147285606002e-06, "loss": 1.9089, "step": 101500 }, { "epoch": 2.0, "learning_rate": 6.669803183564651e-06, "loss": 1.9315, "step": 102000 }, { "epoch": 2.0, "eval_gen_len": 18.7867, "eval_loss": 1.7752913236618042, "eval_rouge1": 35.1217, "eval_rouge2": 13.5043, "eval_rougeL": 28.6409, "eval_rougeLsum": 28.6441, "eval_runtime": 1535.0703, "eval_samples_per_second": 7.382, "eval_steps_per_second": 1.846, "step": 102024 }, { "epoch": 2.01, "learning_rate": 6.604459081523302e-06, "loss": 1.8969, "step": 102500 }, { "epoch": 2.02, "learning_rate": 6.539114979481952e-06, "loss": 1.8972, "step": 103000 }, { "epoch": 2.03, "learning_rate": 6.473770877440603e-06, "loss": 1.9123, "step": 103500 }, { "epoch": 2.04, "learning_rate": 6.408426775399254e-06, "loss": 1.8942, "step": 104000 }, { "epoch": 2.05, "learning_rate": 6.343082673357903e-06, "loss": 1.8978, "step": 104500 }, { "epoch": 2.06, "learning_rate": 6.277738571316554e-06, "loss": 1.922, "step": 105000 }, { "epoch": 2.07, "learning_rate": 6.212394469275204e-06, "loss": 1.9113, "step": 105500 }, { "epoch": 2.08, "learning_rate": 6.147050367233854e-06, "loss": 1.8857, "step": 106000 }, { "epoch": 2.09, "learning_rate": 6.081706265192504e-06, "loss": 1.9106, "step": 106500 }, { "epoch": 2.1, "learning_rate": 6.0163621631511545e-06, "loss": 1.9159, "step": 107000 }, { "epoch": 2.11, "learning_rate": 5.951018061109805e-06, "loss": 1.8901, "step": 107500 }, { "epoch": 2.12, "learning_rate": 5.885673959068455e-06, "loss": 1.8963, "step": 108000 }, { "epoch": 2.13, "learning_rate": 5.820329857027105e-06, "loss": 1.8897, "step": 108500 }, { "epoch": 2.14, "learning_rate": 5.7549857549857555e-06, "loss": 1.8902, "step": 109000 }, { "epoch": 2.15, "learning_rate": 5.689641652944406e-06, "loss": 1.8956, "step": 109500 }, { "epoch": 2.16, "learning_rate": 5.624297550903056e-06, "loss": 1.8823, "step": 110000 }, { "epoch": 2.17, "learning_rate": 5.558953448861706e-06, "loss": 1.9129, "step": 110500 }, { "epoch": 2.18, "learning_rate": 5.4936093468203565e-06, "loss": 1.8867, "step": 111000 }, { "epoch": 2.19, "learning_rate": 5.428265244779006e-06, "loss": 1.9093, "step": 111500 }, { "epoch": 2.2, "learning_rate": 5.362921142737657e-06, "loss": 1.9047, "step": 112000 }, { "epoch": 2.21, "learning_rate": 5.2975770406963064e-06, "loss": 1.9123, "step": 112500 }, { "epoch": 2.22, "learning_rate": 5.2322329386549575e-06, "loss": 1.8867, "step": 113000 }, { "epoch": 2.22, "learning_rate": 5.166888836613607e-06, "loss": 1.9057, "step": 113500 }, { "epoch": 2.23, "learning_rate": 5.101544734572258e-06, "loss": 1.9101, "step": 114000 }, { "epoch": 2.24, "learning_rate": 5.0362006325309075e-06, "loss": 1.9098, "step": 114500 }, { "epoch": 2.25, "learning_rate": 4.9708565304895586e-06, "loss": 1.9102, "step": 115000 }, { "epoch": 2.26, "learning_rate": 4.905512428448209e-06, "loss": 1.8988, "step": 115500 }, { "epoch": 2.27, "learning_rate": 4.840168326406859e-06, "loss": 1.9089, "step": 116000 }, { "epoch": 2.28, "learning_rate": 4.774824224365509e-06, "loss": 1.8994, "step": 116500 }, { "epoch": 2.29, "learning_rate": 4.70948012232416e-06, "loss": 1.892, "step": 117000 }, { "epoch": 2.3, "learning_rate": 4.64413602028281e-06, "loss": 1.8783, "step": 117500 }, { "epoch": 2.31, "learning_rate": 4.57879191824146e-06, "loss": 1.8987, "step": 118000 }, { "epoch": 2.32, "learning_rate": 4.51344781620011e-06, "loss": 1.8995, "step": 118500 }, { "epoch": 2.33, "learning_rate": 4.448103714158761e-06, "loss": 1.9008, "step": 119000 }, { "epoch": 2.34, "learning_rate": 4.38275961211741e-06, "loss": 1.9234, "step": 119500 }, { "epoch": 2.35, "learning_rate": 4.31741551007606e-06, "loss": 1.902, "step": 120000 }, { "epoch": 2.36, "learning_rate": 4.252071408034711e-06, "loss": 1.893, "step": 120500 }, { "epoch": 2.37, "learning_rate": 4.186727305993362e-06, "loss": 1.9127, "step": 121000 }, { "epoch": 2.38, "learning_rate": 4.121383203952012e-06, "loss": 1.9057, "step": 121500 }, { "epoch": 2.39, "learning_rate": 4.056039101910662e-06, "loss": 1.8866, "step": 122000 }, { "epoch": 2.4, "learning_rate": 3.990694999869312e-06, "loss": 1.8934, "step": 122500 }, { "epoch": 2.41, "learning_rate": 3.925350897827963e-06, "loss": 1.8902, "step": 123000 }, { "epoch": 2.42, "learning_rate": 3.860006795786613e-06, "loss": 1.9046, "step": 123500 }, { "epoch": 2.43, "learning_rate": 3.794662693745263e-06, "loss": 1.9003, "step": 124000 }, { "epoch": 2.44, "learning_rate": 3.729318591703913e-06, "loss": 1.8954, "step": 124500 }, { "epoch": 2.45, "learning_rate": 3.6639744896625632e-06, "loss": 1.8877, "step": 125000 }, { "epoch": 2.46, "learning_rate": 3.5986303876212135e-06, "loss": 1.8993, "step": 125500 }, { "epoch": 2.47, "learning_rate": 3.5332862855798637e-06, "loss": 1.9162, "step": 126000 }, { "epoch": 2.48, "learning_rate": 3.467942183538514e-06, "loss": 1.8953, "step": 126500 }, { "epoch": 2.49, "learning_rate": 3.4025980814971643e-06, "loss": 1.9295, "step": 127000 }, { "epoch": 2.5, "learning_rate": 3.3372539794558145e-06, "loss": 1.9046, "step": 127500 }, { "epoch": 2.51, "learning_rate": 3.2719098774144648e-06, "loss": 1.8862, "step": 128000 }, { "epoch": 2.52, "learning_rate": 3.206565775373115e-06, "loss": 1.8719, "step": 128500 }, { "epoch": 2.53, "learning_rate": 3.1412216733317653e-06, "loss": 1.8713, "step": 129000 }, { "epoch": 2.54, "learning_rate": 3.0758775712904155e-06, "loss": 1.8959, "step": 129500 }, { "epoch": 2.55, "learning_rate": 3.0105334692490658e-06, "loss": 1.8966, "step": 130000 }, { "epoch": 2.56, "learning_rate": 2.945189367207716e-06, "loss": 1.8493, "step": 130500 }, { "epoch": 2.57, "learning_rate": 2.8798452651663663e-06, "loss": 1.8897, "step": 131000 }, { "epoch": 2.58, "learning_rate": 2.814501163125016e-06, "loss": 1.8878, "step": 131500 }, { "epoch": 2.59, "learning_rate": 2.7491570610836672e-06, "loss": 1.8951, "step": 132000 }, { "epoch": 2.6, "learning_rate": 2.6838129590423175e-06, "loss": 1.8869, "step": 132500 }, { "epoch": 2.61, "learning_rate": 2.6184688570009677e-06, "loss": 1.8904, "step": 133000 }, { "epoch": 2.62, "learning_rate": 2.5531247549596176e-06, "loss": 1.8882, "step": 133500 }, { "epoch": 2.63, "learning_rate": 2.487780652918268e-06, "loss": 1.9147, "step": 134000 }, { "epoch": 2.64, "learning_rate": 2.422436550876918e-06, "loss": 1.8836, "step": 134500 }, { "epoch": 2.65, "learning_rate": 2.3570924488355683e-06, "loss": 1.8834, "step": 135000 }, { "epoch": 2.66, "learning_rate": 2.2917483467942186e-06, "loss": 1.9004, "step": 135500 }, { "epoch": 2.67, "learning_rate": 2.226404244752869e-06, "loss": 1.8874, "step": 136000 }, { "epoch": 2.68, "learning_rate": 2.161060142711519e-06, "loss": 1.9072, "step": 136500 }, { "epoch": 2.69, "learning_rate": 2.0957160406701694e-06, "loss": 1.8991, "step": 137000 }, { "epoch": 2.7, "learning_rate": 2.0303719386288196e-06, "loss": 1.8925, "step": 137500 }, { "epoch": 2.71, "learning_rate": 1.96502783658747e-06, "loss": 1.8771, "step": 138000 }, { "epoch": 2.72, "learning_rate": 1.89968373454612e-06, "loss": 1.9066, "step": 138500 } ], "max_steps": 153036, "num_train_epochs": 3, "total_flos": 3.687242740119245e+17, "trial_name": null, "trial_params": null }