|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.715047439818082, |
|
"global_step": 138500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9934655897958653e-05, |
|
"loss": 2.1542, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9869311795917304e-05, |
|
"loss": 2.1432, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.980396769387595e-05, |
|
"loss": 2.1245, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9738623591834603e-05, |
|
"loss": 2.1267, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9673279489793254e-05, |
|
"loss": 2.1284, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9607935387751905e-05, |
|
"loss": 2.1148, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9542591285710553e-05, |
|
"loss": 2.1241, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9477247183669204e-05, |
|
"loss": 2.1164, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9411903081627855e-05, |
|
"loss": 2.0986, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9346558979586506e-05, |
|
"loss": 2.1253, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9281214877545154e-05, |
|
"loss": 2.1073, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9215870775503805e-05, |
|
"loss": 2.1043, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.9150526673462456e-05, |
|
"loss": 2.0925, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9085182571421107e-05, |
|
"loss": 2.0818, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9019838469379755e-05, |
|
"loss": 2.0909, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.8954494367338406e-05, |
|
"loss": 2.0959, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8889150265297057e-05, |
|
"loss": 2.081, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8823806163255708e-05, |
|
"loss": 2.0949, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.8758462061214356e-05, |
|
"loss": 2.1156, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8693117959173007e-05, |
|
"loss": 2.1004, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.8627773857131658e-05, |
|
"loss": 2.0927, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.856242975509031e-05, |
|
"loss": 2.0659, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.849708565304896e-05, |
|
"loss": 2.0836, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.8431741551007608e-05, |
|
"loss": 2.0682, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.836639744896626e-05, |
|
"loss": 2.0823, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.830105334692491e-05, |
|
"loss": 2.073, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.8235709244883558e-05, |
|
"loss": 2.0721, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.817036514284221e-05, |
|
"loss": 2.0648, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.810502104080086e-05, |
|
"loss": 2.0805, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.803967693875951e-05, |
|
"loss": 2.0868, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.797433283671816e-05, |
|
"loss": 2.0874, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.790898873467681e-05, |
|
"loss": 2.061, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.784364463263546e-05, |
|
"loss": 2.0772, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.7778300530594112e-05, |
|
"loss": 2.0781, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.771295642855276e-05, |
|
"loss": 2.0834, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.764761232651141e-05, |
|
"loss": 2.0503, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.7582268224470062e-05, |
|
"loss": 2.0431, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.751692412242871e-05, |
|
"loss": 2.066, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.745158002038736e-05, |
|
"loss": 2.0421, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.7386235918346012e-05, |
|
"loss": 2.0712, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.7320891816304663e-05, |
|
"loss": 2.0424, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.725554771426331e-05, |
|
"loss": 2.0361, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.7190203612221962e-05, |
|
"loss": 2.0444, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.7124859510180613e-05, |
|
"loss": 2.0548, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.705951540813926e-05, |
|
"loss": 2.0541, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.699417130609791e-05, |
|
"loss": 2.0404, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6928827204056563e-05, |
|
"loss": 2.0484, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.6863483102015214e-05, |
|
"loss": 2.0469, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.679813899997386e-05, |
|
"loss": 2.0645, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.6732794897932513e-05, |
|
"loss": 2.0149, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.6667450795891164e-05, |
|
"loss": 2.0381, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6602106693849815e-05, |
|
"loss": 2.0344, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.6536762591808463e-05, |
|
"loss": 2.0524, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.6471418489767114e-05, |
|
"loss": 2.0369, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6406074387725765e-05, |
|
"loss": 2.0288, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.6340730285684416e-05, |
|
"loss": 2.0461, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6275386183643064e-05, |
|
"loss": 2.0462, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6210042081601715e-05, |
|
"loss": 2.0421, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6144697979560366e-05, |
|
"loss": 2.0358, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6079353877519017e-05, |
|
"loss": 2.028, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6014009775477665e-05, |
|
"loss": 2.0281, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5948665673436316e-05, |
|
"loss": 2.0444, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5883321571394967e-05, |
|
"loss": 2.0144, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.5817977469353618e-05, |
|
"loss": 2.0389, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5752633367312266e-05, |
|
"loss": 2.0288, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5687289265270917e-05, |
|
"loss": 2.0196, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5621945163229568e-05, |
|
"loss": 2.0079, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.555660106118822e-05, |
|
"loss": 2.0128, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.549125695914687e-05, |
|
"loss": 2.0139, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5425912857105518e-05, |
|
"loss": 2.0123, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.536056875506417e-05, |
|
"loss": 2.0349, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.529522465302282e-05, |
|
"loss": 2.0122, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5229880550981471e-05, |
|
"loss": 2.0399, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5164536448940119e-05, |
|
"loss": 2.0599, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.509919234689877e-05, |
|
"loss": 2.0234, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.5033848244857421e-05, |
|
"loss": 2.0228, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4968504142816072e-05, |
|
"loss": 2.0418, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.490316004077472e-05, |
|
"loss": 2.0168, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4837815938733371e-05, |
|
"loss": 1.9802, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4772471836692022e-05, |
|
"loss": 2.0072, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.4707127734650673e-05, |
|
"loss": 2.0049, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.464178363260932e-05, |
|
"loss": 2.0096, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.4576439530567972e-05, |
|
"loss": 2.0139, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.4511095428526623e-05, |
|
"loss": 2.0151, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4445751326485274e-05, |
|
"loss": 2.0238, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.4380407224443922e-05, |
|
"loss": 1.9868, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.4315063122402573e-05, |
|
"loss": 1.9997, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.4249719020361224e-05, |
|
"loss": 2.0127, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.4184374918319873e-05, |
|
"loss": 2.0157, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4119030816278523e-05, |
|
"loss": 2.0254, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.4053686714237174e-05, |
|
"loss": 1.9978, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.3988342612195825e-05, |
|
"loss": 2.005, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3922998510154474e-05, |
|
"loss": 2.0188, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3857654408113126e-05, |
|
"loss": 2.0277, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3792310306071775e-05, |
|
"loss": 2.0019, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3726966204030424e-05, |
|
"loss": 2.018, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3661622101989076e-05, |
|
"loss": 2.0108, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3596277999947727e-05, |
|
"loss": 2.0228, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.3530933897906376e-05, |
|
"loss": 2.0157, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3465589795865025e-05, |
|
"loss": 1.9915, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3400245693823677e-05, |
|
"loss": 1.998, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3334901591782328e-05, |
|
"loss": 2.0098, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 18.7755, |
|
"eval_loss": 1.8032176494598389, |
|
"eval_rouge1": 34.7948, |
|
"eval_rouge2": 13.0415, |
|
"eval_rougeL": 28.2085, |
|
"eval_rougeLsum": 28.2037, |
|
"eval_runtime": 1582.0831, |
|
"eval_samples_per_second": 7.163, |
|
"eval_steps_per_second": 1.791, |
|
"step": 51012 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.3269557489740977e-05, |
|
"loss": 1.9754, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3204213387699626e-05, |
|
"loss": 1.9726, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.3138869285658278e-05, |
|
"loss": 1.9691, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.3073525183616929e-05, |
|
"loss": 1.9775, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.3008181081575576e-05, |
|
"loss": 1.9516, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.2942836979534227e-05, |
|
"loss": 1.9323, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.2877492877492879e-05, |
|
"loss": 1.9761, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.281214877545153e-05, |
|
"loss": 1.9441, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2746804673410177e-05, |
|
"loss": 1.9574, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.2681460571368828e-05, |
|
"loss": 1.9569, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.261611646932748e-05, |
|
"loss": 1.9697, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.255077236728613e-05, |
|
"loss": 1.9628, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.2485428265244782e-05, |
|
"loss": 1.9505, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 1.242008416320343e-05, |
|
"loss": 1.9773, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.235474006116208e-05, |
|
"loss": 1.9245, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.2289395959120732e-05, |
|
"loss": 1.9719, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 1.2224051857079383e-05, |
|
"loss": 1.9322, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.215870775503803e-05, |
|
"loss": 1.97, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2093363652996682e-05, |
|
"loss": 1.9543, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.2028019550955333e-05, |
|
"loss": 1.9503, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.1962675448913982e-05, |
|
"loss": 1.9534, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1897331346872632e-05, |
|
"loss": 1.9626, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.1831987244831283e-05, |
|
"loss": 1.9583, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1766643142789934e-05, |
|
"loss": 1.9495, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.1701299040748583e-05, |
|
"loss": 1.9411, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 1.1635954938707233e-05, |
|
"loss": 1.9631, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1570610836665884e-05, |
|
"loss": 1.9241, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.1505266734624533e-05, |
|
"loss": 1.9573, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.1439922632583184e-05, |
|
"loss": 1.9407, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1374578530541834e-05, |
|
"loss": 1.9618, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.1309234428500485e-05, |
|
"loss": 1.9594, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.1243890326459134e-05, |
|
"loss": 1.9566, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1178546224417785e-05, |
|
"loss": 1.9457, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1113202122376435e-05, |
|
"loss": 1.9306, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.1047858020335086e-05, |
|
"loss": 1.9522, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.0982513918293735e-05, |
|
"loss": 1.9488, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0917169816252386e-05, |
|
"loss": 1.9522, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.0851825714211037e-05, |
|
"loss": 1.98, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.0786481612169685e-05, |
|
"loss": 1.9431, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0721137510128336e-05, |
|
"loss": 1.9281, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.0655793408086987e-05, |
|
"loss": 1.9539, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.0590449306045638e-05, |
|
"loss": 1.9509, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.0525105204004286e-05, |
|
"loss": 1.9401, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0459761101962937e-05, |
|
"loss": 1.9543, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.0394416999921588e-05, |
|
"loss": 1.9553, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.032907289788024e-05, |
|
"loss": 1.9253, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0263728795838887e-05, |
|
"loss": 1.9452, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.0198384693797538e-05, |
|
"loss": 1.9564, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.013304059175619e-05, |
|
"loss": 1.9471, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.006769648971484e-05, |
|
"loss": 1.9489, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0002352387673488e-05, |
|
"loss": 1.9376, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 9.93700828563214e-06, |
|
"loss": 1.9455, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 9.87166418359079e-06, |
|
"loss": 1.9483, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.80632008154944e-06, |
|
"loss": 1.9491, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.74097597950809e-06, |
|
"loss": 1.9437, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 9.67563187746674e-06, |
|
"loss": 1.9628, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.610287775425391e-06, |
|
"loss": 1.9353, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 9.54494367338404e-06, |
|
"loss": 1.931, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.479599571342692e-06, |
|
"loss": 1.9284, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 9.414255469301343e-06, |
|
"loss": 1.9464, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.348911367259992e-06, |
|
"loss": 1.9514, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 9.283567265218643e-06, |
|
"loss": 1.949, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.218223163177293e-06, |
|
"loss": 1.9372, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.152879061135942e-06, |
|
"loss": 1.9309, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.087534959094593e-06, |
|
"loss": 1.9237, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.022190857053243e-06, |
|
"loss": 1.9335, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.956846755011894e-06, |
|
"loss": 1.9301, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 8.891502652970543e-06, |
|
"loss": 1.926, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.826158550929194e-06, |
|
"loss": 1.9587, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.760814448887844e-06, |
|
"loss": 1.9266, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.695470346846495e-06, |
|
"loss": 1.9447, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 8.630126244805144e-06, |
|
"loss": 1.9143, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 8.564782142763794e-06, |
|
"loss": 1.9238, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.499438040722445e-06, |
|
"loss": 1.9292, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.434093938681094e-06, |
|
"loss": 1.9096, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.368749836639745e-06, |
|
"loss": 1.918, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.303405734598395e-06, |
|
"loss": 1.933, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 8.238061632557046e-06, |
|
"loss": 1.9313, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.172717530515695e-06, |
|
"loss": 1.9572, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.107373428474346e-06, |
|
"loss": 1.9296, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.042029326432996e-06, |
|
"loss": 1.922, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.976685224391647e-06, |
|
"loss": 1.9346, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 7.911341122350298e-06, |
|
"loss": 1.9138, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 7.845997020308947e-06, |
|
"loss": 1.9432, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.780652918267598e-06, |
|
"loss": 1.9335, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 7.715308816226248e-06, |
|
"loss": 1.9463, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 7.649964714184899e-06, |
|
"loss": 1.9164, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 7.584620612143548e-06, |
|
"loss": 1.9452, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.5192765101021995e-06, |
|
"loss": 1.947, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 7.453932408060849e-06, |
|
"loss": 1.9233, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 7.3885883060195e-06, |
|
"loss": 1.9377, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.323244203978149e-06, |
|
"loss": 1.9302, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 7.2579001019368e-06, |
|
"loss": 1.9431, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.19255599989545e-06, |
|
"loss": 1.9398, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 7.1272118978541e-06, |
|
"loss": 1.939, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.06186779581275e-06, |
|
"loss": 1.9193, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 6.996523693771401e-06, |
|
"loss": 1.9228, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 6.931179591730051e-06, |
|
"loss": 1.9107, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.865835489688701e-06, |
|
"loss": 1.9554, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 6.8004913876473514e-06, |
|
"loss": 1.9398, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 6.735147285606002e-06, |
|
"loss": 1.9089, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.669803183564651e-06, |
|
"loss": 1.9315, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 18.7867, |
|
"eval_loss": 1.7752913236618042, |
|
"eval_rouge1": 35.1217, |
|
"eval_rouge2": 13.5043, |
|
"eval_rougeL": 28.6409, |
|
"eval_rougeLsum": 28.6441, |
|
"eval_runtime": 1535.0703, |
|
"eval_samples_per_second": 7.382, |
|
"eval_steps_per_second": 1.846, |
|
"step": 102024 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 6.604459081523302e-06, |
|
"loss": 1.8969, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 6.539114979481952e-06, |
|
"loss": 1.8972, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 6.473770877440603e-06, |
|
"loss": 1.9123, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.408426775399254e-06, |
|
"loss": 1.8942, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 6.343082673357903e-06, |
|
"loss": 1.8978, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.277738571316554e-06, |
|
"loss": 1.922, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.212394469275204e-06, |
|
"loss": 1.9113, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.147050367233854e-06, |
|
"loss": 1.8857, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 6.081706265192504e-06, |
|
"loss": 1.9106, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.0163621631511545e-06, |
|
"loss": 1.9159, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.951018061109805e-06, |
|
"loss": 1.8901, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5.885673959068455e-06, |
|
"loss": 1.8963, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 5.820329857027105e-06, |
|
"loss": 1.8897, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.7549857549857555e-06, |
|
"loss": 1.8902, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5.689641652944406e-06, |
|
"loss": 1.8956, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 5.624297550903056e-06, |
|
"loss": 1.8823, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.558953448861706e-06, |
|
"loss": 1.9129, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 5.4936093468203565e-06, |
|
"loss": 1.8867, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.428265244779006e-06, |
|
"loss": 1.9093, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 5.362921142737657e-06, |
|
"loss": 1.9047, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2975770406963064e-06, |
|
"loss": 1.9123, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.2322329386549575e-06, |
|
"loss": 1.8867, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5.166888836613607e-06, |
|
"loss": 1.9057, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.101544734572258e-06, |
|
"loss": 1.9101, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.0362006325309075e-06, |
|
"loss": 1.9098, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 4.9708565304895586e-06, |
|
"loss": 1.9102, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.905512428448209e-06, |
|
"loss": 1.8988, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.840168326406859e-06, |
|
"loss": 1.9089, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.774824224365509e-06, |
|
"loss": 1.8994, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.70948012232416e-06, |
|
"loss": 1.892, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.64413602028281e-06, |
|
"loss": 1.8783, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.57879191824146e-06, |
|
"loss": 1.8987, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 4.51344781620011e-06, |
|
"loss": 1.8995, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.448103714158761e-06, |
|
"loss": 1.9008, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.38275961211741e-06, |
|
"loss": 1.9234, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.31741551007606e-06, |
|
"loss": 1.902, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 4.252071408034711e-06, |
|
"loss": 1.893, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.186727305993362e-06, |
|
"loss": 1.9127, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.121383203952012e-06, |
|
"loss": 1.9057, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.056039101910662e-06, |
|
"loss": 1.8866, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.990694999869312e-06, |
|
"loss": 1.8934, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.925350897827963e-06, |
|
"loss": 1.8902, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.860006795786613e-06, |
|
"loss": 1.9046, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.794662693745263e-06, |
|
"loss": 1.9003, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.729318591703913e-06, |
|
"loss": 1.8954, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6639744896625632e-06, |
|
"loss": 1.8877, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.5986303876212135e-06, |
|
"loss": 1.8993, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.5332862855798637e-06, |
|
"loss": 1.9162, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.467942183538514e-06, |
|
"loss": 1.8953, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4025980814971643e-06, |
|
"loss": 1.9295, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.3372539794558145e-06, |
|
"loss": 1.9046, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2719098774144648e-06, |
|
"loss": 1.8862, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.206565775373115e-06, |
|
"loss": 1.8719, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.1412216733317653e-06, |
|
"loss": 1.8713, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.0758775712904155e-06, |
|
"loss": 1.8959, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.0105334692490658e-06, |
|
"loss": 1.8966, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.945189367207716e-06, |
|
"loss": 1.8493, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8798452651663663e-06, |
|
"loss": 1.8897, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.814501163125016e-06, |
|
"loss": 1.8878, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.7491570610836672e-06, |
|
"loss": 1.8951, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.6838129590423175e-06, |
|
"loss": 1.8869, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.6184688570009677e-06, |
|
"loss": 1.8904, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.5531247549596176e-06, |
|
"loss": 1.8882, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.487780652918268e-06, |
|
"loss": 1.9147, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.422436550876918e-06, |
|
"loss": 1.8836, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3570924488355683e-06, |
|
"loss": 1.8834, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.2917483467942186e-06, |
|
"loss": 1.9004, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.226404244752869e-06, |
|
"loss": 1.8874, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.161060142711519e-06, |
|
"loss": 1.9072, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.0957160406701694e-06, |
|
"loss": 1.8991, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.0303719386288196e-06, |
|
"loss": 1.8925, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.96502783658747e-06, |
|
"loss": 1.8771, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.89968373454612e-06, |
|
"loss": 1.9066, |
|
"step": 138500 |
|
} |
|
], |
|
"max_steps": 153036, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.687242740119245e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|