{ "best_metric": null, "best_model_checkpoint": null, "epoch": 40.32258064516129, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 19.0, "eval_loss": 2.797335147857666, "eval_rouge1": 0.1301, "eval_rouge2": 0.0352, "eval_rougeL": 0.1074, "eval_rougeLsum": 0.1075, "eval_runtime": 18.6868, "eval_samples_per_second": 13.271, "eval_steps_per_second": 0.856, "step": 62 }, { "epoch": 2.0, "eval_gen_len": 19.0, "eval_loss": 2.568485975265503, "eval_rouge1": 0.1455, "eval_rouge2": 0.051, "eval_rougeL": 0.1189, "eval_rougeLsum": 0.1187, "eval_runtime": 16.801, "eval_samples_per_second": 14.761, "eval_steps_per_second": 0.952, "step": 124 }, { "epoch": 3.0, "eval_gen_len": 19.0, "eval_loss": 2.475400924682617, "eval_rouge1": 0.1674, "eval_rouge2": 0.0692, "eval_rougeL": 0.1397, "eval_rougeLsum": 0.1397, "eval_runtime": 17.3933, "eval_samples_per_second": 14.258, "eval_steps_per_second": 0.92, "step": 186 }, { "epoch": 4.0, "eval_gen_len": 19.0, "eval_loss": 2.419504404067993, "eval_rouge1": 0.1901, "eval_rouge2": 0.0867, "eval_rougeL": 0.1586, "eval_rougeLsum": 0.1587, "eval_runtime": 16.7871, "eval_samples_per_second": 14.773, "eval_steps_per_second": 0.953, "step": 248 }, { "epoch": 5.0, "eval_gen_len": 19.0, "eval_loss": 2.3755078315734863, "eval_rouge1": 0.1933, "eval_rouge2": 0.0907, "eval_rougeL": 0.1617, "eval_rougeLsum": 0.1619, "eval_runtime": 16.6712, "eval_samples_per_second": 14.876, "eval_steps_per_second": 0.96, "step": 310 }, { "epoch": 6.0, "eval_gen_len": 19.0, "eval_loss": 2.3425652980804443, "eval_rouge1": 0.1946, "eval_rouge2": 0.0916, "eval_rougeL": 0.1634, "eval_rougeLsum": 0.1636, "eval_runtime": 17.1282, "eval_samples_per_second": 14.479, "eval_steps_per_second": 0.934, "step": 372 }, { "epoch": 7.0, "eval_gen_len": 19.0, "eval_loss": 2.3197405338287354, "eval_rouge1": 0.1964, "eval_rouge2": 0.0929, "eval_rougeL": 0.1646, "eval_rougeLsum": 0.1648, "eval_runtime": 16.7039, "eval_samples_per_second": 14.847, "eval_steps_per_second": 0.958, "step": 434 }, { "epoch": 8.0, "eval_gen_len": 19.0, "eval_loss": 2.2987782955169678, "eval_rouge1": 0.1968, "eval_rouge2": 0.0933, "eval_rougeL": 0.165, "eval_rougeLsum": 0.1653, "eval_runtime": 16.6651, "eval_samples_per_second": 14.881, "eval_steps_per_second": 0.96, "step": 496 }, { "epoch": 8.06, "learning_rate": 1.6780645161290323e-05, "loss": 2.7011, "step": 500 }, { "epoch": 9.0, "eval_gen_len": 19.0, "eval_loss": 2.279834270477295, "eval_rouge1": 0.1969, "eval_rouge2": 0.0946, "eval_rougeL": 0.1662, "eval_rougeLsum": 0.1665, "eval_runtime": 17.134, "eval_samples_per_second": 14.474, "eval_steps_per_second": 0.934, "step": 558 }, { "epoch": 10.0, "eval_gen_len": 19.0, "eval_loss": 2.265596389770508, "eval_rouge1": 0.1987, "eval_rouge2": 0.0962, "eval_rougeL": 0.1672, "eval_rougeLsum": 0.1673, "eval_runtime": 17.1955, "eval_samples_per_second": 14.422, "eval_steps_per_second": 0.93, "step": 620 }, { "epoch": 11.0, "eval_gen_len": 19.0, "eval_loss": 2.2547566890716553, "eval_rouge1": 0.1958, "eval_rouge2": 0.0965, "eval_rougeL": 0.1655, "eval_rougeLsum": 0.1657, "eval_runtime": 16.9264, "eval_samples_per_second": 14.652, "eval_steps_per_second": 0.945, "step": 682 }, { "epoch": 12.0, "eval_gen_len": 19.0, "eval_loss": 2.243624210357666, "eval_rouge1": 0.1965, "eval_rouge2": 0.096, "eval_rougeL": 0.1659, "eval_rougeLsum": 0.166, "eval_runtime": 16.9191, "eval_samples_per_second": 14.658, "eval_steps_per_second": 0.946, "step": 744 }, { "epoch": 13.0, "eval_gen_len": 19.0, "eval_loss": 2.2352294921875, "eval_rouge1": 0.1964, "eval_rouge2": 0.0971, "eval_rougeL": 0.1663, "eval_rougeLsum": 0.1664, "eval_runtime": 16.9525, "eval_samples_per_second": 14.629, "eval_steps_per_second": 0.944, "step": 806 }, { "epoch": 14.0, "eval_gen_len": 19.0, "eval_loss": 2.2252049446105957, "eval_rouge1": 0.197, "eval_rouge2": 0.097, "eval_rougeL": 0.1664, "eval_rougeLsum": 0.1664, "eval_runtime": 16.904, "eval_samples_per_second": 14.671, "eval_steps_per_second": 0.947, "step": 868 }, { "epoch": 15.0, "eval_gen_len": 19.0, "eval_loss": 2.2152445316314697, "eval_rouge1": 0.1954, "eval_rouge2": 0.0979, "eval_rougeL": 0.1664, "eval_rougeLsum": 0.1665, "eval_runtime": 17.117, "eval_samples_per_second": 14.489, "eval_steps_per_second": 0.935, "step": 930 }, { "epoch": 16.0, "eval_gen_len": 19.0, "eval_loss": 2.207918643951416, "eval_rouge1": 0.1954, "eval_rouge2": 0.0988, "eval_rougeL": 0.1674, "eval_rougeLsum": 0.1677, "eval_runtime": 17.2029, "eval_samples_per_second": 14.416, "eval_steps_per_second": 0.93, "step": 992 }, { "epoch": 16.13, "learning_rate": 1.355483870967742e-05, "loss": 2.3282, "step": 1000 }, { "epoch": 17.0, "eval_gen_len": 19.0, "eval_loss": 2.1986870765686035, "eval_rouge1": 0.1951, "eval_rouge2": 0.0995, "eval_rougeL": 0.1672, "eval_rougeLsum": 0.1673, "eval_runtime": 17.1147, "eval_samples_per_second": 14.49, "eval_steps_per_second": 0.935, "step": 1054 }, { "epoch": 18.0, "eval_gen_len": 19.0, "eval_loss": 2.1938998699188232, "eval_rouge1": 0.1974, "eval_rouge2": 0.1015, "eval_rougeL": 0.1695, "eval_rougeLsum": 0.1697, "eval_runtime": 16.7909, "eval_samples_per_second": 14.77, "eval_steps_per_second": 0.953, "step": 1116 }, { "epoch": 19.0, "eval_gen_len": 19.0, "eval_loss": 2.18984055519104, "eval_rouge1": 0.1965, "eval_rouge2": 0.1014, "eval_rougeL": 0.1691, "eval_rougeLsum": 0.1693, "eval_runtime": 16.6689, "eval_samples_per_second": 14.878, "eval_steps_per_second": 0.96, "step": 1178 }, { "epoch": 20.0, "eval_gen_len": 19.0, "eval_loss": 2.183218240737915, "eval_rouge1": 0.1963, "eval_rouge2": 0.0997, "eval_rougeL": 0.1683, "eval_rougeLsum": 0.1685, "eval_runtime": 17.3129, "eval_samples_per_second": 14.325, "eval_steps_per_second": 0.924, "step": 1240 }, { "epoch": 21.0, "eval_gen_len": 19.0, "eval_loss": 2.1765005588531494, "eval_rouge1": 0.1966, "eval_rouge2": 0.0991, "eval_rougeL": 0.1676, "eval_rougeLsum": 0.1678, "eval_runtime": 16.8703, "eval_samples_per_second": 14.7, "eval_steps_per_second": 0.948, "step": 1302 }, { "epoch": 22.0, "eval_gen_len": 19.0, "eval_loss": 2.1725897789001465, "eval_rouge1": 0.1963, "eval_rouge2": 0.0989, "eval_rougeL": 0.1677, "eval_rougeLsum": 0.1676, "eval_runtime": 16.7813, "eval_samples_per_second": 14.778, "eval_steps_per_second": 0.953, "step": 1364 }, { "epoch": 23.0, "eval_gen_len": 19.0, "eval_loss": 2.1676828861236572, "eval_rouge1": 0.1959, "eval_rouge2": 0.0988, "eval_rougeL": 0.168, "eval_rougeLsum": 0.168, "eval_runtime": 17.3121, "eval_samples_per_second": 14.325, "eval_steps_per_second": 0.924, "step": 1426 }, { "epoch": 24.0, "eval_gen_len": 19.0, "eval_loss": 2.1647536754608154, "eval_rouge1": 0.1967, "eval_rouge2": 0.0994, "eval_rougeL": 0.169, "eval_rougeLsum": 0.1692, "eval_runtime": 16.9203, "eval_samples_per_second": 14.657, "eval_steps_per_second": 0.946, "step": 1488 }, { "epoch": 24.19, "learning_rate": 1.0329032258064518e-05, "loss": 2.2281, "step": 1500 }, { "epoch": 25.0, "eval_gen_len": 19.0, "eval_loss": 2.15854549407959, "eval_rouge1": 0.1958, "eval_rouge2": 0.0988, "eval_rougeL": 0.1685, "eval_rougeLsum": 0.1687, "eval_runtime": 17.1171, "eval_samples_per_second": 14.488, "eval_steps_per_second": 0.935, "step": 1550 }, { "epoch": 26.0, "eval_gen_len": 19.0, "eval_loss": 2.1557765007019043, "eval_rouge1": 0.197, "eval_rouge2": 0.1, "eval_rougeL": 0.1698, "eval_rougeLsum": 0.1699, "eval_runtime": 16.8981, "eval_samples_per_second": 14.676, "eval_steps_per_second": 0.947, "step": 1612 }, { "epoch": 27.0, "eval_gen_len": 19.0, "eval_loss": 2.1530044078826904, "eval_rouge1": 0.196, "eval_rouge2": 0.0994, "eval_rougeL": 0.1685, "eval_rougeLsum": 0.1687, "eval_runtime": 16.9499, "eval_samples_per_second": 14.631, "eval_steps_per_second": 0.944, "step": 1674 }, { "epoch": 28.0, "eval_gen_len": 19.0, "eval_loss": 2.1497113704681396, "eval_rouge1": 0.1971, "eval_rouge2": 0.101, "eval_rougeL": 0.1697, "eval_rougeLsum": 0.1699, "eval_runtime": 16.8885, "eval_samples_per_second": 14.685, "eval_steps_per_second": 0.947, "step": 1736 }, { "epoch": 29.0, "eval_gen_len": 19.0, "eval_loss": 2.1459004878997803, "eval_rouge1": 0.1972, "eval_rouge2": 0.1008, "eval_rougeL": 0.17, "eval_rougeLsum": 0.1701, "eval_runtime": 17.5571, "eval_samples_per_second": 14.125, "eval_steps_per_second": 0.911, "step": 1798 }, { "epoch": 30.0, "eval_gen_len": 19.0, "eval_loss": 2.142939805984497, "eval_rouge1": 0.1946, "eval_rouge2": 0.0989, "eval_rougeL": 0.1677, "eval_rougeLsum": 0.1678, "eval_runtime": 17.4108, "eval_samples_per_second": 14.244, "eval_steps_per_second": 0.919, "step": 1860 }, { "epoch": 31.0, "eval_gen_len": 19.0, "eval_loss": 2.14223051071167, "eval_rouge1": 0.1958, "eval_rouge2": 0.1, "eval_rougeL": 0.1691, "eval_rougeLsum": 0.1692, "eval_runtime": 17.3596, "eval_samples_per_second": 14.286, "eval_steps_per_second": 0.922, "step": 1922 }, { "epoch": 32.0, "eval_gen_len": 19.0, "eval_loss": 2.139946699142456, "eval_rouge1": 0.1952, "eval_rouge2": 0.0992, "eval_rougeL": 0.1687, "eval_rougeLsum": 0.1687, "eval_runtime": 17.1971, "eval_samples_per_second": 14.421, "eval_steps_per_second": 0.93, "step": 1984 }, { "epoch": 32.26, "learning_rate": 7.103225806451613e-06, "loss": 2.1696, "step": 2000 }, { "epoch": 33.0, "eval_gen_len": 19.0, "eval_loss": 2.135340690612793, "eval_rouge1": 0.1945, "eval_rouge2": 0.0983, "eval_rougeL": 0.1676, "eval_rougeLsum": 0.1676, "eval_runtime": 16.8259, "eval_samples_per_second": 14.739, "eval_steps_per_second": 0.951, "step": 2046 }, { "epoch": 34.0, "eval_gen_len": 19.0, "eval_loss": 2.1345207691192627, "eval_rouge1": 0.1934, "eval_rouge2": 0.097, "eval_rougeL": 0.1664, "eval_rougeLsum": 0.1665, "eval_runtime": 17.2959, "eval_samples_per_second": 14.339, "eval_steps_per_second": 0.925, "step": 2108 }, { "epoch": 35.0, "eval_gen_len": 19.0, "eval_loss": 2.132594585418701, "eval_rouge1": 0.1934, "eval_rouge2": 0.0969, "eval_rougeL": 0.1666, "eval_rougeLsum": 0.1669, "eval_runtime": 16.8869, "eval_samples_per_second": 14.686, "eval_steps_per_second": 0.947, "step": 2170 }, { "epoch": 36.0, "eval_gen_len": 19.0, "eval_loss": 2.1315131187438965, "eval_rouge1": 0.1942, "eval_rouge2": 0.0982, "eval_rougeL": 0.1674, "eval_rougeLsum": 0.1676, "eval_runtime": 16.9224, "eval_samples_per_second": 14.655, "eval_steps_per_second": 0.945, "step": 2232 }, { "epoch": 37.0, "eval_gen_len": 19.0, "eval_loss": 2.1289427280426025, "eval_rouge1": 0.1941, "eval_rouge2": 0.0989, "eval_rougeL": 0.1679, "eval_rougeLsum": 0.1681, "eval_runtime": 17.1711, "eval_samples_per_second": 14.443, "eval_steps_per_second": 0.932, "step": 2294 }, { "epoch": 38.0, "eval_gen_len": 19.0, "eval_loss": 2.1285345554351807, "eval_rouge1": 0.1924, "eval_rouge2": 0.0971, "eval_rougeL": 0.1664, "eval_rougeLsum": 0.1665, "eval_runtime": 16.844, "eval_samples_per_second": 14.723, "eval_steps_per_second": 0.95, "step": 2356 }, { "epoch": 39.0, "eval_gen_len": 19.0, "eval_loss": 2.1261112689971924, "eval_rouge1": 0.1932, "eval_rouge2": 0.0983, "eval_rougeL": 0.1671, "eval_rougeLsum": 0.1672, "eval_runtime": 16.7999, "eval_samples_per_second": 14.762, "eval_steps_per_second": 0.952, "step": 2418 }, { "epoch": 40.0, "eval_gen_len": 19.0, "eval_loss": 2.1248812675476074, "eval_rouge1": 0.1931, "eval_rouge2": 0.0985, "eval_rougeL": 0.1672, "eval_rougeLsum": 0.1674, "eval_runtime": 17.7634, "eval_samples_per_second": 13.961, "eval_steps_per_second": 0.901, "step": 2480 }, { "epoch": 40.32, "learning_rate": 3.87741935483871e-06, "loss": 2.1317, "step": 2500 } ], "max_steps": 3100, "num_train_epochs": 50, "total_flos": 1.079486208540672e+16, "trial_name": null, "trial_params": null }