|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 110.0, |
|
"global_step": 962830, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.992000456986176e-05, |
|
"loss": 0.9599, |
|
"step": 3501 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 0.9476152062416077, |
|
"eval_runtime": 178.6207, |
|
"eval_samples_per_second": 174.224, |
|
"eval_steps_per_second": 10.889, |
|
"step": 3501 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.984000913972353e-05, |
|
"loss": 0.9731, |
|
"step": 7002 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.9536693692207336, |
|
"eval_runtime": 178.7725, |
|
"eval_samples_per_second": 174.076, |
|
"eval_steps_per_second": 10.88, |
|
"step": 7002 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.9760013709585284e-05, |
|
"loss": 0.9745, |
|
"step": 10503 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.9530463218688965, |
|
"eval_runtime": 178.7418, |
|
"eval_samples_per_second": 174.106, |
|
"eval_steps_per_second": 10.882, |
|
"step": 10503 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.9680018279447047e-05, |
|
"loss": 0.9747, |
|
"step": 14004 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.9569175839424133, |
|
"eval_runtime": 178.7659, |
|
"eval_samples_per_second": 174.082, |
|
"eval_steps_per_second": 10.88, |
|
"step": 14004 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.960002284930881e-05, |
|
"loss": 0.978, |
|
"step": 17505 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.958555281162262, |
|
"eval_runtime": 178.7319, |
|
"eval_samples_per_second": 174.115, |
|
"eval_steps_per_second": 10.882, |
|
"step": 17505 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.952002741917057e-05, |
|
"loss": 0.9736, |
|
"step": 21006 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.9602962136268616, |
|
"eval_runtime": 178.815, |
|
"eval_samples_per_second": 174.035, |
|
"eval_steps_per_second": 10.877, |
|
"step": 21006 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 4.9440031989032335e-05, |
|
"loss": 0.9772, |
|
"step": 24507 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.9578276872634888, |
|
"eval_runtime": 178.2789, |
|
"eval_samples_per_second": 174.558, |
|
"eval_steps_per_second": 10.91, |
|
"step": 24507 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.93600365588941e-05, |
|
"loss": 0.9771, |
|
"step": 28008 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.9583005309104919, |
|
"eval_runtime": 177.473, |
|
"eval_samples_per_second": 175.351, |
|
"eval_steps_per_second": 10.959, |
|
"step": 28008 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.928004112875585e-05, |
|
"loss": 0.9773, |
|
"step": 31509 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 0.9513981938362122, |
|
"eval_runtime": 177.4973, |
|
"eval_samples_per_second": 175.327, |
|
"eval_steps_per_second": 10.958, |
|
"step": 31509 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.920004569861762e-05, |
|
"loss": 0.9802, |
|
"step": 35010 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.9571573138237, |
|
"eval_runtime": 177.3934, |
|
"eval_samples_per_second": 175.429, |
|
"eval_steps_per_second": 10.964, |
|
"step": 35010 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 4.912005026847938e-05, |
|
"loss": 0.9741, |
|
"step": 38511 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.9547442197799683, |
|
"eval_runtime": 177.5692, |
|
"eval_samples_per_second": 175.256, |
|
"eval_steps_per_second": 10.953, |
|
"step": 38511 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 4.904005483834114e-05, |
|
"loss": 0.9776, |
|
"step": 42012 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.9541487097740173, |
|
"eval_runtime": 177.564, |
|
"eval_samples_per_second": 175.261, |
|
"eval_steps_per_second": 10.954, |
|
"step": 42012 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.8960059408202904e-05, |
|
"loss": 0.9737, |
|
"step": 45513 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 0.9530224204063416, |
|
"eval_runtime": 177.5256, |
|
"eval_samples_per_second": 175.299, |
|
"eval_steps_per_second": 10.956, |
|
"step": 45513 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 4.888006397806467e-05, |
|
"loss": 0.9758, |
|
"step": 49014 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.9559855461120605, |
|
"eval_runtime": 177.638, |
|
"eval_samples_per_second": 175.188, |
|
"eval_steps_per_second": 10.949, |
|
"step": 49014 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.880006854792642e-05, |
|
"loss": 0.9777, |
|
"step": 52515 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.9567117691040039, |
|
"eval_runtime": 177.402, |
|
"eval_samples_per_second": 175.421, |
|
"eval_steps_per_second": 10.964, |
|
"step": 52515 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 4.872007311778819e-05, |
|
"loss": 0.9726, |
|
"step": 56016 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.9548764228820801, |
|
"eval_runtime": 177.4726, |
|
"eval_samples_per_second": 175.351, |
|
"eval_steps_per_second": 10.959, |
|
"step": 56016 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 4.864007768764995e-05, |
|
"loss": 0.977, |
|
"step": 59517 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 0.9567227959632874, |
|
"eval_runtime": 177.3888, |
|
"eval_samples_per_second": 175.434, |
|
"eval_steps_per_second": 10.965, |
|
"step": 59517 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 4.856008225751171e-05, |
|
"loss": 0.9736, |
|
"step": 63018 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.9573630094528198, |
|
"eval_runtime": 177.3454, |
|
"eval_samples_per_second": 175.477, |
|
"eval_steps_per_second": 10.967, |
|
"step": 63018 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 4.8480086827373474e-05, |
|
"loss": 0.9732, |
|
"step": 66519 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_loss": 0.9487555623054504, |
|
"eval_runtime": 177.2246, |
|
"eval_samples_per_second": 175.596, |
|
"eval_steps_per_second": 10.975, |
|
"step": 66519 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.8400091397235236e-05, |
|
"loss": 0.9743, |
|
"step": 70020 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.9511750936508179, |
|
"eval_runtime": 177.2113, |
|
"eval_samples_per_second": 175.61, |
|
"eval_steps_per_second": 10.976, |
|
"step": 70020 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 4.832009596709699e-05, |
|
"loss": 0.9684, |
|
"step": 73521 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_loss": 0.954649031162262, |
|
"eval_runtime": 177.1791, |
|
"eval_samples_per_second": 175.642, |
|
"eval_steps_per_second": 10.978, |
|
"step": 73521 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 4.824010053695876e-05, |
|
"loss": 0.974, |
|
"step": 77022 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.9532359838485718, |
|
"eval_runtime": 177.517, |
|
"eval_samples_per_second": 175.307, |
|
"eval_steps_per_second": 10.957, |
|
"step": 77022 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 4.816010510682052e-05, |
|
"loss": 0.9713, |
|
"step": 80523 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 0.9548117518424988, |
|
"eval_runtime": 177.4324, |
|
"eval_samples_per_second": 175.391, |
|
"eval_steps_per_second": 10.962, |
|
"step": 80523 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.808010967668228e-05, |
|
"loss": 0.9693, |
|
"step": 84024 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 0.9545753002166748, |
|
"eval_runtime": 177.4929, |
|
"eval_samples_per_second": 175.331, |
|
"eval_steps_per_second": 10.958, |
|
"step": 84024 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.800011424654404e-05, |
|
"loss": 0.9734, |
|
"step": 87525 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.9541465044021606, |
|
"eval_runtime": 177.7477, |
|
"eval_samples_per_second": 175.08, |
|
"eval_steps_per_second": 10.942, |
|
"step": 87525 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.7920118816405806e-05, |
|
"loss": 0.9681, |
|
"step": 91026 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_loss": 0.9508066177368164, |
|
"eval_runtime": 177.4879, |
|
"eval_samples_per_second": 175.336, |
|
"eval_steps_per_second": 10.958, |
|
"step": 91026 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 4.784012338626757e-05, |
|
"loss": 0.9713, |
|
"step": 94527 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_loss": 0.953546404838562, |
|
"eval_runtime": 177.5133, |
|
"eval_samples_per_second": 175.311, |
|
"eval_steps_per_second": 10.957, |
|
"step": 94527 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 4.776012795612933e-05, |
|
"loss": 0.9682, |
|
"step": 98028 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_loss": 0.9516591429710388, |
|
"eval_runtime": 177.5065, |
|
"eval_samples_per_second": 175.318, |
|
"eval_steps_per_second": 10.957, |
|
"step": 98028 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 4.768013252599109e-05, |
|
"loss": 0.9678, |
|
"step": 101529 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"eval_loss": 0.9487663507461548, |
|
"eval_runtime": 177.5275, |
|
"eval_samples_per_second": 175.297, |
|
"eval_steps_per_second": 10.956, |
|
"step": 101529 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.760013709585286e-05, |
|
"loss": 0.9692, |
|
"step": 105030 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.9506123065948486, |
|
"eval_runtime": 177.4334, |
|
"eval_samples_per_second": 175.39, |
|
"eval_steps_per_second": 10.962, |
|
"step": 105030 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.752014166571461e-05, |
|
"loss": 0.9648, |
|
"step": 108531 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"eval_loss": 0.9482792019844055, |
|
"eval_runtime": 177.4207, |
|
"eval_samples_per_second": 175.402, |
|
"eval_steps_per_second": 10.963, |
|
"step": 108531 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 4.7440146235576376e-05, |
|
"loss": 0.9664, |
|
"step": 112032 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_loss": 0.9506826400756836, |
|
"eval_runtime": 177.4367, |
|
"eval_samples_per_second": 175.386, |
|
"eval_steps_per_second": 10.962, |
|
"step": 112032 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"learning_rate": 4.736015080543814e-05, |
|
"loss": 0.9656, |
|
"step": 115533 |
|
}, |
|
{ |
|
"epoch": 13.2, |
|
"eval_loss": 0.9522765874862671, |
|
"eval_runtime": 177.6082, |
|
"eval_samples_per_second": 175.217, |
|
"eval_steps_per_second": 10.951, |
|
"step": 115533 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"learning_rate": 4.72801553752999e-05, |
|
"loss": 0.9678, |
|
"step": 119034 |
|
}, |
|
{ |
|
"epoch": 13.6, |
|
"eval_loss": 0.9502620100975037, |
|
"eval_runtime": 177.507, |
|
"eval_samples_per_second": 175.317, |
|
"eval_steps_per_second": 10.957, |
|
"step": 119034 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 4.720015994516166e-05, |
|
"loss": 0.9652, |
|
"step": 122535 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.9443088173866272, |
|
"eval_runtime": 177.5507, |
|
"eval_samples_per_second": 175.274, |
|
"eval_steps_per_second": 10.955, |
|
"step": 122535 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 4.7120164515023426e-05, |
|
"loss": 0.9624, |
|
"step": 126036 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"eval_loss": 0.9469555020332336, |
|
"eval_runtime": 177.4174, |
|
"eval_samples_per_second": 175.406, |
|
"eval_steps_per_second": 10.963, |
|
"step": 126036 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"learning_rate": 4.704016908488518e-05, |
|
"loss": 0.9632, |
|
"step": 129537 |
|
}, |
|
{ |
|
"epoch": 14.8, |
|
"eval_loss": 0.9473945498466492, |
|
"eval_runtime": 177.5092, |
|
"eval_samples_per_second": 175.315, |
|
"eval_steps_per_second": 10.957, |
|
"step": 129537 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"learning_rate": 4.6960173654746945e-05, |
|
"loss": 0.9625, |
|
"step": 133038 |
|
}, |
|
{ |
|
"epoch": 15.2, |
|
"eval_loss": 0.948591947555542, |
|
"eval_runtime": 177.5625, |
|
"eval_samples_per_second": 175.262, |
|
"eval_steps_per_second": 10.954, |
|
"step": 133038 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"learning_rate": 4.688017822460871e-05, |
|
"loss": 0.9606, |
|
"step": 136539 |
|
}, |
|
{ |
|
"epoch": 15.6, |
|
"eval_loss": 0.9465170502662659, |
|
"eval_runtime": 177.5019, |
|
"eval_samples_per_second": 175.322, |
|
"eval_steps_per_second": 10.958, |
|
"step": 136539 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 4.680018279447047e-05, |
|
"loss": 0.9623, |
|
"step": 140040 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.9485459327697754, |
|
"eval_runtime": 177.4543, |
|
"eval_samples_per_second": 175.369, |
|
"eval_steps_per_second": 10.961, |
|
"step": 140040 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"learning_rate": 4.6720187364332226e-05, |
|
"loss": 0.9621, |
|
"step": 143541 |
|
}, |
|
{ |
|
"epoch": 16.4, |
|
"eval_loss": 0.9435310959815979, |
|
"eval_runtime": 177.7019, |
|
"eval_samples_per_second": 175.125, |
|
"eval_steps_per_second": 10.945, |
|
"step": 143541 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"learning_rate": 4.6640191934193996e-05, |
|
"loss": 0.9608, |
|
"step": 147042 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_loss": 0.9458166360855103, |
|
"eval_runtime": 177.4161, |
|
"eval_samples_per_second": 175.407, |
|
"eval_steps_per_second": 10.963, |
|
"step": 147042 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"learning_rate": 4.656019650405575e-05, |
|
"loss": 0.9607, |
|
"step": 150543 |
|
}, |
|
{ |
|
"epoch": 17.2, |
|
"eval_loss": 0.944205641746521, |
|
"eval_runtime": 177.3072, |
|
"eval_samples_per_second": 175.515, |
|
"eval_steps_per_second": 10.97, |
|
"step": 150543 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 4.6480201073917515e-05, |
|
"loss": 0.9573, |
|
"step": 154044 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"eval_loss": 0.948502242565155, |
|
"eval_runtime": 177.4753, |
|
"eval_samples_per_second": 175.348, |
|
"eval_steps_per_second": 10.959, |
|
"step": 154044 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 4.640020564377928e-05, |
|
"loss": 0.9629, |
|
"step": 157545 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.9456846714019775, |
|
"eval_runtime": 177.5771, |
|
"eval_samples_per_second": 175.248, |
|
"eval_steps_per_second": 10.953, |
|
"step": 157545 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"learning_rate": 4.632021021364104e-05, |
|
"loss": 0.9576, |
|
"step": 161046 |
|
}, |
|
{ |
|
"epoch": 18.4, |
|
"eval_loss": 0.9430428147315979, |
|
"eval_runtime": 177.5195, |
|
"eval_samples_per_second": 175.305, |
|
"eval_steps_per_second": 10.957, |
|
"step": 161046 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 4.62402147835028e-05, |
|
"loss": 0.9572, |
|
"step": 164547 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"eval_loss": 0.945507824420929, |
|
"eval_runtime": 177.5717, |
|
"eval_samples_per_second": 175.253, |
|
"eval_steps_per_second": 10.953, |
|
"step": 164547 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 4.6160219353364566e-05, |
|
"loss": 0.9561, |
|
"step": 168048 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"eval_loss": 0.9430563449859619, |
|
"eval_runtime": 177.639, |
|
"eval_samples_per_second": 175.187, |
|
"eval_steps_per_second": 10.949, |
|
"step": 168048 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"learning_rate": 4.608022392322632e-05, |
|
"loss": 0.9584, |
|
"step": 171549 |
|
}, |
|
{ |
|
"epoch": 19.6, |
|
"eval_loss": 0.944654107093811, |
|
"eval_runtime": 177.2628, |
|
"eval_samples_per_second": 175.559, |
|
"eval_steps_per_second": 10.972, |
|
"step": 171549 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 4.600022849308809e-05, |
|
"loss": 0.9571, |
|
"step": 175050 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.9449816346168518, |
|
"eval_runtime": 177.5343, |
|
"eval_samples_per_second": 175.29, |
|
"eval_steps_per_second": 10.956, |
|
"step": 175050 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 4.592023306294985e-05, |
|
"loss": 0.9532, |
|
"step": 178551 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"eval_loss": 0.9422610998153687, |
|
"eval_runtime": 177.5589, |
|
"eval_samples_per_second": 175.266, |
|
"eval_steps_per_second": 10.954, |
|
"step": 178551 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 4.584023763281161e-05, |
|
"loss": 0.955, |
|
"step": 182052 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_loss": 0.9425234794616699, |
|
"eval_runtime": 177.4849, |
|
"eval_samples_per_second": 175.339, |
|
"eval_steps_per_second": 10.959, |
|
"step": 182052 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 4.576024220267337e-05, |
|
"loss": 0.9542, |
|
"step": 185553 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"eval_loss": 0.940060555934906, |
|
"eval_runtime": 177.7301, |
|
"eval_samples_per_second": 175.097, |
|
"eval_steps_per_second": 10.944, |
|
"step": 185553 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 4.5680246772535135e-05, |
|
"loss": 0.9512, |
|
"step": 189054 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"eval_loss": 0.9436028599739075, |
|
"eval_runtime": 177.5045, |
|
"eval_samples_per_second": 175.319, |
|
"eval_steps_per_second": 10.957, |
|
"step": 189054 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 4.560025134239689e-05, |
|
"loss": 0.9556, |
|
"step": 192555 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.9408882260322571, |
|
"eval_runtime": 177.5846, |
|
"eval_samples_per_second": 175.24, |
|
"eval_steps_per_second": 10.953, |
|
"step": 192555 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 4.552025591225866e-05, |
|
"loss": 0.9528, |
|
"step": 196056 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"eval_loss": 0.9434440732002258, |
|
"eval_runtime": 177.5389, |
|
"eval_samples_per_second": 175.286, |
|
"eval_steps_per_second": 10.955, |
|
"step": 196056 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 4.5440260482120416e-05, |
|
"loss": 0.9519, |
|
"step": 199557 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"eval_loss": 0.9397256970405579, |
|
"eval_runtime": 177.7195, |
|
"eval_samples_per_second": 175.107, |
|
"eval_steps_per_second": 10.944, |
|
"step": 199557 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 4.536026505198218e-05, |
|
"loss": 0.9526, |
|
"step": 203058 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"eval_loss": 0.9388800263404846, |
|
"eval_runtime": 177.5696, |
|
"eval_samples_per_second": 175.255, |
|
"eval_steps_per_second": 10.953, |
|
"step": 203058 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 4.528026962184394e-05, |
|
"loss": 0.9494, |
|
"step": 206559 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"eval_loss": 0.93772292137146, |
|
"eval_runtime": 177.5868, |
|
"eval_samples_per_second": 175.238, |
|
"eval_steps_per_second": 10.952, |
|
"step": 206559 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 4.5200274191705705e-05, |
|
"loss": 0.953, |
|
"step": 210060 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.9386118054389954, |
|
"eval_runtime": 177.6021, |
|
"eval_samples_per_second": 175.223, |
|
"eval_steps_per_second": 10.951, |
|
"step": 210060 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 4.512027876156746e-05, |
|
"loss": 0.9472, |
|
"step": 213561 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"eval_loss": 0.9392057061195374, |
|
"eval_runtime": 177.5795, |
|
"eval_samples_per_second": 175.245, |
|
"eval_steps_per_second": 10.953, |
|
"step": 213561 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"learning_rate": 4.504028333142923e-05, |
|
"loss": 0.9505, |
|
"step": 217062 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_loss": 0.9383804798126221, |
|
"eval_runtime": 177.5958, |
|
"eval_samples_per_second": 175.229, |
|
"eval_steps_per_second": 10.952, |
|
"step": 217062 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 4.4960287901290986e-05, |
|
"loss": 0.9458, |
|
"step": 220563 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"eval_loss": 0.9350699186325073, |
|
"eval_runtime": 177.4438, |
|
"eval_samples_per_second": 175.379, |
|
"eval_steps_per_second": 10.961, |
|
"step": 220563 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 4.488029247115275e-05, |
|
"loss": 0.9479, |
|
"step": 224064 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"eval_loss": 0.9326021671295166, |
|
"eval_runtime": 177.3212, |
|
"eval_samples_per_second": 175.501, |
|
"eval_steps_per_second": 10.969, |
|
"step": 224064 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.480029704101451e-05, |
|
"loss": 0.9495, |
|
"step": 227565 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.9333738088607788, |
|
"eval_runtime": 177.2781, |
|
"eval_samples_per_second": 175.543, |
|
"eval_steps_per_second": 10.971, |
|
"step": 227565 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 4.4720301610876274e-05, |
|
"loss": 0.9434, |
|
"step": 231066 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"eval_loss": 0.9343997836112976, |
|
"eval_runtime": 177.4455, |
|
"eval_samples_per_second": 175.378, |
|
"eval_steps_per_second": 10.961, |
|
"step": 231066 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 4.464030618073804e-05, |
|
"loss": 0.9475, |
|
"step": 234567 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"eval_loss": 0.9394047856330872, |
|
"eval_runtime": 177.4291, |
|
"eval_samples_per_second": 175.394, |
|
"eval_steps_per_second": 10.962, |
|
"step": 234567 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 4.45603107505998e-05, |
|
"loss": 0.9479, |
|
"step": 238068 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"eval_loss": 0.9336209893226624, |
|
"eval_runtime": 177.3352, |
|
"eval_samples_per_second": 175.487, |
|
"eval_steps_per_second": 10.968, |
|
"step": 238068 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 4.4480315320461556e-05, |
|
"loss": 0.9453, |
|
"step": 241569 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"eval_loss": 0.9349226355552673, |
|
"eval_runtime": 177.3043, |
|
"eval_samples_per_second": 175.517, |
|
"eval_steps_per_second": 10.97, |
|
"step": 241569 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 4.4400319890323325e-05, |
|
"loss": 0.9458, |
|
"step": 245070 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.9337905645370483, |
|
"eval_runtime": 177.3396, |
|
"eval_samples_per_second": 175.482, |
|
"eval_steps_per_second": 10.968, |
|
"step": 245070 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 4.432032446018508e-05, |
|
"loss": 0.9411, |
|
"step": 248571 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"eval_loss": 0.9311910271644592, |
|
"eval_runtime": 177.2625, |
|
"eval_samples_per_second": 175.559, |
|
"eval_steps_per_second": 10.972, |
|
"step": 248571 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 4.4240329030046844e-05, |
|
"loss": 0.9443, |
|
"step": 252072 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_loss": 0.930526614189148, |
|
"eval_runtime": 177.3837, |
|
"eval_samples_per_second": 175.439, |
|
"eval_steps_per_second": 10.965, |
|
"step": 252072 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 4.4160333599908606e-05, |
|
"loss": 0.9418, |
|
"step": 255573 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"eval_loss": 0.9349842667579651, |
|
"eval_runtime": 177.3409, |
|
"eval_samples_per_second": 175.481, |
|
"eval_steps_per_second": 10.968, |
|
"step": 255573 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"learning_rate": 4.408033816977037e-05, |
|
"loss": 0.9423, |
|
"step": 259074 |
|
}, |
|
{ |
|
"epoch": 29.6, |
|
"eval_loss": 0.9312747716903687, |
|
"eval_runtime": 177.3094, |
|
"eval_samples_per_second": 175.512, |
|
"eval_steps_per_second": 10.97, |
|
"step": 259074 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.4000342739632125e-05, |
|
"loss": 0.9441, |
|
"step": 262575 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.9297969341278076, |
|
"eval_runtime": 177.4784, |
|
"eval_samples_per_second": 175.345, |
|
"eval_steps_per_second": 10.959, |
|
"step": 262575 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 4.3920347309493895e-05, |
|
"loss": 0.9388, |
|
"step": 266076 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"eval_loss": 0.9298827052116394, |
|
"eval_runtime": 177.3405, |
|
"eval_samples_per_second": 175.482, |
|
"eval_steps_per_second": 10.968, |
|
"step": 266076 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"learning_rate": 4.384035187935565e-05, |
|
"loss": 0.9418, |
|
"step": 269577 |
|
}, |
|
{ |
|
"epoch": 30.8, |
|
"eval_loss": 0.9313934445381165, |
|
"eval_runtime": 177.3424, |
|
"eval_samples_per_second": 175.48, |
|
"eval_steps_per_second": 10.967, |
|
"step": 269577 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"learning_rate": 4.376035644921741e-05, |
|
"loss": 0.9402, |
|
"step": 273078 |
|
}, |
|
{ |
|
"epoch": 31.2, |
|
"eval_loss": 0.933687150478363, |
|
"eval_runtime": 177.2623, |
|
"eval_samples_per_second": 175.559, |
|
"eval_steps_per_second": 10.972, |
|
"step": 273078 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 4.3680361019079176e-05, |
|
"loss": 0.9366, |
|
"step": 276579 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_loss": 0.927956223487854, |
|
"eval_runtime": 177.2474, |
|
"eval_samples_per_second": 175.574, |
|
"eval_steps_per_second": 10.973, |
|
"step": 276579 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 4.360036558894094e-05, |
|
"loss": 0.9406, |
|
"step": 280080 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.9269111156463623, |
|
"eval_runtime": 177.2222, |
|
"eval_samples_per_second": 175.599, |
|
"eval_steps_per_second": 10.975, |
|
"step": 280080 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"learning_rate": 4.3520370158802695e-05, |
|
"loss": 0.9362, |
|
"step": 283581 |
|
}, |
|
{ |
|
"epoch": 32.4, |
|
"eval_loss": 0.9323258399963379, |
|
"eval_runtime": 177.3606, |
|
"eval_samples_per_second": 175.462, |
|
"eval_steps_per_second": 10.966, |
|
"step": 283581 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"learning_rate": 4.3440374728664464e-05, |
|
"loss": 0.9389, |
|
"step": 287082 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_loss": 0.9279223084449768, |
|
"eval_runtime": 177.4942, |
|
"eval_samples_per_second": 175.33, |
|
"eval_steps_per_second": 10.958, |
|
"step": 287082 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"learning_rate": 4.336037929852622e-05, |
|
"loss": 0.9347, |
|
"step": 290583 |
|
}, |
|
{ |
|
"epoch": 33.2, |
|
"eval_loss": 0.9276158213615417, |
|
"eval_runtime": 177.5222, |
|
"eval_samples_per_second": 175.302, |
|
"eval_steps_per_second": 10.956, |
|
"step": 290583 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 4.328038386838798e-05, |
|
"loss": 0.9361, |
|
"step": 294084 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"eval_loss": 0.93123459815979, |
|
"eval_runtime": 177.5834, |
|
"eval_samples_per_second": 175.242, |
|
"eval_steps_per_second": 10.953, |
|
"step": 294084 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 4.3200388438249746e-05, |
|
"loss": 0.9382, |
|
"step": 297585 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.928411602973938, |
|
"eval_runtime": 177.6079, |
|
"eval_samples_per_second": 175.217, |
|
"eval_steps_per_second": 10.951, |
|
"step": 297585 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"learning_rate": 4.312039300811151e-05, |
|
"loss": 0.9319, |
|
"step": 301086 |
|
}, |
|
{ |
|
"epoch": 34.4, |
|
"eval_loss": 0.9270405173301697, |
|
"eval_runtime": 177.5221, |
|
"eval_samples_per_second": 175.302, |
|
"eval_steps_per_second": 10.956, |
|
"step": 301086 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"learning_rate": 4.304039757797327e-05, |
|
"loss": 0.9361, |
|
"step": 304587 |
|
}, |
|
{ |
|
"epoch": 34.8, |
|
"eval_loss": 0.9271640777587891, |
|
"eval_runtime": 177.3566, |
|
"eval_samples_per_second": 175.466, |
|
"eval_steps_per_second": 10.967, |
|
"step": 304587 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 4.2960402147835034e-05, |
|
"loss": 0.9339, |
|
"step": 308088 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"eval_loss": 0.9249849319458008, |
|
"eval_runtime": 177.2959, |
|
"eval_samples_per_second": 175.526, |
|
"eval_steps_per_second": 10.97, |
|
"step": 308088 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"learning_rate": 4.288040671769679e-05, |
|
"loss": 0.9329, |
|
"step": 311589 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"eval_loss": 0.9226134419441223, |
|
"eval_runtime": 177.4932, |
|
"eval_samples_per_second": 175.331, |
|
"eval_steps_per_second": 10.958, |
|
"step": 311589 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 4.280041128755856e-05, |
|
"loss": 0.9364, |
|
"step": 315090 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.9226271510124207, |
|
"eval_runtime": 177.4871, |
|
"eval_samples_per_second": 175.337, |
|
"eval_steps_per_second": 10.959, |
|
"step": 315090 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"learning_rate": 4.2720415857420315e-05, |
|
"loss": 0.9307, |
|
"step": 318591 |
|
}, |
|
{ |
|
"epoch": 36.4, |
|
"eval_loss": 0.9248347878456116, |
|
"eval_runtime": 177.8038, |
|
"eval_samples_per_second": 175.024, |
|
"eval_steps_per_second": 10.939, |
|
"step": 318591 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 4.264042042728208e-05, |
|
"loss": 0.9326, |
|
"step": 322092 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_loss": 0.9248685836791992, |
|
"eval_runtime": 177.5153, |
|
"eval_samples_per_second": 175.309, |
|
"eval_steps_per_second": 10.957, |
|
"step": 322092 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"learning_rate": 4.256042499714384e-05, |
|
"loss": 0.932, |
|
"step": 325593 |
|
}, |
|
{ |
|
"epoch": 37.2, |
|
"eval_loss": 0.9229134321212769, |
|
"eval_runtime": 177.4978, |
|
"eval_samples_per_second": 175.326, |
|
"eval_steps_per_second": 10.958, |
|
"step": 325593 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"learning_rate": 4.2480429567005596e-05, |
|
"loss": 0.9321, |
|
"step": 329094 |
|
}, |
|
{ |
|
"epoch": 37.6, |
|
"eval_loss": 0.9236814975738525, |
|
"eval_runtime": 178.5737, |
|
"eval_samples_per_second": 174.27, |
|
"eval_steps_per_second": 10.892, |
|
"step": 329094 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 4.240043413686736e-05, |
|
"loss": 0.9315, |
|
"step": 332595 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.9243875741958618, |
|
"eval_runtime": 178.4244, |
|
"eval_samples_per_second": 174.416, |
|
"eval_steps_per_second": 10.901, |
|
"step": 332595 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 4.232043870672912e-05, |
|
"loss": 0.9277, |
|
"step": 336096 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"eval_loss": 0.921384334564209, |
|
"eval_runtime": 178.5199, |
|
"eval_samples_per_second": 174.322, |
|
"eval_steps_per_second": 10.895, |
|
"step": 336096 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"learning_rate": 4.2240443276590885e-05, |
|
"loss": 0.9305, |
|
"step": 339597 |
|
}, |
|
{ |
|
"epoch": 38.8, |
|
"eval_loss": 0.924416184425354, |
|
"eval_runtime": 178.4499, |
|
"eval_samples_per_second": 174.391, |
|
"eval_steps_per_second": 10.899, |
|
"step": 339597 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"learning_rate": 4.216044784645265e-05, |
|
"loss": 0.9264, |
|
"step": 343098 |
|
}, |
|
{ |
|
"epoch": 39.2, |
|
"eval_loss": 0.9202448725700378, |
|
"eval_runtime": 178.5894, |
|
"eval_samples_per_second": 174.254, |
|
"eval_steps_per_second": 10.891, |
|
"step": 343098 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"learning_rate": 4.208045241631441e-05, |
|
"loss": 0.9272, |
|
"step": 346599 |
|
}, |
|
{ |
|
"epoch": 39.6, |
|
"eval_loss": 0.9173216223716736, |
|
"eval_runtime": 178.587, |
|
"eval_samples_per_second": 174.257, |
|
"eval_steps_per_second": 10.891, |
|
"step": 346599 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 4.2000456986176166e-05, |
|
"loss": 0.9292, |
|
"step": 350100 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.9211925268173218, |
|
"eval_runtime": 177.5554, |
|
"eval_samples_per_second": 175.269, |
|
"eval_steps_per_second": 10.954, |
|
"step": 350100 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"learning_rate": 4.1920461556037935e-05, |
|
"loss": 0.9237, |
|
"step": 353601 |
|
}, |
|
{ |
|
"epoch": 40.4, |
|
"eval_loss": 0.9185072183609009, |
|
"eval_runtime": 177.3103, |
|
"eval_samples_per_second": 175.512, |
|
"eval_steps_per_second": 10.969, |
|
"step": 353601 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"learning_rate": 4.184046612589969e-05, |
|
"loss": 0.9277, |
|
"step": 357102 |
|
}, |
|
{ |
|
"epoch": 40.8, |
|
"eval_loss": 0.9214913845062256, |
|
"eval_runtime": 177.444, |
|
"eval_samples_per_second": 175.379, |
|
"eval_steps_per_second": 10.961, |
|
"step": 357102 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"learning_rate": 4.1760470695761454e-05, |
|
"loss": 0.9262, |
|
"step": 360603 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"eval_loss": 0.9183996319770813, |
|
"eval_runtime": 177.4174, |
|
"eval_samples_per_second": 175.406, |
|
"eval_steps_per_second": 10.963, |
|
"step": 360603 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 4.168047526562322e-05, |
|
"loss": 0.9247, |
|
"step": 364104 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"eval_loss": 0.921753466129303, |
|
"eval_runtime": 177.4686, |
|
"eval_samples_per_second": 175.355, |
|
"eval_steps_per_second": 10.96, |
|
"step": 364104 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 4.160047983548498e-05, |
|
"loss": 0.9265, |
|
"step": 367605 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 0.9194549918174744, |
|
"eval_runtime": 177.4828, |
|
"eval_samples_per_second": 175.341, |
|
"eval_steps_per_second": 10.959, |
|
"step": 367605 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 4.1520484405346736e-05, |
|
"loss": 0.9228, |
|
"step": 371106 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"eval_loss": 0.9182046055793762, |
|
"eval_runtime": 177.5745, |
|
"eval_samples_per_second": 175.25, |
|
"eval_steps_per_second": 10.953, |
|
"step": 371106 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 4.1440488975208505e-05, |
|
"loss": 0.9239, |
|
"step": 374607 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"eval_loss": 0.9209604859352112, |
|
"eval_runtime": 177.5469, |
|
"eval_samples_per_second": 175.278, |
|
"eval_steps_per_second": 10.955, |
|
"step": 374607 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 4.136049354507026e-05, |
|
"loss": 0.9234, |
|
"step": 378108 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"eval_loss": 0.9235459566116333, |
|
"eval_runtime": 177.4486, |
|
"eval_samples_per_second": 175.375, |
|
"eval_steps_per_second": 10.961, |
|
"step": 378108 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"learning_rate": 4.1280498114932024e-05, |
|
"loss": 0.9235, |
|
"step": 381609 |
|
}, |
|
{ |
|
"epoch": 43.6, |
|
"eval_loss": 0.9178280234336853, |
|
"eval_runtime": 177.5134, |
|
"eval_samples_per_second": 175.311, |
|
"eval_steps_per_second": 10.957, |
|
"step": 381609 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 4.1200502684793786e-05, |
|
"loss": 0.9251, |
|
"step": 385110 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 0.913722574710846, |
|
"eval_runtime": 177.523, |
|
"eval_samples_per_second": 175.301, |
|
"eval_steps_per_second": 10.956, |
|
"step": 385110 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"learning_rate": 4.112050725465555e-05, |
|
"loss": 0.9188, |
|
"step": 388611 |
|
}, |
|
{ |
|
"epoch": 44.4, |
|
"eval_loss": 0.9187389612197876, |
|
"eval_runtime": 177.5478, |
|
"eval_samples_per_second": 175.277, |
|
"eval_steps_per_second": 10.955, |
|
"step": 388611 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 4.1040511824517305e-05, |
|
"loss": 0.9215, |
|
"step": 392112 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"eval_loss": 0.9166533946990967, |
|
"eval_runtime": 177.4821, |
|
"eval_samples_per_second": 175.342, |
|
"eval_steps_per_second": 10.959, |
|
"step": 392112 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"learning_rate": 4.0960516394379075e-05, |
|
"loss": 0.9203, |
|
"step": 395613 |
|
}, |
|
{ |
|
"epoch": 45.2, |
|
"eval_loss": 0.9170865416526794, |
|
"eval_runtime": 177.4835, |
|
"eval_samples_per_second": 175.34, |
|
"eval_steps_per_second": 10.959, |
|
"step": 395613 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"learning_rate": 4.088052096424083e-05, |
|
"loss": 0.9206, |
|
"step": 399114 |
|
}, |
|
{ |
|
"epoch": 45.6, |
|
"eval_loss": 0.9114164113998413, |
|
"eval_runtime": 177.3931, |
|
"eval_samples_per_second": 175.43, |
|
"eval_steps_per_second": 10.964, |
|
"step": 399114 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 4.080052553410259e-05, |
|
"loss": 0.9226, |
|
"step": 402615 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 0.9157847762107849, |
|
"eval_runtime": 177.4174, |
|
"eval_samples_per_second": 175.406, |
|
"eval_steps_per_second": 10.963, |
|
"step": 402615 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 4.0720530103964356e-05, |
|
"loss": 0.9172, |
|
"step": 406116 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"eval_loss": 0.9159908294677734, |
|
"eval_runtime": 177.5134, |
|
"eval_samples_per_second": 175.311, |
|
"eval_steps_per_second": 10.957, |
|
"step": 406116 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"learning_rate": 4.064053467382612e-05, |
|
"loss": 0.918, |
|
"step": 409617 |
|
}, |
|
{ |
|
"epoch": 46.8, |
|
"eval_loss": 0.9126763343811035, |
|
"eval_runtime": 177.4115, |
|
"eval_samples_per_second": 175.411, |
|
"eval_steps_per_second": 10.963, |
|
"step": 409617 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"learning_rate": 4.056053924368788e-05, |
|
"loss": 0.9177, |
|
"step": 413118 |
|
}, |
|
{ |
|
"epoch": 47.2, |
|
"eval_loss": 0.9146538376808167, |
|
"eval_runtime": 177.5159, |
|
"eval_samples_per_second": 175.308, |
|
"eval_steps_per_second": 10.957, |
|
"step": 413118 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"learning_rate": 4.0480543813549644e-05, |
|
"loss": 0.918, |
|
"step": 416619 |
|
}, |
|
{ |
|
"epoch": 47.6, |
|
"eval_loss": 0.9144261479377747, |
|
"eval_runtime": 177.4299, |
|
"eval_samples_per_second": 175.393, |
|
"eval_steps_per_second": 10.962, |
|
"step": 416619 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 4.04005483834114e-05, |
|
"loss": 0.9186, |
|
"step": 420120 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 0.9152446389198303, |
|
"eval_runtime": 177.4928, |
|
"eval_samples_per_second": 175.331, |
|
"eval_steps_per_second": 10.958, |
|
"step": 420120 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"learning_rate": 4.032055295327317e-05, |
|
"loss": 0.9145, |
|
"step": 423621 |
|
}, |
|
{ |
|
"epoch": 48.4, |
|
"eval_loss": 0.9146500825881958, |
|
"eval_runtime": 177.6162, |
|
"eval_samples_per_second": 175.209, |
|
"eval_steps_per_second": 10.951, |
|
"step": 423621 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"learning_rate": 4.0240557523134926e-05, |
|
"loss": 0.9176, |
|
"step": 427122 |
|
}, |
|
{ |
|
"epoch": 48.8, |
|
"eval_loss": 0.91416335105896, |
|
"eval_runtime": 177.5943, |
|
"eval_samples_per_second": 175.231, |
|
"eval_steps_per_second": 10.952, |
|
"step": 427122 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"learning_rate": 4.016056209299669e-05, |
|
"loss": 0.9146, |
|
"step": 430623 |
|
}, |
|
{ |
|
"epoch": 49.2, |
|
"eval_loss": 0.9143691658973694, |
|
"eval_runtime": 177.6008, |
|
"eval_samples_per_second": 175.224, |
|
"eval_steps_per_second": 10.952, |
|
"step": 430623 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 4.008056666285845e-05, |
|
"loss": 0.9163, |
|
"step": 434124 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"eval_loss": 0.9136433601379395, |
|
"eval_runtime": 177.4417, |
|
"eval_samples_per_second": 175.382, |
|
"eval_steps_per_second": 10.961, |
|
"step": 434124 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 4.0000571232720214e-05, |
|
"loss": 0.9164, |
|
"step": 437625 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 0.9115509390830994, |
|
"eval_runtime": 178.4645, |
|
"eval_samples_per_second": 174.376, |
|
"eval_steps_per_second": 10.899, |
|
"step": 437625 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"learning_rate": 3.992057580258197e-05, |
|
"loss": 0.9102, |
|
"step": 441126 |
|
}, |
|
{ |
|
"epoch": 50.4, |
|
"eval_loss": 0.9133750796318054, |
|
"eval_runtime": 178.2605, |
|
"eval_samples_per_second": 174.576, |
|
"eval_steps_per_second": 10.911, |
|
"step": 441126 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 3.984058037244374e-05, |
|
"loss": 0.9149, |
|
"step": 444627 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"eval_loss": 0.9099620580673218, |
|
"eval_runtime": 177.7712, |
|
"eval_samples_per_second": 175.056, |
|
"eval_steps_per_second": 10.941, |
|
"step": 444627 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 3.9760584942305495e-05, |
|
"loss": 0.9155, |
|
"step": 448128 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"eval_loss": 0.9076240658760071, |
|
"eval_runtime": 177.4509, |
|
"eval_samples_per_second": 175.372, |
|
"eval_steps_per_second": 10.961, |
|
"step": 448128 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"learning_rate": 3.968058951216726e-05, |
|
"loss": 0.9128, |
|
"step": 451629 |
|
}, |
|
{ |
|
"epoch": 51.6, |
|
"eval_loss": 0.9097868204116821, |
|
"eval_runtime": 177.4759, |
|
"eval_samples_per_second": 175.348, |
|
"eval_steps_per_second": 10.959, |
|
"step": 451629 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 3.960059408202902e-05, |
|
"loss": 0.9128, |
|
"step": 455130 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 0.9142981171607971, |
|
"eval_runtime": 177.8314, |
|
"eval_samples_per_second": 174.997, |
|
"eval_steps_per_second": 10.937, |
|
"step": 455130 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"learning_rate": 3.952059865189078e-05, |
|
"loss": 0.9098, |
|
"step": 458631 |
|
}, |
|
{ |
|
"epoch": 52.4, |
|
"eval_loss": 0.9127160310745239, |
|
"eval_runtime": 177.596, |
|
"eval_samples_per_second": 175.229, |
|
"eval_steps_per_second": 10.952, |
|
"step": 458631 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 3.944060322175254e-05, |
|
"loss": 0.9121, |
|
"step": 462132 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"eval_loss": 0.911176323890686, |
|
"eval_runtime": 177.6183, |
|
"eval_samples_per_second": 175.207, |
|
"eval_steps_per_second": 10.95, |
|
"step": 462132 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"learning_rate": 3.936060779161431e-05, |
|
"loss": 0.9128, |
|
"step": 465633 |
|
}, |
|
{ |
|
"epoch": 53.2, |
|
"eval_loss": 0.9088250398635864, |
|
"eval_runtime": 177.3192, |
|
"eval_samples_per_second": 175.503, |
|
"eval_steps_per_second": 10.969, |
|
"step": 465633 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"learning_rate": 3.9280612361476065e-05, |
|
"loss": 0.9068, |
|
"step": 469134 |
|
}, |
|
{ |
|
"epoch": 53.6, |
|
"eval_loss": 0.9073150157928467, |
|
"eval_runtime": 177.6629, |
|
"eval_samples_per_second": 175.163, |
|
"eval_steps_per_second": 10.948, |
|
"step": 469134 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 3.920061693133783e-05, |
|
"loss": 0.9116, |
|
"step": 472635 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 0.9075030088424683, |
|
"eval_runtime": 177.5925, |
|
"eval_samples_per_second": 175.233, |
|
"eval_steps_per_second": 10.952, |
|
"step": 472635 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 3.912062150119959e-05, |
|
"loss": 0.9071, |
|
"step": 476136 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"eval_loss": 0.9067005515098572, |
|
"eval_runtime": 177.5266, |
|
"eval_samples_per_second": 175.298, |
|
"eval_steps_per_second": 10.956, |
|
"step": 476136 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"learning_rate": 3.904062607106135e-05, |
|
"loss": 0.9104, |
|
"step": 479637 |
|
}, |
|
{ |
|
"epoch": 54.8, |
|
"eval_loss": 0.9073493480682373, |
|
"eval_runtime": 177.4472, |
|
"eval_samples_per_second": 175.376, |
|
"eval_steps_per_second": 10.961, |
|
"step": 479637 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"learning_rate": 3.8960630640923115e-05, |
|
"loss": 0.9054, |
|
"step": 483138 |
|
}, |
|
{ |
|
"epoch": 55.2, |
|
"eval_loss": 0.9102022051811218, |
|
"eval_runtime": 177.6725, |
|
"eval_samples_per_second": 175.154, |
|
"eval_steps_per_second": 10.947, |
|
"step": 483138 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"learning_rate": 3.888063521078488e-05, |
|
"loss": 0.9097, |
|
"step": 486639 |
|
}, |
|
{ |
|
"epoch": 55.6, |
|
"eval_loss": 0.9054126143455505, |
|
"eval_runtime": 177.6461, |
|
"eval_samples_per_second": 175.18, |
|
"eval_steps_per_second": 10.949, |
|
"step": 486639 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 3.8800639780646634e-05, |
|
"loss": 0.9091, |
|
"step": 490140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 0.9067865014076233, |
|
"eval_runtime": 177.5605, |
|
"eval_samples_per_second": 175.264, |
|
"eval_steps_per_second": 10.954, |
|
"step": 490140 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"learning_rate": 3.8720644350508404e-05, |
|
"loss": 0.9047, |
|
"step": 493641 |
|
}, |
|
{ |
|
"epoch": 56.4, |
|
"eval_loss": 0.9057883024215698, |
|
"eval_runtime": 177.3995, |
|
"eval_samples_per_second": 175.423, |
|
"eval_steps_per_second": 10.964, |
|
"step": 493641 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"learning_rate": 3.864064892037016e-05, |
|
"loss": 0.9076, |
|
"step": 497142 |
|
}, |
|
{ |
|
"epoch": 56.8, |
|
"eval_loss": 0.9096718430519104, |
|
"eval_runtime": 177.7746, |
|
"eval_samples_per_second": 175.053, |
|
"eval_steps_per_second": 10.941, |
|
"step": 497142 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"learning_rate": 3.856065349023192e-05, |
|
"loss": 0.9052, |
|
"step": 500643 |
|
}, |
|
{ |
|
"epoch": 57.2, |
|
"eval_loss": 0.9001969695091248, |
|
"eval_runtime": 177.6819, |
|
"eval_samples_per_second": 175.144, |
|
"eval_steps_per_second": 10.947, |
|
"step": 500643 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 3.8480658060093685e-05, |
|
"loss": 0.9047, |
|
"step": 504144 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"eval_loss": 0.9067962765693665, |
|
"eval_runtime": 177.5691, |
|
"eval_samples_per_second": 175.256, |
|
"eval_steps_per_second": 10.953, |
|
"step": 504144 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 3.840066262995545e-05, |
|
"loss": 0.9061, |
|
"step": 507645 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 0.9069446921348572, |
|
"eval_runtime": 177.6225, |
|
"eval_samples_per_second": 175.203, |
|
"eval_steps_per_second": 10.95, |
|
"step": 507645 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"learning_rate": 3.8320667199817204e-05, |
|
"loss": 0.9004, |
|
"step": 511146 |
|
}, |
|
{ |
|
"epoch": 58.4, |
|
"eval_loss": 0.9056394696235657, |
|
"eval_runtime": 177.6384, |
|
"eval_samples_per_second": 175.187, |
|
"eval_steps_per_second": 10.949, |
|
"step": 511146 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"learning_rate": 3.824067176967897e-05, |
|
"loss": 0.9056, |
|
"step": 514647 |
|
}, |
|
{ |
|
"epoch": 58.8, |
|
"eval_loss": 0.9041665196418762, |
|
"eval_runtime": 177.5114, |
|
"eval_samples_per_second": 175.313, |
|
"eval_steps_per_second": 10.957, |
|
"step": 514647 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 3.816067633954073e-05, |
|
"loss": 0.9038, |
|
"step": 518148 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"eval_loss": 0.9006583094596863, |
|
"eval_runtime": 177.5976, |
|
"eval_samples_per_second": 175.228, |
|
"eval_steps_per_second": 10.952, |
|
"step": 518148 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"learning_rate": 3.808068090940249e-05, |
|
"loss": 0.9033, |
|
"step": 521649 |
|
}, |
|
{ |
|
"epoch": 59.6, |
|
"eval_loss": 0.905081570148468, |
|
"eval_runtime": 177.5456, |
|
"eval_samples_per_second": 175.279, |
|
"eval_steps_per_second": 10.955, |
|
"step": 521649 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 3.8000685479264255e-05, |
|
"loss": 0.903, |
|
"step": 525150 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 0.9015125632286072, |
|
"eval_runtime": 177.5682, |
|
"eval_samples_per_second": 175.257, |
|
"eval_steps_per_second": 10.954, |
|
"step": 525150 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"learning_rate": 3.792069004912602e-05, |
|
"loss": 0.9001, |
|
"step": 528651 |
|
}, |
|
{ |
|
"epoch": 60.4, |
|
"eval_loss": 0.9008970856666565, |
|
"eval_runtime": 177.5673, |
|
"eval_samples_per_second": 175.258, |
|
"eval_steps_per_second": 10.954, |
|
"step": 528651 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 3.784069461898777e-05, |
|
"loss": 0.9039, |
|
"step": 532152 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"eval_loss": 0.8996139764785767, |
|
"eval_runtime": 177.5892, |
|
"eval_samples_per_second": 175.236, |
|
"eval_steps_per_second": 10.952, |
|
"step": 532152 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"learning_rate": 3.776069918884954e-05, |
|
"loss": 0.9026, |
|
"step": 535653 |
|
}, |
|
{ |
|
"epoch": 61.2, |
|
"eval_loss": 0.9031299948692322, |
|
"eval_runtime": 177.6018, |
|
"eval_samples_per_second": 175.223, |
|
"eval_steps_per_second": 10.951, |
|
"step": 535653 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"learning_rate": 3.76807037587113e-05, |
|
"loss": 0.901, |
|
"step": 539154 |
|
}, |
|
{ |
|
"epoch": 61.6, |
|
"eval_loss": 0.9000225067138672, |
|
"eval_runtime": 177.4957, |
|
"eval_samples_per_second": 175.328, |
|
"eval_steps_per_second": 10.958, |
|
"step": 539154 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 3.760070832857306e-05, |
|
"loss": 0.9026, |
|
"step": 542655 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 0.8991663455963135, |
|
"eval_runtime": 177.5752, |
|
"eval_samples_per_second": 175.25, |
|
"eval_steps_per_second": 10.953, |
|
"step": 542655 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 3.7520712898434824e-05, |
|
"loss": 0.8988, |
|
"step": 546156 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"eval_loss": 0.9008954763412476, |
|
"eval_runtime": 177.533, |
|
"eval_samples_per_second": 175.291, |
|
"eval_steps_per_second": 10.956, |
|
"step": 546156 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"learning_rate": 3.744071746829659e-05, |
|
"loss": 0.8994, |
|
"step": 549657 |
|
}, |
|
{ |
|
"epoch": 62.8, |
|
"eval_loss": 0.9005922675132751, |
|
"eval_runtime": 177.6151, |
|
"eval_samples_per_second": 175.21, |
|
"eval_steps_per_second": 10.951, |
|
"step": 549657 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"learning_rate": 3.736072203815835e-05, |
|
"loss": 0.8983, |
|
"step": 553158 |
|
}, |
|
{ |
|
"epoch": 63.2, |
|
"eval_loss": 0.9030284881591797, |
|
"eval_runtime": 177.5458, |
|
"eval_samples_per_second": 175.279, |
|
"eval_steps_per_second": 10.955, |
|
"step": 553158 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"learning_rate": 3.728072660802011e-05, |
|
"loss": 0.9, |
|
"step": 556659 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"eval_loss": 0.8991916179656982, |
|
"eval_runtime": 177.4727, |
|
"eval_samples_per_second": 175.351, |
|
"eval_steps_per_second": 10.959, |
|
"step": 556659 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 3.720073117788187e-05, |
|
"loss": 0.8994, |
|
"step": 560160 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 0.8985081315040588, |
|
"eval_runtime": 177.5551, |
|
"eval_samples_per_second": 175.27, |
|
"eval_steps_per_second": 10.954, |
|
"step": 560160 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"learning_rate": 3.712073574774364e-05, |
|
"loss": 0.8953, |
|
"step": 563661 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"eval_loss": 0.900728166103363, |
|
"eval_runtime": 177.7452, |
|
"eval_samples_per_second": 175.082, |
|
"eval_steps_per_second": 10.943, |
|
"step": 563661 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"learning_rate": 3.7040740317605394e-05, |
|
"loss": 0.8991, |
|
"step": 567162 |
|
}, |
|
{ |
|
"epoch": 64.8, |
|
"eval_loss": 0.902450680732727, |
|
"eval_runtime": 177.6955, |
|
"eval_samples_per_second": 175.131, |
|
"eval_steps_per_second": 10.946, |
|
"step": 567162 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"learning_rate": 3.6960744887467156e-05, |
|
"loss": 0.8964, |
|
"step": 570663 |
|
}, |
|
{ |
|
"epoch": 65.2, |
|
"eval_loss": 0.8983866572380066, |
|
"eval_runtime": 177.5924, |
|
"eval_samples_per_second": 175.233, |
|
"eval_steps_per_second": 10.952, |
|
"step": 570663 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 3.688074945732892e-05, |
|
"loss": 0.8959, |
|
"step": 574164 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"eval_loss": 0.9006548523902893, |
|
"eval_runtime": 177.7333, |
|
"eval_samples_per_second": 175.094, |
|
"eval_steps_per_second": 10.943, |
|
"step": 574164 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 3.680075402719068e-05, |
|
"loss": 0.8981, |
|
"step": 577665 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 0.8985300064086914, |
|
"eval_runtime": 177.5714, |
|
"eval_samples_per_second": 175.253, |
|
"eval_steps_per_second": 10.953, |
|
"step": 577665 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"learning_rate": 3.672075859705244e-05, |
|
"loss": 0.8922, |
|
"step": 581166 |
|
}, |
|
{ |
|
"epoch": 66.4, |
|
"eval_loss": 0.8925117254257202, |
|
"eval_runtime": 177.5096, |
|
"eval_samples_per_second": 175.314, |
|
"eval_steps_per_second": 10.957, |
|
"step": 581166 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"learning_rate": 3.664076316691421e-05, |
|
"loss": 0.896, |
|
"step": 584667 |
|
}, |
|
{ |
|
"epoch": 66.8, |
|
"eval_loss": 0.8964714407920837, |
|
"eval_runtime": 177.5858, |
|
"eval_samples_per_second": 175.239, |
|
"eval_steps_per_second": 10.952, |
|
"step": 584667 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 3.656076773677596e-05, |
|
"loss": 0.8942, |
|
"step": 588168 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"eval_loss": 0.8949043154716492, |
|
"eval_runtime": 177.7347, |
|
"eval_samples_per_second": 175.092, |
|
"eval_steps_per_second": 10.943, |
|
"step": 588168 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"learning_rate": 3.6480772306637726e-05, |
|
"loss": 0.8937, |
|
"step": 591669 |
|
}, |
|
{ |
|
"epoch": 67.6, |
|
"eval_loss": 0.8952317237854004, |
|
"eval_runtime": 177.6327, |
|
"eval_samples_per_second": 175.193, |
|
"eval_steps_per_second": 10.95, |
|
"step": 591669 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 3.640077687649949e-05, |
|
"loss": 0.8953, |
|
"step": 595170 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 0.894290566444397, |
|
"eval_runtime": 177.4358, |
|
"eval_samples_per_second": 175.387, |
|
"eval_steps_per_second": 10.962, |
|
"step": 595170 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"learning_rate": 3.632078144636125e-05, |
|
"loss": 0.8907, |
|
"step": 598671 |
|
}, |
|
{ |
|
"epoch": 68.4, |
|
"eval_loss": 0.8952488899230957, |
|
"eval_runtime": 177.6568, |
|
"eval_samples_per_second": 175.169, |
|
"eval_steps_per_second": 10.948, |
|
"step": 598671 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 3.624078601622301e-05, |
|
"loss": 0.8919, |
|
"step": 602172 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"eval_loss": 0.8931904435157776, |
|
"eval_runtime": 177.5937, |
|
"eval_samples_per_second": 175.231, |
|
"eval_steps_per_second": 10.952, |
|
"step": 602172 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"learning_rate": 3.616079058608478e-05, |
|
"loss": 0.8933, |
|
"step": 605673 |
|
}, |
|
{ |
|
"epoch": 69.2, |
|
"eval_loss": 0.89773029088974, |
|
"eval_runtime": 177.7324, |
|
"eval_samples_per_second": 175.095, |
|
"eval_steps_per_second": 10.943, |
|
"step": 605673 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 3.608079515594653e-05, |
|
"loss": 0.891, |
|
"step": 609174 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"eval_loss": 0.894548773765564, |
|
"eval_runtime": 177.5908, |
|
"eval_samples_per_second": 175.234, |
|
"eval_steps_per_second": 10.952, |
|
"step": 609174 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 3.6000799725808295e-05, |
|
"loss": 0.8932, |
|
"step": 612675 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 0.8936890959739685, |
|
"eval_runtime": 177.5711, |
|
"eval_samples_per_second": 175.254, |
|
"eval_steps_per_second": 10.953, |
|
"step": 612675 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 3.592080429567006e-05, |
|
"loss": 0.8882, |
|
"step": 616176 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"eval_loss": 0.8970974683761597, |
|
"eval_runtime": 177.5388, |
|
"eval_samples_per_second": 175.286, |
|
"eval_steps_per_second": 10.955, |
|
"step": 616176 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"learning_rate": 3.584080886553182e-05, |
|
"loss": 0.8907, |
|
"step": 619677 |
|
}, |
|
{ |
|
"epoch": 70.8, |
|
"eval_loss": 0.8894772529602051, |
|
"eval_runtime": 178.5288, |
|
"eval_samples_per_second": 174.314, |
|
"eval_steps_per_second": 10.895, |
|
"step": 619677 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"learning_rate": 3.5760813435393584e-05, |
|
"loss": 0.8893, |
|
"step": 623178 |
|
}, |
|
{ |
|
"epoch": 71.2, |
|
"eval_loss": 0.8943666219711304, |
|
"eval_runtime": 177.452, |
|
"eval_samples_per_second": 175.371, |
|
"eval_steps_per_second": 10.961, |
|
"step": 623178 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"learning_rate": 3.5680818005255346e-05, |
|
"loss": 0.8883, |
|
"step": 626679 |
|
}, |
|
{ |
|
"epoch": 71.6, |
|
"eval_loss": 0.892691969871521, |
|
"eval_runtime": 177.2653, |
|
"eval_samples_per_second": 175.556, |
|
"eval_steps_per_second": 10.972, |
|
"step": 626679 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 3.56008225751171e-05, |
|
"loss": 0.8917, |
|
"step": 630180 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 0.890504777431488, |
|
"eval_runtime": 177.5314, |
|
"eval_samples_per_second": 175.293, |
|
"eval_steps_per_second": 10.956, |
|
"step": 630180 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"learning_rate": 3.552082714497887e-05, |
|
"loss": 0.8862, |
|
"step": 633681 |
|
}, |
|
{ |
|
"epoch": 72.4, |
|
"eval_loss": 0.8900084495544434, |
|
"eval_runtime": 177.3085, |
|
"eval_samples_per_second": 175.513, |
|
"eval_steps_per_second": 10.97, |
|
"step": 633681 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"learning_rate": 3.544083171484063e-05, |
|
"loss": 0.8886, |
|
"step": 637182 |
|
}, |
|
{ |
|
"epoch": 72.8, |
|
"eval_loss": 0.8953748941421509, |
|
"eval_runtime": 177.334, |
|
"eval_samples_per_second": 175.488, |
|
"eval_steps_per_second": 10.968, |
|
"step": 637182 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"learning_rate": 3.536083628470239e-05, |
|
"loss": 0.8874, |
|
"step": 640683 |
|
}, |
|
{ |
|
"epoch": 73.2, |
|
"eval_loss": 0.8892679810523987, |
|
"eval_runtime": 177.3431, |
|
"eval_samples_per_second": 175.479, |
|
"eval_steps_per_second": 10.967, |
|
"step": 640683 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 3.528084085456415e-05, |
|
"loss": 0.8866, |
|
"step": 644184 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"eval_loss": 0.8939085602760315, |
|
"eval_runtime": 177.3428, |
|
"eval_samples_per_second": 175.479, |
|
"eval_steps_per_second": 10.967, |
|
"step": 644184 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 3.520084542442591e-05, |
|
"loss": 0.8893, |
|
"step": 647685 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 0.8904389142990112, |
|
"eval_runtime": 177.4175, |
|
"eval_samples_per_second": 175.406, |
|
"eval_steps_per_second": 10.963, |
|
"step": 647685 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"learning_rate": 3.512084999428767e-05, |
|
"loss": 0.8838, |
|
"step": 651186 |
|
}, |
|
{ |
|
"epoch": 74.4, |
|
"eval_loss": 0.8905112743377686, |
|
"eval_runtime": 177.3904, |
|
"eval_samples_per_second": 175.432, |
|
"eval_steps_per_second": 10.965, |
|
"step": 651186 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"learning_rate": 3.5040854564149435e-05, |
|
"loss": 0.8846, |
|
"step": 654687 |
|
}, |
|
{ |
|
"epoch": 74.8, |
|
"eval_loss": 0.8922948241233826, |
|
"eval_runtime": 177.434, |
|
"eval_samples_per_second": 175.389, |
|
"eval_steps_per_second": 10.962, |
|
"step": 654687 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"learning_rate": 3.49608591340112e-05, |
|
"loss": 0.8862, |
|
"step": 658188 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"eval_loss": 0.8935458660125732, |
|
"eval_runtime": 178.2544, |
|
"eval_samples_per_second": 174.582, |
|
"eval_steps_per_second": 10.911, |
|
"step": 658188 |
|
}, |
|
{ |
|
"epoch": 75.6, |
|
"learning_rate": 3.488086370387296e-05, |
|
"loss": 0.8832, |
|
"step": 661689 |
|
}, |
|
{ |
|
"epoch": 75.6, |
|
"eval_loss": 0.8909444808959961, |
|
"eval_runtime": 177.513, |
|
"eval_samples_per_second": 175.311, |
|
"eval_steps_per_second": 10.957, |
|
"step": 661689 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 3.480086827373472e-05, |
|
"loss": 0.8843, |
|
"step": 665190 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 0.8939051032066345, |
|
"eval_runtime": 177.5511, |
|
"eval_samples_per_second": 175.273, |
|
"eval_steps_per_second": 10.955, |
|
"step": 665190 |
|
}, |
|
{ |
|
"epoch": 76.4, |
|
"learning_rate": 3.472087284359648e-05, |
|
"loss": 0.8819, |
|
"step": 668691 |
|
}, |
|
{ |
|
"epoch": 76.4, |
|
"eval_loss": 0.8863644599914551, |
|
"eval_runtime": 177.5405, |
|
"eval_samples_per_second": 175.284, |
|
"eval_steps_per_second": 10.955, |
|
"step": 668691 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 3.464087741345825e-05, |
|
"loss": 0.8848, |
|
"step": 672192 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"eval_loss": 0.8871325850486755, |
|
"eval_runtime": 177.6785, |
|
"eval_samples_per_second": 175.148, |
|
"eval_steps_per_second": 10.947, |
|
"step": 672192 |
|
}, |
|
{ |
|
"epoch": 77.2, |
|
"learning_rate": 3.4560881983320004e-05, |
|
"loss": 0.8825, |
|
"step": 675693 |
|
}, |
|
{ |
|
"epoch": 77.2, |
|
"eval_loss": 0.8879318237304688, |
|
"eval_runtime": 177.5519, |
|
"eval_samples_per_second": 175.273, |
|
"eval_steps_per_second": 10.955, |
|
"step": 675693 |
|
}, |
|
{ |
|
"epoch": 77.6, |
|
"learning_rate": 3.448088655318177e-05, |
|
"loss": 0.8812, |
|
"step": 679194 |
|
}, |
|
{ |
|
"epoch": 77.6, |
|
"eval_loss": 0.88758385181427, |
|
"eval_runtime": 177.5541, |
|
"eval_samples_per_second": 175.271, |
|
"eval_steps_per_second": 10.954, |
|
"step": 679194 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 3.440089112304353e-05, |
|
"loss": 0.8833, |
|
"step": 682695 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 0.8914999961853027, |
|
"eval_runtime": 178.2748, |
|
"eval_samples_per_second": 174.562, |
|
"eval_steps_per_second": 10.91, |
|
"step": 682695 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"learning_rate": 3.432089569290529e-05, |
|
"loss": 0.8817, |
|
"step": 686196 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"eval_loss": 0.8898407816886902, |
|
"eval_runtime": 178.1701, |
|
"eval_samples_per_second": 174.665, |
|
"eval_steps_per_second": 10.917, |
|
"step": 686196 |
|
}, |
|
{ |
|
"epoch": 78.8, |
|
"learning_rate": 3.424090026276705e-05, |
|
"loss": 0.8834, |
|
"step": 689697 |
|
}, |
|
{ |
|
"epoch": 78.8, |
|
"eval_loss": 0.8876122236251831, |
|
"eval_runtime": 178.2895, |
|
"eval_samples_per_second": 174.548, |
|
"eval_steps_per_second": 10.909, |
|
"step": 689697 |
|
}, |
|
{ |
|
"epoch": 79.2, |
|
"learning_rate": 3.416090483262882e-05, |
|
"loss": 0.8808, |
|
"step": 693198 |
|
}, |
|
{ |
|
"epoch": 79.2, |
|
"eval_loss": 0.8858633041381836, |
|
"eval_runtime": 177.9937, |
|
"eval_samples_per_second": 174.838, |
|
"eval_steps_per_second": 10.927, |
|
"step": 693198 |
|
}, |
|
{ |
|
"epoch": 79.6, |
|
"learning_rate": 3.4080909402490574e-05, |
|
"loss": 0.8801, |
|
"step": 696699 |
|
}, |
|
{ |
|
"epoch": 79.6, |
|
"eval_loss": 0.8899697661399841, |
|
"eval_runtime": 178.1844, |
|
"eval_samples_per_second": 174.651, |
|
"eval_steps_per_second": 10.916, |
|
"step": 696699 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 3.4000913972352336e-05, |
|
"loss": 0.8799, |
|
"step": 700200 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 0.887626051902771, |
|
"eval_runtime": 178.2075, |
|
"eval_samples_per_second": 174.628, |
|
"eval_steps_per_second": 10.914, |
|
"step": 700200 |
|
}, |
|
{ |
|
"epoch": 80.4, |
|
"learning_rate": 3.39209185422141e-05, |
|
"loss": 0.8774, |
|
"step": 703701 |
|
}, |
|
{ |
|
"epoch": 80.4, |
|
"eval_loss": 0.8899440169334412, |
|
"eval_runtime": 178.0542, |
|
"eval_samples_per_second": 174.778, |
|
"eval_steps_per_second": 10.924, |
|
"step": 703701 |
|
}, |
|
{ |
|
"epoch": 80.8, |
|
"learning_rate": 3.384092311207586e-05, |
|
"loss": 0.8798, |
|
"step": 707202 |
|
}, |
|
{ |
|
"epoch": 80.8, |
|
"eval_loss": 0.8822316527366638, |
|
"eval_runtime": 178.2892, |
|
"eval_samples_per_second": 174.548, |
|
"eval_steps_per_second": 10.909, |
|
"step": 707202 |
|
}, |
|
{ |
|
"epoch": 81.2, |
|
"learning_rate": 3.376092768193762e-05, |
|
"loss": 0.8783, |
|
"step": 710703 |
|
}, |
|
{ |
|
"epoch": 81.2, |
|
"eval_loss": 0.889187216758728, |
|
"eval_runtime": 177.9073, |
|
"eval_samples_per_second": 174.923, |
|
"eval_steps_per_second": 10.933, |
|
"step": 710703 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"learning_rate": 3.368093225179939e-05, |
|
"loss": 0.879, |
|
"step": 714204 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"eval_loss": 0.8858596086502075, |
|
"eval_runtime": 177.9871, |
|
"eval_samples_per_second": 174.844, |
|
"eval_steps_per_second": 10.928, |
|
"step": 714204 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 3.360093682166114e-05, |
|
"loss": 0.8805, |
|
"step": 717705 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 0.8828133940696716, |
|
"eval_runtime": 178.0841, |
|
"eval_samples_per_second": 174.749, |
|
"eval_steps_per_second": 10.922, |
|
"step": 717705 |
|
}, |
|
{ |
|
"epoch": 82.4, |
|
"learning_rate": 3.3520941391522906e-05, |
|
"loss": 0.8745, |
|
"step": 721206 |
|
}, |
|
{ |
|
"epoch": 82.4, |
|
"eval_loss": 0.8833040595054626, |
|
"eval_runtime": 178.3724, |
|
"eval_samples_per_second": 174.466, |
|
"eval_steps_per_second": 10.904, |
|
"step": 721206 |
|
}, |
|
{ |
|
"epoch": 82.8, |
|
"learning_rate": 3.344094596138467e-05, |
|
"loss": 0.8779, |
|
"step": 724707 |
|
}, |
|
{ |
|
"epoch": 82.8, |
|
"eval_loss": 0.8839106559753418, |
|
"eval_runtime": 178.0087, |
|
"eval_samples_per_second": 174.823, |
|
"eval_steps_per_second": 10.926, |
|
"step": 724707 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"learning_rate": 3.336095053124643e-05, |
|
"loss": 0.8758, |
|
"step": 728208 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"eval_loss": 0.8863241076469421, |
|
"eval_runtime": 178.0608, |
|
"eval_samples_per_second": 174.772, |
|
"eval_steps_per_second": 10.923, |
|
"step": 728208 |
|
}, |
|
{ |
|
"epoch": 83.6, |
|
"learning_rate": 3.3280955101108194e-05, |
|
"loss": 0.8747, |
|
"step": 731709 |
|
}, |
|
{ |
|
"epoch": 83.6, |
|
"eval_loss": 0.8861810564994812, |
|
"eval_runtime": 178.0994, |
|
"eval_samples_per_second": 174.734, |
|
"eval_steps_per_second": 10.921, |
|
"step": 731709 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 3.320095967096996e-05, |
|
"loss": 0.8768, |
|
"step": 735210 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 0.8819161653518677, |
|
"eval_runtime": 178.1304, |
|
"eval_samples_per_second": 174.703, |
|
"eval_steps_per_second": 10.919, |
|
"step": 735210 |
|
}, |
|
{ |
|
"epoch": 84.4, |
|
"learning_rate": 3.312096424083171e-05, |
|
"loss": 0.8721, |
|
"step": 738711 |
|
}, |
|
{ |
|
"epoch": 84.4, |
|
"eval_loss": 0.8827645778656006, |
|
"eval_runtime": 178.1975, |
|
"eval_samples_per_second": 174.638, |
|
"eval_steps_per_second": 10.915, |
|
"step": 738711 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 3.304096881069348e-05, |
|
"loss": 0.8762, |
|
"step": 742212 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"eval_loss": 0.8821256160736084, |
|
"eval_runtime": 178.2935, |
|
"eval_samples_per_second": 174.544, |
|
"eval_steps_per_second": 10.909, |
|
"step": 742212 |
|
}, |
|
{ |
|
"epoch": 85.2, |
|
"learning_rate": 3.296097338055524e-05, |
|
"loss": 0.8745, |
|
"step": 745713 |
|
}, |
|
{ |
|
"epoch": 85.2, |
|
"eval_loss": 0.8844091296195984, |
|
"eval_runtime": 178.1959, |
|
"eval_samples_per_second": 174.639, |
|
"eval_steps_per_second": 10.915, |
|
"step": 745713 |
|
}, |
|
{ |
|
"epoch": 85.6, |
|
"learning_rate": 3.2880977950417e-05, |
|
"loss": 0.8718, |
|
"step": 749214 |
|
}, |
|
{ |
|
"epoch": 85.6, |
|
"eval_loss": 0.8821405172348022, |
|
"eval_runtime": 178.5043, |
|
"eval_samples_per_second": 174.338, |
|
"eval_steps_per_second": 10.896, |
|
"step": 749214 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 3.2800982520278764e-05, |
|
"loss": 0.8753, |
|
"step": 752715 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 0.8849576711654663, |
|
"eval_runtime": 178.2889, |
|
"eval_samples_per_second": 174.548, |
|
"eval_steps_per_second": 10.909, |
|
"step": 752715 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"learning_rate": 3.2720987090140526e-05, |
|
"loss": 0.8702, |
|
"step": 756216 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"eval_loss": 0.8836557865142822, |
|
"eval_runtime": 178.073, |
|
"eval_samples_per_second": 174.76, |
|
"eval_steps_per_second": 10.922, |
|
"step": 756216 |
|
}, |
|
{ |
|
"epoch": 86.8, |
|
"learning_rate": 3.264099166000228e-05, |
|
"loss": 0.8745, |
|
"step": 759717 |
|
}, |
|
{ |
|
"epoch": 86.8, |
|
"eval_loss": 0.8812028765678406, |
|
"eval_runtime": 178.1701, |
|
"eval_samples_per_second": 174.665, |
|
"eval_steps_per_second": 10.917, |
|
"step": 759717 |
|
}, |
|
{ |
|
"epoch": 87.2, |
|
"learning_rate": 3.256099622986405e-05, |
|
"loss": 0.8726, |
|
"step": 763218 |
|
}, |
|
{ |
|
"epoch": 87.2, |
|
"eval_loss": 0.8863665461540222, |
|
"eval_runtime": 178.1003, |
|
"eval_samples_per_second": 174.733, |
|
"eval_steps_per_second": 10.921, |
|
"step": 763218 |
|
}, |
|
{ |
|
"epoch": 87.59, |
|
"learning_rate": 3.248100079972581e-05, |
|
"loss": 0.8731, |
|
"step": 766719 |
|
}, |
|
{ |
|
"epoch": 87.59, |
|
"eval_loss": 0.8802133798599243, |
|
"eval_runtime": 177.7361, |
|
"eval_samples_per_second": 175.091, |
|
"eval_steps_per_second": 10.943, |
|
"step": 766719 |
|
}, |
|
{ |
|
"epoch": 87.99, |
|
"learning_rate": 3.240100536958757e-05, |
|
"loss": 0.8735, |
|
"step": 770220 |
|
}, |
|
{ |
|
"epoch": 87.99, |
|
"eval_loss": 0.87850421667099, |
|
"eval_runtime": 178.0197, |
|
"eval_samples_per_second": 174.812, |
|
"eval_steps_per_second": 10.926, |
|
"step": 770220 |
|
}, |
|
{ |
|
"epoch": 88.39, |
|
"learning_rate": 3.232100993944933e-05, |
|
"loss": 0.8684, |
|
"step": 773721 |
|
}, |
|
{ |
|
"epoch": 88.39, |
|
"eval_loss": 0.8797884583473206, |
|
"eval_runtime": 177.9064, |
|
"eval_samples_per_second": 174.923, |
|
"eval_steps_per_second": 10.933, |
|
"step": 773721 |
|
}, |
|
{ |
|
"epoch": 88.79, |
|
"learning_rate": 3.2241014509311096e-05, |
|
"loss": 0.8727, |
|
"step": 777222 |
|
}, |
|
{ |
|
"epoch": 88.79, |
|
"eval_loss": 0.8834565281867981, |
|
"eval_runtime": 178.2741, |
|
"eval_samples_per_second": 174.563, |
|
"eval_steps_per_second": 10.91, |
|
"step": 777222 |
|
}, |
|
{ |
|
"epoch": 89.19, |
|
"learning_rate": 3.216101907917285e-05, |
|
"loss": 0.8704, |
|
"step": 780723 |
|
}, |
|
{ |
|
"epoch": 89.19, |
|
"eval_loss": 0.8808990120887756, |
|
"eval_runtime": 177.9944, |
|
"eval_samples_per_second": 174.837, |
|
"eval_steps_per_second": 10.927, |
|
"step": 780723 |
|
}, |
|
{ |
|
"epoch": 89.59, |
|
"learning_rate": 3.208102364903462e-05, |
|
"loss": 0.8691, |
|
"step": 784224 |
|
}, |
|
{ |
|
"epoch": 89.59, |
|
"eval_loss": 0.8784425854682922, |
|
"eval_runtime": 177.87, |
|
"eval_samples_per_second": 174.959, |
|
"eval_steps_per_second": 10.935, |
|
"step": 784224 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"learning_rate": 3.200102821889638e-05, |
|
"loss": 0.8718, |
|
"step": 787725 |
|
}, |
|
{ |
|
"epoch": 89.99, |
|
"eval_loss": 0.8777753114700317, |
|
"eval_runtime": 178.0354, |
|
"eval_samples_per_second": 174.797, |
|
"eval_steps_per_second": 10.925, |
|
"step": 787725 |
|
}, |
|
{ |
|
"epoch": 90.39, |
|
"learning_rate": 3.192103278875814e-05, |
|
"loss": 0.8685, |
|
"step": 791226 |
|
}, |
|
{ |
|
"epoch": 90.39, |
|
"eval_loss": 0.8818721175193787, |
|
"eval_runtime": 177.85, |
|
"eval_samples_per_second": 174.979, |
|
"eval_steps_per_second": 10.936, |
|
"step": 791226 |
|
}, |
|
{ |
|
"epoch": 90.79, |
|
"learning_rate": 3.18410373586199e-05, |
|
"loss": 0.8686, |
|
"step": 794727 |
|
}, |
|
{ |
|
"epoch": 90.79, |
|
"eval_loss": 0.878167450428009, |
|
"eval_runtime": 177.8069, |
|
"eval_samples_per_second": 175.021, |
|
"eval_steps_per_second": 10.939, |
|
"step": 794727 |
|
}, |
|
{ |
|
"epoch": 91.19, |
|
"learning_rate": 3.1761041928481665e-05, |
|
"loss": 0.8692, |
|
"step": 798228 |
|
}, |
|
{ |
|
"epoch": 91.19, |
|
"eval_loss": 0.8786768913269043, |
|
"eval_runtime": 177.7581, |
|
"eval_samples_per_second": 175.069, |
|
"eval_steps_per_second": 10.942, |
|
"step": 798228 |
|
}, |
|
{ |
|
"epoch": 91.59, |
|
"learning_rate": 3.168104649834343e-05, |
|
"loss": 0.8649, |
|
"step": 801729 |
|
}, |
|
{ |
|
"epoch": 91.59, |
|
"eval_loss": 0.8777763247489929, |
|
"eval_runtime": 178.0586, |
|
"eval_samples_per_second": 174.774, |
|
"eval_steps_per_second": 10.923, |
|
"step": 801729 |
|
}, |
|
{ |
|
"epoch": 91.99, |
|
"learning_rate": 3.160105106820519e-05, |
|
"loss": 0.8699, |
|
"step": 805230 |
|
}, |
|
{ |
|
"epoch": 91.99, |
|
"eval_loss": 0.8764858841896057, |
|
"eval_runtime": 178.3307, |
|
"eval_samples_per_second": 174.507, |
|
"eval_steps_per_second": 10.907, |
|
"step": 805230 |
|
}, |
|
{ |
|
"epoch": 92.39, |
|
"learning_rate": 3.152105563806695e-05, |
|
"loss": 0.8734, |
|
"step": 808731 |
|
}, |
|
{ |
|
"epoch": 92.39, |
|
"eval_loss": 0.8796523809432983, |
|
"eval_runtime": 178.1053, |
|
"eval_samples_per_second": 174.728, |
|
"eval_steps_per_second": 10.921, |
|
"step": 808731 |
|
}, |
|
{ |
|
"epoch": 92.79, |
|
"learning_rate": 3.1441060207928716e-05, |
|
"loss": 0.8762, |
|
"step": 812232 |
|
}, |
|
{ |
|
"epoch": 92.79, |
|
"eval_loss": 0.873812198638916, |
|
"eval_runtime": 177.9129, |
|
"eval_samples_per_second": 174.917, |
|
"eval_steps_per_second": 10.932, |
|
"step": 812232 |
|
}, |
|
{ |
|
"epoch": 93.19, |
|
"learning_rate": 3.136106477779047e-05, |
|
"loss": 0.875, |
|
"step": 815733 |
|
}, |
|
{ |
|
"epoch": 93.19, |
|
"eval_loss": 0.8739504814147949, |
|
"eval_runtime": 178.3464, |
|
"eval_samples_per_second": 174.492, |
|
"eval_steps_per_second": 10.906, |
|
"step": 815733 |
|
}, |
|
{ |
|
"epoch": 93.59, |
|
"learning_rate": 3.1281069347652235e-05, |
|
"loss": 0.8749, |
|
"step": 819234 |
|
}, |
|
{ |
|
"epoch": 93.59, |
|
"eval_loss": 0.877826988697052, |
|
"eval_runtime": 177.9396, |
|
"eval_samples_per_second": 174.891, |
|
"eval_steps_per_second": 10.931, |
|
"step": 819234 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"learning_rate": 3.1201073917514e-05, |
|
"loss": 0.8766, |
|
"step": 822735 |
|
}, |
|
{ |
|
"epoch": 93.99, |
|
"eval_loss": 0.8729666471481323, |
|
"eval_runtime": 178.2108, |
|
"eval_samples_per_second": 174.625, |
|
"eval_steps_per_second": 10.914, |
|
"step": 822735 |
|
}, |
|
{ |
|
"epoch": 94.39, |
|
"learning_rate": 3.112107848737576e-05, |
|
"loss": 0.8726, |
|
"step": 826236 |
|
}, |
|
{ |
|
"epoch": 94.39, |
|
"eval_loss": 0.8738223910331726, |
|
"eval_runtime": 177.8319, |
|
"eval_samples_per_second": 174.997, |
|
"eval_steps_per_second": 10.937, |
|
"step": 826236 |
|
}, |
|
{ |
|
"epoch": 94.79, |
|
"learning_rate": 3.1041083057237516e-05, |
|
"loss": 0.8752, |
|
"step": 829737 |
|
}, |
|
{ |
|
"epoch": 94.79, |
|
"eval_loss": 0.8740049600601196, |
|
"eval_runtime": 178.0665, |
|
"eval_samples_per_second": 174.766, |
|
"eval_steps_per_second": 10.923, |
|
"step": 829737 |
|
}, |
|
{ |
|
"epoch": 95.19, |
|
"learning_rate": 3.0961087627099286e-05, |
|
"loss": 0.8737, |
|
"step": 833238 |
|
}, |
|
{ |
|
"epoch": 95.19, |
|
"eval_loss": 0.871174693107605, |
|
"eval_runtime": 177.988, |
|
"eval_samples_per_second": 174.843, |
|
"eval_steps_per_second": 10.928, |
|
"step": 833238 |
|
}, |
|
{ |
|
"epoch": 95.59, |
|
"learning_rate": 3.088109219696104e-05, |
|
"loss": 0.8723, |
|
"step": 836739 |
|
}, |
|
{ |
|
"epoch": 95.59, |
|
"eval_loss": 0.8785658478736877, |
|
"eval_runtime": 178.1263, |
|
"eval_samples_per_second": 174.707, |
|
"eval_steps_per_second": 10.919, |
|
"step": 836739 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"learning_rate": 3.0801096766822805e-05, |
|
"loss": 0.8749, |
|
"step": 840240 |
|
}, |
|
{ |
|
"epoch": 95.99, |
|
"eval_loss": 0.8714969158172607, |
|
"eval_runtime": 178.1533, |
|
"eval_samples_per_second": 174.681, |
|
"eval_steps_per_second": 10.918, |
|
"step": 840240 |
|
}, |
|
{ |
|
"epoch": 96.39, |
|
"learning_rate": 3.072110133668457e-05, |
|
"loss": 0.8707, |
|
"step": 843741 |
|
}, |
|
{ |
|
"epoch": 96.39, |
|
"eval_loss": 0.8721190094947815, |
|
"eval_runtime": 177.9697, |
|
"eval_samples_per_second": 174.861, |
|
"eval_steps_per_second": 10.929, |
|
"step": 843741 |
|
}, |
|
{ |
|
"epoch": 96.79, |
|
"learning_rate": 3.064110590654633e-05, |
|
"loss": 0.8718, |
|
"step": 847242 |
|
}, |
|
{ |
|
"epoch": 96.79, |
|
"eval_loss": 0.8713057637214661, |
|
"eval_runtime": 178.2884, |
|
"eval_samples_per_second": 174.549, |
|
"eval_steps_per_second": 10.909, |
|
"step": 847242 |
|
}, |
|
{ |
|
"epoch": 97.19, |
|
"learning_rate": 3.0561110476408086e-05, |
|
"loss": 0.8731, |
|
"step": 850743 |
|
}, |
|
{ |
|
"epoch": 97.19, |
|
"eval_loss": 0.8710973858833313, |
|
"eval_runtime": 178.0006, |
|
"eval_samples_per_second": 174.831, |
|
"eval_steps_per_second": 10.927, |
|
"step": 850743 |
|
}, |
|
{ |
|
"epoch": 97.59, |
|
"learning_rate": 3.0481115046269852e-05, |
|
"loss": 0.872, |
|
"step": 854244 |
|
}, |
|
{ |
|
"epoch": 97.59, |
|
"eval_loss": 0.8724552989006042, |
|
"eval_runtime": 177.9577, |
|
"eval_samples_per_second": 174.873, |
|
"eval_steps_per_second": 10.93, |
|
"step": 854244 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"learning_rate": 3.040111961613161e-05, |
|
"loss": 0.872, |
|
"step": 857745 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"eval_loss": 0.8701831698417664, |
|
"eval_runtime": 177.9492, |
|
"eval_samples_per_second": 174.881, |
|
"eval_steps_per_second": 10.93, |
|
"step": 857745 |
|
}, |
|
{ |
|
"epoch": 98.39, |
|
"learning_rate": 3.0321124185993377e-05, |
|
"loss": 0.8688, |
|
"step": 861246 |
|
}, |
|
{ |
|
"epoch": 98.39, |
|
"eval_loss": 0.868754506111145, |
|
"eval_runtime": 178.1695, |
|
"eval_samples_per_second": 174.665, |
|
"eval_steps_per_second": 10.917, |
|
"step": 861246 |
|
}, |
|
{ |
|
"epoch": 98.79, |
|
"learning_rate": 3.0241128755855137e-05, |
|
"loss": 0.8712, |
|
"step": 864747 |
|
}, |
|
{ |
|
"epoch": 98.79, |
|
"eval_loss": 0.8745043873786926, |
|
"eval_runtime": 178.0858, |
|
"eval_samples_per_second": 174.747, |
|
"eval_steps_per_second": 10.922, |
|
"step": 864747 |
|
}, |
|
{ |
|
"epoch": 99.19, |
|
"learning_rate": 3.01611333257169e-05, |
|
"loss": 0.8691, |
|
"step": 868248 |
|
}, |
|
{ |
|
"epoch": 99.19, |
|
"eval_loss": 0.8719183802604675, |
|
"eval_runtime": 177.9082, |
|
"eval_samples_per_second": 174.922, |
|
"eval_steps_per_second": 10.933, |
|
"step": 868248 |
|
}, |
|
{ |
|
"epoch": 99.59, |
|
"learning_rate": 3.008113789557866e-05, |
|
"loss": 0.8693, |
|
"step": 871749 |
|
}, |
|
{ |
|
"epoch": 99.59, |
|
"eval_loss": 0.8701997399330139, |
|
"eval_runtime": 177.8864, |
|
"eval_samples_per_second": 174.943, |
|
"eval_steps_per_second": 10.934, |
|
"step": 871749 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 3.0001142465440425e-05, |
|
"loss": 0.8692, |
|
"step": 875250 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"eval_loss": 0.8659647107124329, |
|
"eval_runtime": 177.9913, |
|
"eval_samples_per_second": 174.84, |
|
"eval_steps_per_second": 10.927, |
|
"step": 875250 |
|
}, |
|
{ |
|
"epoch": 100.39, |
|
"learning_rate": 2.9921147035302184e-05, |
|
"loss": 0.8646, |
|
"step": 878751 |
|
}, |
|
{ |
|
"epoch": 100.39, |
|
"eval_loss": 0.86882084608078, |
|
"eval_runtime": 177.9376, |
|
"eval_samples_per_second": 174.893, |
|
"eval_steps_per_second": 10.931, |
|
"step": 878751 |
|
}, |
|
{ |
|
"epoch": 100.79, |
|
"learning_rate": 2.9841151605163947e-05, |
|
"loss": 0.8698, |
|
"step": 882252 |
|
}, |
|
{ |
|
"epoch": 100.79, |
|
"eval_loss": 0.8718056082725525, |
|
"eval_runtime": 178.2288, |
|
"eval_samples_per_second": 174.607, |
|
"eval_steps_per_second": 10.913, |
|
"step": 882252 |
|
}, |
|
{ |
|
"epoch": 101.19, |
|
"learning_rate": 2.9761156175025706e-05, |
|
"loss": 0.8675, |
|
"step": 885753 |
|
}, |
|
{ |
|
"epoch": 101.19, |
|
"eval_loss": 0.8696756362915039, |
|
"eval_runtime": 178.0145, |
|
"eval_samples_per_second": 174.817, |
|
"eval_steps_per_second": 10.926, |
|
"step": 885753 |
|
}, |
|
{ |
|
"epoch": 101.59, |
|
"learning_rate": 2.968116074488747e-05, |
|
"loss": 0.8668, |
|
"step": 889254 |
|
}, |
|
{ |
|
"epoch": 101.59, |
|
"eval_loss": 0.8669666051864624, |
|
"eval_runtime": 177.9105, |
|
"eval_samples_per_second": 174.919, |
|
"eval_steps_per_second": 10.932, |
|
"step": 889254 |
|
}, |
|
{ |
|
"epoch": 101.99, |
|
"learning_rate": 2.960116531474923e-05, |
|
"loss": 0.8676, |
|
"step": 892755 |
|
}, |
|
{ |
|
"epoch": 101.99, |
|
"eval_loss": 0.8685517311096191, |
|
"eval_runtime": 178.0845, |
|
"eval_samples_per_second": 174.749, |
|
"eval_steps_per_second": 10.922, |
|
"step": 892755 |
|
}, |
|
{ |
|
"epoch": 102.39, |
|
"learning_rate": 2.9521169884610994e-05, |
|
"loss": 0.8645, |
|
"step": 896256 |
|
}, |
|
{ |
|
"epoch": 102.39, |
|
"eval_loss": 0.8676366209983826, |
|
"eval_runtime": 177.8814, |
|
"eval_samples_per_second": 174.948, |
|
"eval_steps_per_second": 10.934, |
|
"step": 896256 |
|
}, |
|
{ |
|
"epoch": 102.79, |
|
"learning_rate": 2.9441174454472754e-05, |
|
"loss": 0.8652, |
|
"step": 899757 |
|
}, |
|
{ |
|
"epoch": 102.79, |
|
"eval_loss": 0.8648022413253784, |
|
"eval_runtime": 177.9084, |
|
"eval_samples_per_second": 174.921, |
|
"eval_steps_per_second": 10.933, |
|
"step": 899757 |
|
}, |
|
{ |
|
"epoch": 103.19, |
|
"learning_rate": 2.9361179024334517e-05, |
|
"loss": 0.8646, |
|
"step": 903258 |
|
}, |
|
{ |
|
"epoch": 103.19, |
|
"eval_loss": 0.8692647814750671, |
|
"eval_runtime": 177.9213, |
|
"eval_samples_per_second": 174.909, |
|
"eval_steps_per_second": 10.932, |
|
"step": 903258 |
|
}, |
|
{ |
|
"epoch": 103.59, |
|
"learning_rate": 2.9281183594196276e-05, |
|
"loss": 0.8634, |
|
"step": 906759 |
|
}, |
|
{ |
|
"epoch": 103.59, |
|
"eval_loss": 0.8666937351226807, |
|
"eval_runtime": 178.0016, |
|
"eval_samples_per_second": 174.83, |
|
"eval_steps_per_second": 10.927, |
|
"step": 906759 |
|
}, |
|
{ |
|
"epoch": 103.99, |
|
"learning_rate": 2.9201188164058042e-05, |
|
"loss": 0.8678, |
|
"step": 910260 |
|
}, |
|
{ |
|
"epoch": 103.99, |
|
"eval_loss": 0.8698520660400391, |
|
"eval_runtime": 178.1156, |
|
"eval_samples_per_second": 174.718, |
|
"eval_steps_per_second": 10.92, |
|
"step": 910260 |
|
}, |
|
{ |
|
"epoch": 104.39, |
|
"learning_rate": 2.91211927339198e-05, |
|
"loss": 0.8616, |
|
"step": 913761 |
|
}, |
|
{ |
|
"epoch": 104.39, |
|
"eval_loss": 0.864092230796814, |
|
"eval_runtime": 177.9776, |
|
"eval_samples_per_second": 174.853, |
|
"eval_steps_per_second": 10.928, |
|
"step": 913761 |
|
}, |
|
{ |
|
"epoch": 104.79, |
|
"learning_rate": 2.9041197303781564e-05, |
|
"loss": 0.8631, |
|
"step": 917262 |
|
}, |
|
{ |
|
"epoch": 104.79, |
|
"eval_loss": 0.8643292188644409, |
|
"eval_runtime": 178.3388, |
|
"eval_samples_per_second": 174.499, |
|
"eval_steps_per_second": 10.906, |
|
"step": 917262 |
|
}, |
|
{ |
|
"epoch": 105.19, |
|
"learning_rate": 2.8961201873643323e-05, |
|
"loss": 0.8643, |
|
"step": 920763 |
|
}, |
|
{ |
|
"epoch": 105.19, |
|
"eval_loss": 0.867784857749939, |
|
"eval_runtime": 178.0618, |
|
"eval_samples_per_second": 174.771, |
|
"eval_steps_per_second": 10.923, |
|
"step": 920763 |
|
}, |
|
{ |
|
"epoch": 105.59, |
|
"learning_rate": 2.8881206443505086e-05, |
|
"loss": 0.8616, |
|
"step": 924264 |
|
}, |
|
{ |
|
"epoch": 105.59, |
|
"eval_loss": 0.8667683601379395, |
|
"eval_runtime": 177.9577, |
|
"eval_samples_per_second": 174.873, |
|
"eval_steps_per_second": 10.93, |
|
"step": 924264 |
|
}, |
|
{ |
|
"epoch": 105.99, |
|
"learning_rate": 2.8801211013366845e-05, |
|
"loss": 0.8644, |
|
"step": 927765 |
|
}, |
|
{ |
|
"epoch": 105.99, |
|
"eval_loss": 0.8647730946540833, |
|
"eval_runtime": 178.1529, |
|
"eval_samples_per_second": 174.681, |
|
"eval_steps_per_second": 10.918, |
|
"step": 927765 |
|
}, |
|
{ |
|
"epoch": 106.39, |
|
"learning_rate": 2.872121558322861e-05, |
|
"loss": 0.859, |
|
"step": 931266 |
|
}, |
|
{ |
|
"epoch": 106.39, |
|
"eval_loss": 0.8621995449066162, |
|
"eval_runtime": 178.0998, |
|
"eval_samples_per_second": 174.733, |
|
"eval_steps_per_second": 10.921, |
|
"step": 931266 |
|
}, |
|
{ |
|
"epoch": 106.79, |
|
"learning_rate": 2.864122015309037e-05, |
|
"loss": 0.8611, |
|
"step": 934767 |
|
}, |
|
{ |
|
"epoch": 106.79, |
|
"eval_loss": 0.8664916753768921, |
|
"eval_runtime": 178.2296, |
|
"eval_samples_per_second": 174.606, |
|
"eval_steps_per_second": 10.913, |
|
"step": 934767 |
|
}, |
|
{ |
|
"epoch": 107.19, |
|
"learning_rate": 2.8561224722952134e-05, |
|
"loss": 0.8608, |
|
"step": 938268 |
|
}, |
|
{ |
|
"epoch": 107.19, |
|
"eval_loss": 0.8675068020820618, |
|
"eval_runtime": 178.0022, |
|
"eval_samples_per_second": 174.829, |
|
"eval_steps_per_second": 10.927, |
|
"step": 938268 |
|
}, |
|
{ |
|
"epoch": 107.59, |
|
"learning_rate": 2.8481229292813893e-05, |
|
"loss": 0.8596, |
|
"step": 941769 |
|
}, |
|
{ |
|
"epoch": 107.59, |
|
"eval_loss": 0.8647910356521606, |
|
"eval_runtime": 177.9926, |
|
"eval_samples_per_second": 174.839, |
|
"eval_steps_per_second": 10.927, |
|
"step": 941769 |
|
}, |
|
{ |
|
"epoch": 107.99, |
|
"learning_rate": 2.840123386267566e-05, |
|
"loss": 0.863, |
|
"step": 945270 |
|
}, |
|
{ |
|
"epoch": 107.99, |
|
"eval_loss": 0.8669795393943787, |
|
"eval_runtime": 177.9511, |
|
"eval_samples_per_second": 174.879, |
|
"eval_steps_per_second": 10.93, |
|
"step": 945270 |
|
}, |
|
{ |
|
"epoch": 108.39, |
|
"learning_rate": 2.832123843253742e-05, |
|
"loss": 0.8589, |
|
"step": 948771 |
|
}, |
|
{ |
|
"epoch": 108.39, |
|
"eval_loss": 0.8626872301101685, |
|
"eval_runtime": 178.2769, |
|
"eval_samples_per_second": 174.56, |
|
"eval_steps_per_second": 10.91, |
|
"step": 948771 |
|
}, |
|
{ |
|
"epoch": 108.79, |
|
"learning_rate": 2.824124300239918e-05, |
|
"loss": 0.8605, |
|
"step": 952272 |
|
}, |
|
{ |
|
"epoch": 108.79, |
|
"eval_loss": 0.8620831370353699, |
|
"eval_runtime": 177.8108, |
|
"eval_samples_per_second": 175.018, |
|
"eval_steps_per_second": 10.939, |
|
"step": 952272 |
|
}, |
|
{ |
|
"epoch": 109.19, |
|
"learning_rate": 2.816124757226094e-05, |
|
"loss": 0.8578, |
|
"step": 955773 |
|
}, |
|
{ |
|
"epoch": 109.19, |
|
"eval_loss": 0.8637415170669556, |
|
"eval_runtime": 177.6547, |
|
"eval_samples_per_second": 175.171, |
|
"eval_steps_per_second": 10.948, |
|
"step": 955773 |
|
}, |
|
{ |
|
"epoch": 109.59, |
|
"learning_rate": 2.8081252142122706e-05, |
|
"loss": 0.8594, |
|
"step": 959274 |
|
}, |
|
{ |
|
"epoch": 109.59, |
|
"eval_loss": 0.8635110855102539, |
|
"eval_runtime": 177.7697, |
|
"eval_samples_per_second": 175.058, |
|
"eval_steps_per_second": 10.941, |
|
"step": 959274 |
|
}, |
|
{ |
|
"epoch": 109.99, |
|
"learning_rate": 2.8001256711984462e-05, |
|
"loss": 0.8619, |
|
"step": 962775 |
|
}, |
|
{ |
|
"epoch": 109.99, |
|
"eval_loss": 0.8643426299095154, |
|
"eval_runtime": 177.7171, |
|
"eval_samples_per_second": 175.11, |
|
"eval_steps_per_second": 10.944, |
|
"step": 962775 |
|
} |
|
], |
|
"max_steps": 2188250, |
|
"num_train_epochs": 250, |
|
"total_flos": 8.10959302547497e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|