{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "global_step": 6480,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 5.858560562133789,
      "eval_runtime": 5.0347,
      "eval_samples_per_second": 2349.702,
      "eval_steps_per_second": 2.383,
      "step": 216
    },
    {
      "epoch": 2.0,
      "eval_loss": 5.509454727172852,
      "eval_runtime": 4.6188,
      "eval_samples_per_second": 2561.29,
      "eval_steps_per_second": 2.598,
      "step": 432
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.0001,
      "loss": 6.688,
      "step": 500
    },
    {
      "epoch": 3.0,
      "eval_loss": 5.397603511810303,
      "eval_runtime": 4.6394,
      "eval_samples_per_second": 2549.872,
      "eval_steps_per_second": 2.587,
      "step": 648
    },
    {
      "epoch": 4.0,
      "eval_loss": 5.356218338012695,
      "eval_runtime": 4.6644,
      "eval_samples_per_second": 2536.231,
      "eval_steps_per_second": 2.573,
      "step": 864
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.0002,
      "loss": 5.3629,
      "step": 1000
    },
    {
      "epoch": 5.0,
      "eval_loss": 5.291167736053467,
      "eval_runtime": 4.6079,
      "eval_samples_per_second": 2567.32,
      "eval_steps_per_second": 2.604,
      "step": 1080
    },
    {
      "epoch": 6.0,
      "eval_loss": 5.238525867462158,
      "eval_runtime": 4.628,
      "eval_samples_per_second": 2556.182,
      "eval_steps_per_second": 2.593,
      "step": 1296
    },
    {
      "epoch": 6.94,
      "learning_rate": 0.00018175182481751826,
      "loss": 5.22,
      "step": 1500
    },
    {
      "epoch": 7.0,
      "eval_loss": 5.195512771606445,
      "eval_runtime": 4.6676,
      "eval_samples_per_second": 2534.484,
      "eval_steps_per_second": 2.571,
      "step": 1512
    },
    {
      "epoch": 8.0,
      "eval_loss": 5.178501605987549,
      "eval_runtime": 4.6613,
      "eval_samples_per_second": 2537.906,
      "eval_steps_per_second": 2.574,
      "step": 1728
    },
    {
      "epoch": 9.0,
      "eval_loss": 5.132693767547607,
      "eval_runtime": 4.6567,
      "eval_samples_per_second": 2540.411,
      "eval_steps_per_second": 2.577,
      "step": 1944
    },
    {
      "epoch": 9.26,
      "learning_rate": 0.0001635036496350365,
      "loss": 5.1248,
      "step": 2000
    },
    {
      "epoch": 10.0,
      "eval_loss": 5.1242523193359375,
      "eval_runtime": 4.6307,
      "eval_samples_per_second": 2554.696,
      "eval_steps_per_second": 2.591,
      "step": 2160
    },
    {
      "epoch": 11.0,
      "eval_loss": 5.0888824462890625,
      "eval_runtime": 4.6284,
      "eval_samples_per_second": 2555.934,
      "eval_steps_per_second": 2.593,
      "step": 2376
    },
    {
      "epoch": 11.57,
      "learning_rate": 0.00014525547445255475,
      "loss": 5.0591,
      "step": 2500
    },
    {
      "epoch": 12.0,
      "eval_loss": 5.073211669921875,
      "eval_runtime": 4.7033,
      "eval_samples_per_second": 2515.279,
      "eval_steps_per_second": 2.551,
      "step": 2592
    },
    {
      "epoch": 13.0,
      "eval_loss": 5.041653633117676,
      "eval_runtime": 4.6143,
      "eval_samples_per_second": 2563.795,
      "eval_steps_per_second": 2.601,
      "step": 2808
    },
    {
      "epoch": 13.89,
      "learning_rate": 0.000127007299270073,
      "loss": 5.0094,
      "step": 3000
    },
    {
      "epoch": 14.0,
      "eval_loss": 5.038762092590332,
      "eval_runtime": 4.6173,
      "eval_samples_per_second": 2562.095,
      "eval_steps_per_second": 2.599,
      "step": 3024
    },
    {
      "epoch": 15.0,
      "eval_loss": 4.9298810958862305,
      "eval_runtime": 4.5819,
      "eval_samples_per_second": 2581.88,
      "eval_steps_per_second": 2.619,
      "step": 3240
    },
    {
      "epoch": 16.0,
      "eval_loss": 4.299057960510254,
      "eval_runtime": 4.6127,
      "eval_samples_per_second": 2564.64,
      "eval_steps_per_second": 2.601,
      "step": 3456
    },
    {
      "epoch": 16.2,
      "learning_rate": 0.00010875912408759123,
      "loss": 4.7527,
      "step": 3500
    },
    {
      "epoch": 17.0,
      "eval_loss": 3.654055118560791,
      "eval_runtime": 4.5888,
      "eval_samples_per_second": 2578.023,
      "eval_steps_per_second": 2.615,
      "step": 3672
    },
    {
      "epoch": 18.0,
      "eval_loss": 2.7825753688812256,
      "eval_runtime": 4.6393,
      "eval_samples_per_second": 2549.967,
      "eval_steps_per_second": 2.587,
      "step": 3888
    },
    {
      "epoch": 18.52,
      "learning_rate": 9.051094890510949e-05,
      "loss": 3.4431,
      "step": 4000
    },
    {
      "epoch": 19.0,
      "eval_loss": 2.2795569896698,
      "eval_runtime": 4.647,
      "eval_samples_per_second": 2545.709,
      "eval_steps_per_second": 2.582,
      "step": 4104
    },
    {
      "epoch": 20.0,
      "eval_loss": 2.021310806274414,
      "eval_runtime": 4.6303,
      "eval_samples_per_second": 2554.922,
      "eval_steps_per_second": 2.592,
      "step": 4320
    },
    {
      "epoch": 20.83,
      "learning_rate": 7.226277372262774e-05,
      "loss": 2.2803,
      "step": 4500
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.8808549642562866,
      "eval_runtime": 4.6167,
      "eval_samples_per_second": 2562.421,
      "eval_steps_per_second": 2.599,
      "step": 4536
    },
    {
      "epoch": 22.0,
      "eval_loss": 1.7615374326705933,
      "eval_runtime": 4.6259,
      "eval_samples_per_second": 2557.316,
      "eval_steps_per_second": 2.594,
      "step": 4752
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.6925297975540161,
      "eval_runtime": 4.6567,
      "eval_samples_per_second": 2540.444,
      "eval_steps_per_second": 2.577,
      "step": 4968
    },
    {
      "epoch": 23.15,
      "learning_rate": 5.401459854014599e-05,
      "loss": 1.8601,
      "step": 5000
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.6204941272735596,
      "eval_runtime": 4.7455,
      "eval_samples_per_second": 2492.914,
      "eval_steps_per_second": 2.529,
      "step": 5184
    },
    {
      "epoch": 25.0,
      "eval_loss": 1.5750768184661865,
      "eval_runtime": 4.6733,
      "eval_samples_per_second": 2531.399,
      "eval_steps_per_second": 2.568,
      "step": 5400
    },
    {
      "epoch": 25.46,
      "learning_rate": 3.5766423357664236e-05,
      "loss": 1.6697,
      "step": 5500
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.5390561819076538,
      "eval_runtime": 4.6271,
      "eval_samples_per_second": 2556.66,
      "eval_steps_per_second": 2.593,
      "step": 5616
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.520015835762024,
      "eval_runtime": 4.6894,
      "eval_samples_per_second": 2522.687,
      "eval_steps_per_second": 2.559,
      "step": 5832
    },
    {
      "epoch": 27.78,
      "learning_rate": 1.7518248175182482e-05,
      "loss": 1.5655,
      "step": 6000
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.4865714311599731,
      "eval_runtime": 4.6379,
      "eval_samples_per_second": 2550.726,
      "eval_steps_per_second": 2.587,
      "step": 6048
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.4655797481536865,
      "eval_runtime": 4.6124,
      "eval_samples_per_second": 2564.811,
      "eval_steps_per_second": 2.602,
      "step": 6264
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.4627275466918945,
      "eval_runtime": 4.9986,
      "eval_samples_per_second": 2366.681,
      "eval_steps_per_second": 2.401,
      "step": 6480
    },
    {
      "epoch": 30.0,
      "step": 6480,
      "total_flos": 5.459072646905856e+16,
      "train_loss": 3.818386595926167,
      "train_runtime": 5045.7987,
      "train_samples_per_second": 1310.118,
      "train_steps_per_second": 1.284
    }
  ],
  "max_steps": 6480,
  "num_train_epochs": 30,
  "total_flos": 5.459072646905856e+16,
  "trial_name": null,
  "trial_params": null
}