|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.9957805907173, |
|
"global_step": 5900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7731608395748838, |
|
"eval_loss": 1.273674726486206, |
|
"eval_runtime": 27.8473, |
|
"eval_samples_per_second": 71.353, |
|
"eval_steps_per_second": 3.591, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7844064852916506, |
|
"eval_loss": 1.149436354637146, |
|
"eval_runtime": 27.7157, |
|
"eval_samples_per_second": 71.692, |
|
"eval_steps_per_second": 3.608, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7886655417073725, |
|
"eval_loss": 1.115924596786499, |
|
"eval_runtime": 27.7526, |
|
"eval_samples_per_second": 71.597, |
|
"eval_steps_per_second": 3.603, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7905562627668078, |
|
"eval_loss": 1.0954861640930176, |
|
"eval_runtime": 27.7193, |
|
"eval_samples_per_second": 71.683, |
|
"eval_steps_per_second": 3.608, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 4.5762711864406784e-05, |
|
"loss": 3.3542, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7918075332800458, |
|
"eval_loss": 1.0870832204818726, |
|
"eval_runtime": 27.7224, |
|
"eval_samples_per_second": 71.675, |
|
"eval_steps_per_second": 3.607, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7928476272240149, |
|
"eval_loss": 1.0806066989898682, |
|
"eval_runtime": 27.72, |
|
"eval_samples_per_second": 71.681, |
|
"eval_steps_per_second": 3.607, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7931219594027847, |
|
"eval_loss": 1.0786826610565186, |
|
"eval_runtime": 27.7079, |
|
"eval_samples_per_second": 71.712, |
|
"eval_steps_per_second": 3.609, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7938285127840768, |
|
"eval_loss": 1.0748920440673828, |
|
"eval_runtime": 27.7168, |
|
"eval_samples_per_second": 71.689, |
|
"eval_steps_per_second": 3.608, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 4.152542372881356e-05, |
|
"loss": 1.0048, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7937554891105914, |
|
"eval_loss": 1.0768133401870728, |
|
"eval_runtime": 27.8358, |
|
"eval_samples_per_second": 71.383, |
|
"eval_steps_per_second": 3.592, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.793783119689748, |
|
"eval_loss": 1.076497197151184, |
|
"eval_runtime": 27.7645, |
|
"eval_samples_per_second": 71.566, |
|
"eval_steps_per_second": 3.602, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7941719214107384, |
|
"eval_loss": 1.0760843753814697, |
|
"eval_runtime": 28.0837, |
|
"eval_samples_per_second": 70.753, |
|
"eval_steps_per_second": 3.561, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7943712563032259, |
|
"eval_loss": 1.078471064567566, |
|
"eval_runtime": 27.7304, |
|
"eval_samples_per_second": 71.654, |
|
"eval_steps_per_second": 3.606, |
|
"step": 1416 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 3.728813559322034e-05, |
|
"loss": 0.9221, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7942034992154889, |
|
"eval_loss": 1.0854936838150024, |
|
"eval_runtime": 27.702, |
|
"eval_samples_per_second": 71.728, |
|
"eval_steps_per_second": 3.61, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7943969132695856, |
|
"eval_loss": 1.0861722230911255, |
|
"eval_runtime": 27.7005, |
|
"eval_samples_per_second": 71.731, |
|
"eval_steps_per_second": 3.61, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7947047968659029, |
|
"eval_loss": 1.0891741514205933, |
|
"eval_runtime": 27.7017, |
|
"eval_samples_per_second": 71.729, |
|
"eval_steps_per_second": 3.61, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7944600688790866, |
|
"eval_loss": 1.0921040773391724, |
|
"eval_runtime": 27.6967, |
|
"eval_samples_per_second": 71.741, |
|
"eval_steps_per_second": 3.611, |
|
"step": 1888 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 3.305084745762712e-05, |
|
"loss": 0.8534, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7944245438487423, |
|
"eval_loss": 1.0993601083755493, |
|
"eval_runtime": 27.7053, |
|
"eval_samples_per_second": 71.719, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7944719105558681, |
|
"eval_loss": 1.100709319114685, |
|
"eval_runtime": 27.7072, |
|
"eval_samples_per_second": 71.714, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7942567867610053, |
|
"eval_loss": 1.1095192432403564, |
|
"eval_runtime": 27.6924, |
|
"eval_samples_per_second": 71.753, |
|
"eval_steps_per_second": 3.611, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7939962698718138, |
|
"eval_loss": 1.1117533445358276, |
|
"eval_runtime": 27.706, |
|
"eval_samples_per_second": 71.717, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7940456101917365, |
|
"eval_loss": 1.1208868026733398, |
|
"eval_runtime": 27.7121, |
|
"eval_samples_per_second": 71.702, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 2.88135593220339e-05, |
|
"loss": 0.7959, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7939271934239222, |
|
"eval_loss": 1.1249679327011108, |
|
"eval_runtime": 27.7085, |
|
"eval_samples_per_second": 71.711, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7937811460769512, |
|
"eval_loss": 1.1323717832565308, |
|
"eval_runtime": 27.7079, |
|
"eval_samples_per_second": 71.712, |
|
"eval_steps_per_second": 3.609, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7939074572959531, |
|
"eval_loss": 1.136080026626587, |
|
"eval_runtime": 27.704, |
|
"eval_samples_per_second": 71.723, |
|
"eval_steps_per_second": 3.61, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7935601014436977, |
|
"eval_loss": 1.1441864967346191, |
|
"eval_runtime": 27.6972, |
|
"eval_samples_per_second": 71.74, |
|
"eval_steps_per_second": 3.61, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 2.457627118644068e-05, |
|
"loss": 0.7458, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7934377374502897, |
|
"eval_loss": 1.1540361642837524, |
|
"eval_runtime": 27.7164, |
|
"eval_samples_per_second": 71.69, |
|
"eval_steps_per_second": 3.608, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7932680067497557, |
|
"eval_loss": 1.1579915285110474, |
|
"eval_runtime": 27.7102, |
|
"eval_samples_per_second": 71.706, |
|
"eval_steps_per_second": 3.609, |
|
"step": 3186 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.793429842999102, |
|
"eval_loss": 1.1653059720993042, |
|
"eval_runtime": 27.7095, |
|
"eval_samples_per_second": 71.708, |
|
"eval_steps_per_second": 3.609, |
|
"step": 3304 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7931002496620189, |
|
"eval_loss": 1.1742562055587769, |
|
"eval_runtime": 27.7109, |
|
"eval_samples_per_second": 71.705, |
|
"eval_steps_per_second": 3.609, |
|
"step": 3422 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"learning_rate": 2.033898305084746e-05, |
|
"loss": 0.7034, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.7932344553322084, |
|
"eval_loss": 1.1726936101913452, |
|
"eval_runtime": 27.7127, |
|
"eval_samples_per_second": 71.7, |
|
"eval_steps_per_second": 3.608, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7930153843117519, |
|
"eval_loss": 1.1811208724975586, |
|
"eval_runtime": 27.7003, |
|
"eval_samples_per_second": 71.732, |
|
"eval_steps_per_second": 3.61, |
|
"step": 3658 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7930114370861581, |
|
"eval_loss": 1.1879879236221313, |
|
"eval_runtime": 27.7125, |
|
"eval_samples_per_second": 71.701, |
|
"eval_steps_per_second": 3.608, |
|
"step": 3776 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7927528938097634, |
|
"eval_loss": 1.1952810287475586, |
|
"eval_runtime": 27.7028, |
|
"eval_samples_per_second": 71.726, |
|
"eval_steps_per_second": 3.61, |
|
"step": 3894 |
|
}, |
|
{ |
|
"epoch": 33.89, |
|
"learning_rate": 1.6101694915254237e-05, |
|
"loss": 0.6688, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.7928298647088428, |
|
"eval_loss": 1.200770378112793, |
|
"eval_runtime": 27.7087, |
|
"eval_samples_per_second": 71.71, |
|
"eval_steps_per_second": 3.609, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.7926384242675429, |
|
"eval_loss": 1.205856442451477, |
|
"eval_runtime": 27.7159, |
|
"eval_samples_per_second": 71.692, |
|
"eval_steps_per_second": 3.608, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7925101394357441, |
|
"eval_loss": 1.2133612632751465, |
|
"eval_runtime": 27.7077, |
|
"eval_samples_per_second": 71.713, |
|
"eval_steps_per_second": 3.609, |
|
"step": 4248 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7922279128057866, |
|
"eval_loss": 1.2220430374145508, |
|
"eval_runtime": 27.7023, |
|
"eval_samples_per_second": 71.727, |
|
"eval_steps_per_second": 3.61, |
|
"step": 4366 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.7922910684152876, |
|
"eval_loss": 1.2201595306396484, |
|
"eval_runtime": 27.6967, |
|
"eval_samples_per_second": 71.741, |
|
"eval_steps_per_second": 3.611, |
|
"step": 4484 |
|
}, |
|
{ |
|
"epoch": 38.14, |
|
"learning_rate": 1.1864406779661018e-05, |
|
"loss": 0.6427, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.7922890948024907, |
|
"eval_loss": 1.2266799211502075, |
|
"eval_runtime": 27.7206, |
|
"eval_samples_per_second": 71.68, |
|
"eval_steps_per_second": 3.607, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7919200292094694, |
|
"eval_loss": 1.2350112199783325, |
|
"eval_runtime": 27.709, |
|
"eval_samples_per_second": 71.709, |
|
"eval_steps_per_second": 3.609, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.792095680748394, |
|
"eval_loss": 1.23201584815979, |
|
"eval_runtime": 27.6985, |
|
"eval_samples_per_second": 71.737, |
|
"eval_steps_per_second": 3.61, |
|
"step": 4838 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.791866741663953, |
|
"eval_loss": 1.2356910705566406, |
|
"eval_runtime": 27.7184, |
|
"eval_samples_per_second": 71.685, |
|
"eval_steps_per_second": 3.608, |
|
"step": 4956 |
|
}, |
|
{ |
|
"epoch": 42.37, |
|
"learning_rate": 7.627118644067798e-06, |
|
"loss": 0.6219, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.791991079270158, |
|
"eval_loss": 1.23865807056427, |
|
"eval_runtime": 27.7193, |
|
"eval_samples_per_second": 71.683, |
|
"eval_steps_per_second": 3.608, |
|
"step": 5074 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7919575278526106, |
|
"eval_loss": 1.2408578395843506, |
|
"eval_runtime": 27.719, |
|
"eval_samples_per_second": 71.684, |
|
"eval_steps_per_second": 3.608, |
|
"step": 5192 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.791886477791922, |
|
"eval_loss": 1.244328260421753, |
|
"eval_runtime": 27.7249, |
|
"eval_samples_per_second": 71.668, |
|
"eval_steps_per_second": 3.607, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.7917897707648737, |
|
"eval_loss": 1.2477593421936035, |
|
"eval_runtime": 27.6986, |
|
"eval_samples_per_second": 71.736, |
|
"eval_steps_per_second": 3.61, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 46.61, |
|
"learning_rate": 3.3898305084745763e-06, |
|
"loss": 0.6097, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.7917739818624984, |
|
"eval_loss": 1.2487553358078003, |
|
"eval_runtime": 27.7426, |
|
"eval_samples_per_second": 71.623, |
|
"eval_steps_per_second": 3.605, |
|
"step": 5546 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7917522721217325, |
|
"eval_loss": 1.2487850189208984, |
|
"eval_runtime": 27.7184, |
|
"eval_samples_per_second": 71.685, |
|
"eval_steps_per_second": 3.608, |
|
"step": 5664 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.7917424040577479, |
|
"eval_loss": 1.2495468854904175, |
|
"eval_runtime": 27.7228, |
|
"eval_samples_per_second": 71.674, |
|
"eval_steps_per_second": 3.607, |
|
"step": 5782 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.791762140185717, |
|
"eval_loss": 1.2498760223388672, |
|
"eval_runtime": 27.7268, |
|
"eval_samples_per_second": 71.664, |
|
"eval_steps_per_second": 3.607, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 5900, |
|
"total_flos": 6.1908182433792e+16, |
|
"train_loss": 0.9664699619099245, |
|
"train_runtime": 20866.4149, |
|
"train_samples_per_second": 22.711, |
|
"train_steps_per_second": 0.283 |
|
} |
|
], |
|
"max_steps": 5900, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.1908182433792e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|