{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.9957805907173, "global_step": 5900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7731608395748838, "eval_loss": 1.273674726486206, "eval_runtime": 27.8473, "eval_samples_per_second": 71.353, "eval_steps_per_second": 3.591, "step": 118 }, { "epoch": 2.0, "eval_accuracy": 0.7844064852916506, "eval_loss": 1.149436354637146, "eval_runtime": 27.7157, "eval_samples_per_second": 71.692, "eval_steps_per_second": 3.608, "step": 236 }, { "epoch": 3.0, "eval_accuracy": 0.7886655417073725, "eval_loss": 1.115924596786499, "eval_runtime": 27.7526, "eval_samples_per_second": 71.597, "eval_steps_per_second": 3.603, "step": 354 }, { "epoch": 4.0, "eval_accuracy": 0.7905562627668078, "eval_loss": 1.0954861640930176, "eval_runtime": 27.7193, "eval_samples_per_second": 71.683, "eval_steps_per_second": 3.608, "step": 472 }, { "epoch": 4.24, "learning_rate": 4.5762711864406784e-05, "loss": 3.3542, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.7918075332800458, "eval_loss": 1.0870832204818726, "eval_runtime": 27.7224, "eval_samples_per_second": 71.675, "eval_steps_per_second": 3.607, "step": 590 }, { "epoch": 6.0, "eval_accuracy": 0.7928476272240149, "eval_loss": 1.0806066989898682, "eval_runtime": 27.72, "eval_samples_per_second": 71.681, "eval_steps_per_second": 3.607, "step": 708 }, { "epoch": 7.0, "eval_accuracy": 0.7931219594027847, "eval_loss": 1.0786826610565186, "eval_runtime": 27.7079, "eval_samples_per_second": 71.712, "eval_steps_per_second": 3.609, "step": 826 }, { "epoch": 8.0, "eval_accuracy": 0.7938285127840768, "eval_loss": 1.0748920440673828, "eval_runtime": 27.7168, "eval_samples_per_second": 71.689, "eval_steps_per_second": 3.608, "step": 944 }, { "epoch": 8.47, "learning_rate": 4.152542372881356e-05, "loss": 1.0048, "step": 1000 }, { "epoch": 9.0, "eval_accuracy": 0.7937554891105914, "eval_loss": 1.0768133401870728, "eval_runtime": 27.8358, "eval_samples_per_second": 71.383, "eval_steps_per_second": 3.592, "step": 1062 }, { "epoch": 10.0, "eval_accuracy": 0.793783119689748, "eval_loss": 1.076497197151184, "eval_runtime": 27.7645, "eval_samples_per_second": 71.566, "eval_steps_per_second": 3.602, "step": 1180 }, { "epoch": 11.0, "eval_accuracy": 0.7941719214107384, "eval_loss": 1.0760843753814697, "eval_runtime": 28.0837, "eval_samples_per_second": 70.753, "eval_steps_per_second": 3.561, "step": 1298 }, { "epoch": 12.0, "eval_accuracy": 0.7943712563032259, "eval_loss": 1.078471064567566, "eval_runtime": 27.7304, "eval_samples_per_second": 71.654, "eval_steps_per_second": 3.606, "step": 1416 }, { "epoch": 12.71, "learning_rate": 3.728813559322034e-05, "loss": 0.9221, "step": 1500 }, { "epoch": 13.0, "eval_accuracy": 0.7942034992154889, "eval_loss": 1.0854936838150024, "eval_runtime": 27.702, "eval_samples_per_second": 71.728, "eval_steps_per_second": 3.61, "step": 1534 }, { "epoch": 14.0, "eval_accuracy": 0.7943969132695856, "eval_loss": 1.0861722230911255, "eval_runtime": 27.7005, "eval_samples_per_second": 71.731, "eval_steps_per_second": 3.61, "step": 1652 }, { "epoch": 15.0, "eval_accuracy": 0.7947047968659029, "eval_loss": 1.0891741514205933, "eval_runtime": 27.7017, "eval_samples_per_second": 71.729, "eval_steps_per_second": 3.61, "step": 1770 }, { "epoch": 16.0, "eval_accuracy": 0.7944600688790866, "eval_loss": 1.0921040773391724, "eval_runtime": 27.6967, "eval_samples_per_second": 71.741, "eval_steps_per_second": 3.611, "step": 1888 }, { "epoch": 16.95, "learning_rate": 3.305084745762712e-05, "loss": 0.8534, "step": 2000 }, { "epoch": 17.0, "eval_accuracy": 0.7944245438487423, "eval_loss": 1.0993601083755493, "eval_runtime": 27.7053, "eval_samples_per_second": 71.719, "eval_steps_per_second": 3.609, "step": 2006 }, { "epoch": 18.0, "eval_accuracy": 0.7944719105558681, "eval_loss": 1.100709319114685, "eval_runtime": 27.7072, "eval_samples_per_second": 71.714, "eval_steps_per_second": 3.609, "step": 2124 }, { "epoch": 19.0, "eval_accuracy": 0.7942567867610053, "eval_loss": 1.1095192432403564, "eval_runtime": 27.6924, "eval_samples_per_second": 71.753, "eval_steps_per_second": 3.611, "step": 2242 }, { "epoch": 20.0, "eval_accuracy": 0.7939962698718138, "eval_loss": 1.1117533445358276, "eval_runtime": 27.706, "eval_samples_per_second": 71.717, "eval_steps_per_second": 3.609, "step": 2360 }, { "epoch": 21.0, "eval_accuracy": 0.7940456101917365, "eval_loss": 1.1208868026733398, "eval_runtime": 27.7121, "eval_samples_per_second": 71.702, "eval_steps_per_second": 3.609, "step": 2478 }, { "epoch": 21.19, "learning_rate": 2.88135593220339e-05, "loss": 0.7959, "step": 2500 }, { "epoch": 22.0, "eval_accuracy": 0.7939271934239222, "eval_loss": 1.1249679327011108, "eval_runtime": 27.7085, "eval_samples_per_second": 71.711, "eval_steps_per_second": 3.609, "step": 2596 }, { "epoch": 23.0, "eval_accuracy": 0.7937811460769512, "eval_loss": 1.1323717832565308, "eval_runtime": 27.7079, "eval_samples_per_second": 71.712, "eval_steps_per_second": 3.609, "step": 2714 }, { "epoch": 24.0, "eval_accuracy": 0.7939074572959531, "eval_loss": 1.136080026626587, "eval_runtime": 27.704, "eval_samples_per_second": 71.723, "eval_steps_per_second": 3.61, "step": 2832 }, { "epoch": 25.0, "eval_accuracy": 0.7935601014436977, "eval_loss": 1.1441864967346191, "eval_runtime": 27.6972, "eval_samples_per_second": 71.74, "eval_steps_per_second": 3.61, "step": 2950 }, { "epoch": 25.42, "learning_rate": 2.457627118644068e-05, "loss": 0.7458, "step": 3000 }, { "epoch": 26.0, "eval_accuracy": 0.7934377374502897, "eval_loss": 1.1540361642837524, "eval_runtime": 27.7164, "eval_samples_per_second": 71.69, "eval_steps_per_second": 3.608, "step": 3068 }, { "epoch": 27.0, "eval_accuracy": 0.7932680067497557, "eval_loss": 1.1579915285110474, "eval_runtime": 27.7102, "eval_samples_per_second": 71.706, "eval_steps_per_second": 3.609, "step": 3186 }, { "epoch": 28.0, "eval_accuracy": 0.793429842999102, "eval_loss": 1.1653059720993042, "eval_runtime": 27.7095, "eval_samples_per_second": 71.708, "eval_steps_per_second": 3.609, "step": 3304 }, { "epoch": 29.0, "eval_accuracy": 0.7931002496620189, "eval_loss": 1.1742562055587769, "eval_runtime": 27.7109, "eval_samples_per_second": 71.705, "eval_steps_per_second": 3.609, "step": 3422 }, { "epoch": 29.66, "learning_rate": 2.033898305084746e-05, "loss": 0.7034, "step": 3500 }, { "epoch": 30.0, "eval_accuracy": 0.7932344553322084, "eval_loss": 1.1726936101913452, "eval_runtime": 27.7127, "eval_samples_per_second": 71.7, "eval_steps_per_second": 3.608, "step": 3540 }, { "epoch": 31.0, "eval_accuracy": 0.7930153843117519, "eval_loss": 1.1811208724975586, "eval_runtime": 27.7003, "eval_samples_per_second": 71.732, "eval_steps_per_second": 3.61, "step": 3658 }, { "epoch": 32.0, "eval_accuracy": 0.7930114370861581, "eval_loss": 1.1879879236221313, "eval_runtime": 27.7125, "eval_samples_per_second": 71.701, "eval_steps_per_second": 3.608, "step": 3776 }, { "epoch": 33.0, "eval_accuracy": 0.7927528938097634, "eval_loss": 1.1952810287475586, "eval_runtime": 27.7028, "eval_samples_per_second": 71.726, "eval_steps_per_second": 3.61, "step": 3894 }, { "epoch": 33.89, "learning_rate": 1.6101694915254237e-05, "loss": 0.6688, "step": 4000 }, { "epoch": 34.0, "eval_accuracy": 0.7928298647088428, "eval_loss": 1.200770378112793, "eval_runtime": 27.7087, "eval_samples_per_second": 71.71, "eval_steps_per_second": 3.609, "step": 4012 }, { "epoch": 35.0, "eval_accuracy": 0.7926384242675429, "eval_loss": 1.205856442451477, "eval_runtime": 27.7159, "eval_samples_per_second": 71.692, "eval_steps_per_second": 3.608, "step": 4130 }, { "epoch": 36.0, "eval_accuracy": 0.7925101394357441, "eval_loss": 1.2133612632751465, "eval_runtime": 27.7077, "eval_samples_per_second": 71.713, "eval_steps_per_second": 3.609, "step": 4248 }, { "epoch": 37.0, "eval_accuracy": 0.7922279128057866, "eval_loss": 1.2220430374145508, "eval_runtime": 27.7023, "eval_samples_per_second": 71.727, "eval_steps_per_second": 3.61, "step": 4366 }, { "epoch": 38.0, "eval_accuracy": 0.7922910684152876, "eval_loss": 1.2201595306396484, "eval_runtime": 27.6967, "eval_samples_per_second": 71.741, "eval_steps_per_second": 3.611, "step": 4484 }, { "epoch": 38.14, "learning_rate": 1.1864406779661018e-05, "loss": 0.6427, "step": 4500 }, { "epoch": 39.0, "eval_accuracy": 0.7922890948024907, "eval_loss": 1.2266799211502075, "eval_runtime": 27.7206, "eval_samples_per_second": 71.68, "eval_steps_per_second": 3.607, "step": 4602 }, { "epoch": 40.0, "eval_accuracy": 0.7919200292094694, "eval_loss": 1.2350112199783325, "eval_runtime": 27.709, "eval_samples_per_second": 71.709, "eval_steps_per_second": 3.609, "step": 4720 }, { "epoch": 41.0, "eval_accuracy": 0.792095680748394, "eval_loss": 1.23201584815979, "eval_runtime": 27.6985, "eval_samples_per_second": 71.737, "eval_steps_per_second": 3.61, "step": 4838 }, { "epoch": 42.0, "eval_accuracy": 0.791866741663953, "eval_loss": 1.2356910705566406, "eval_runtime": 27.7184, "eval_samples_per_second": 71.685, "eval_steps_per_second": 3.608, "step": 4956 }, { "epoch": 42.37, "learning_rate": 7.627118644067798e-06, "loss": 0.6219, "step": 5000 }, { "epoch": 43.0, "eval_accuracy": 0.791991079270158, "eval_loss": 1.23865807056427, "eval_runtime": 27.7193, "eval_samples_per_second": 71.683, "eval_steps_per_second": 3.608, "step": 5074 }, { "epoch": 44.0, "eval_accuracy": 0.7919575278526106, "eval_loss": 1.2408578395843506, "eval_runtime": 27.719, "eval_samples_per_second": 71.684, "eval_steps_per_second": 3.608, "step": 5192 }, { "epoch": 45.0, "eval_accuracy": 0.791886477791922, "eval_loss": 1.244328260421753, "eval_runtime": 27.7249, "eval_samples_per_second": 71.668, "eval_steps_per_second": 3.607, "step": 5310 }, { "epoch": 46.0, "eval_accuracy": 0.7917897707648737, "eval_loss": 1.2477593421936035, "eval_runtime": 27.6986, "eval_samples_per_second": 71.736, "eval_steps_per_second": 3.61, "step": 5428 }, { "epoch": 46.61, "learning_rate": 3.3898305084745763e-06, "loss": 0.6097, "step": 5500 }, { "epoch": 47.0, "eval_accuracy": 0.7917739818624984, "eval_loss": 1.2487553358078003, "eval_runtime": 27.7426, "eval_samples_per_second": 71.623, "eval_steps_per_second": 3.605, "step": 5546 }, { "epoch": 48.0, "eval_accuracy": 0.7917522721217325, "eval_loss": 1.2487850189208984, "eval_runtime": 27.7184, "eval_samples_per_second": 71.685, "eval_steps_per_second": 3.608, "step": 5664 }, { "epoch": 49.0, "eval_accuracy": 0.7917424040577479, "eval_loss": 1.2495468854904175, "eval_runtime": 27.7228, "eval_samples_per_second": 71.674, "eval_steps_per_second": 3.607, "step": 5782 }, { "epoch": 50.0, "eval_accuracy": 0.791762140185717, "eval_loss": 1.2498760223388672, "eval_runtime": 27.7268, "eval_samples_per_second": 71.664, "eval_steps_per_second": 3.607, "step": 5900 }, { "epoch": 50.0, "step": 5900, "total_flos": 6.1908182433792e+16, "train_loss": 0.9664699619099245, "train_runtime": 20866.4149, "train_samples_per_second": 22.711, "train_steps_per_second": 0.283 } ], "max_steps": 5900, "num_train_epochs": 50, "total_flos": 6.1908182433792e+16, "trial_name": null, "trial_params": null }