{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 73632,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 3e-05,
      "loss": 0.7773,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.7983698420784513,
      "eval_loss": 0.5241270065307617,
      "eval_runtime": 42.0562,
      "eval_samples_per_second": 233.378,
      "eval_steps_per_second": 29.175,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 2.9586958916180196e-05,
      "loss": 0.546,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.8193581253183903,
      "eval_loss": 0.46290943026542664,
      "eval_runtime": 42.1932,
      "eval_samples_per_second": 232.621,
      "eval_steps_per_second": 29.081,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 2.9173917832360394e-05,
      "loss": 0.5032,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.8274070300560367,
      "eval_loss": 0.4703749418258667,
      "eval_runtime": 42.1306,
      "eval_samples_per_second": 232.966,
      "eval_steps_per_second": 29.124,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 2.876087674854059e-05,
      "loss": 0.4711,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.8354559347936832,
      "eval_loss": 0.4382944405078888,
      "eval_runtime": 41.9141,
      "eval_samples_per_second": 234.169,
      "eval_steps_per_second": 29.274,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.8347835664720783e-05,
      "loss": 0.473,
      "step": 5000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.8304635761589404,
      "eval_loss": 0.4652259051799774,
      "eval_runtime": 42.5911,
      "eval_samples_per_second": 230.447,
      "eval_steps_per_second": 28.809,
      "step": 5000
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.793479458090098e-05,
      "loss": 0.4619,
      "step": 6000
    },
    {
      "epoch": 0.24,
      "eval_accuracy": 0.8386143657666837,
      "eval_loss": 0.42338472604751587,
      "eval_runtime": 42.1336,
      "eval_samples_per_second": 232.949,
      "eval_steps_per_second": 29.122,
      "step": 6000
    },
    {
      "epoch": 0.29,
      "learning_rate": 2.7521753497081176e-05,
      "loss": 0.4542,
      "step": 7000
    },
    {
      "epoch": 0.29,
      "eval_accuracy": 0.8349465104431992,
      "eval_loss": 0.48245278000831604,
      "eval_runtime": 42.4819,
      "eval_samples_per_second": 231.04,
      "eval_steps_per_second": 28.883,
      "step": 7000
    },
    {
      "epoch": 0.33,
      "learning_rate": 2.7108712413261374e-05,
      "loss": 0.4468,
      "step": 8000
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.8513499745287825,
      "eval_loss": 0.39848873019218445,
      "eval_runtime": 42.353,
      "eval_samples_per_second": 231.743,
      "eval_steps_per_second": 28.971,
      "step": 8000
    },
    {
      "epoch": 0.37,
      "learning_rate": 2.669567132944157e-05,
      "loss": 0.4288,
      "step": 9000
    },
    {
      "epoch": 0.37,
      "eval_accuracy": 0.8493122771268466,
      "eval_loss": 0.40836581587791443,
      "eval_runtime": 42.666,
      "eval_samples_per_second": 230.042,
      "eval_steps_per_second": 28.758,
      "step": 9000
    },
    {
      "epoch": 0.41,
      "learning_rate": 2.6282630245621764e-05,
      "loss": 0.4354,
      "step": 10000
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.8532857870606215,
      "eval_loss": 0.3850448429584503,
      "eval_runtime": 42.6387,
      "eval_samples_per_second": 230.19,
      "eval_steps_per_second": 28.777,
      "step": 10000
    },
    {
      "epoch": 0.45,
      "learning_rate": 2.5869589161801962e-05,
      "loss": 0.423,
      "step": 11000
    },
    {
      "epoch": 0.45,
      "eval_accuracy": 0.8509424350483953,
      "eval_loss": 0.3855280578136444,
      "eval_runtime": 42.4952,
      "eval_samples_per_second": 230.967,
      "eval_steps_per_second": 28.874,
      "step": 11000
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.5456548077982157e-05,
      "loss": 0.4167,
      "step": 12000
    },
    {
      "epoch": 0.49,
      "eval_accuracy": 0.8513499745287825,
      "eval_loss": 0.41219788789749146,
      "eval_runtime": 42.4026,
      "eval_samples_per_second": 231.472,
      "eval_steps_per_second": 28.937,
      "step": 12000
    },
    {
      "epoch": 0.53,
      "learning_rate": 2.504350699416235e-05,
      "loss": 0.4129,
      "step": 13000
    },
    {
      "epoch": 0.53,
      "eval_accuracy": 0.8550178298522669,
      "eval_loss": 0.40088921785354614,
      "eval_runtime": 42.5886,
      "eval_samples_per_second": 230.461,
      "eval_steps_per_second": 28.811,
      "step": 13000
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.463046591034255e-05,
      "loss": 0.4135,
      "step": 14000
    },
    {
      "epoch": 0.57,
      "eval_accuracy": 0.8544065206316862,
      "eval_loss": 0.4136492609977722,
      "eval_runtime": 42.4803,
      "eval_samples_per_second": 231.048,
      "eval_steps_per_second": 28.884,
      "step": 14000
    },
    {
      "epoch": 0.61,
      "learning_rate": 2.4217424826522744e-05,
      "loss": 0.4074,
      "step": 15000
    },
    {
      "epoch": 0.61,
      "eval_accuracy": 0.8595007641365258,
      "eval_loss": 0.38692933320999146,
      "eval_runtime": 42.276,
      "eval_samples_per_second": 232.165,
      "eval_steps_per_second": 29.024,
      "step": 15000
    },
    {
      "epoch": 0.65,
      "learning_rate": 2.3804383742702942e-05,
      "loss": 0.415,
      "step": 16000
    },
    {
      "epoch": 0.65,
      "eval_accuracy": 0.8516556291390729,
      "eval_loss": 0.39110422134399414,
      "eval_runtime": 42.4226,
      "eval_samples_per_second": 231.363,
      "eval_steps_per_second": 28.923,
      "step": 16000
    },
    {
      "epoch": 0.69,
      "learning_rate": 2.3391342658883137e-05,
      "loss": 0.4095,
      "step": 17000
    },
    {
      "epoch": 0.69,
      "eval_accuracy": 0.8592969943963321,
      "eval_loss": 0.38802793622016907,
      "eval_runtime": 42.6242,
      "eval_samples_per_second": 230.268,
      "eval_steps_per_second": 28.786,
      "step": 17000
    },
    {
      "epoch": 0.73,
      "learning_rate": 2.2978301575063332e-05,
      "loss": 0.4001,
      "step": 18000
    },
    {
      "epoch": 0.73,
      "eval_accuracy": 0.8586856851757514,
      "eval_loss": 0.3907186985015869,
      "eval_runtime": 42.4988,
      "eval_samples_per_second": 230.948,
      "eval_steps_per_second": 28.871,
      "step": 18000
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.256526049124353e-05,
      "loss": 0.4069,
      "step": 19000
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.8629648497198166,
      "eval_loss": 0.3686215281486511,
      "eval_runtime": 42.3356,
      "eval_samples_per_second": 231.838,
      "eval_steps_per_second": 28.983,
      "step": 19000
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.2152219407423725e-05,
      "loss": 0.3927,
      "step": 20000
    },
    {
      "epoch": 0.81,
      "eval_accuracy": 0.8592969943963321,
      "eval_loss": 0.4008384943008423,
      "eval_runtime": 42.4047,
      "eval_samples_per_second": 231.46,
      "eval_steps_per_second": 28.935,
      "step": 20000
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.173917832360392e-05,
      "loss": 0.3958,
      "step": 21000
    },
    {
      "epoch": 0.86,
      "eval_accuracy": 0.8638818135506877,
      "eval_loss": 0.37160658836364746,
      "eval_runtime": 42.5674,
      "eval_samples_per_second": 230.575,
      "eval_steps_per_second": 28.825,
      "step": 21000
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.1326137239784118e-05,
      "loss": 0.4016,
      "step": 22000
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.8678553234844626,
      "eval_loss": 0.35941794514656067,
      "eval_runtime": 42.472,
      "eval_samples_per_second": 231.093,
      "eval_steps_per_second": 28.89,
      "step": 22000
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.0913096155964313e-05,
      "loss": 0.3945,
      "step": 23000
    },
    {
      "epoch": 0.94,
      "eval_accuracy": 0.8678553234844626,
      "eval_loss": 0.359527587890625,
      "eval_runtime": 42.614,
      "eval_samples_per_second": 230.324,
      "eval_steps_per_second": 28.793,
      "step": 23000
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.050005507214451e-05,
      "loss": 0.3932,
      "step": 24000
    },
    {
      "epoch": 0.98,
      "eval_accuracy": 0.8644931227712684,
      "eval_loss": 0.35774311423301697,
      "eval_runtime": 42.4615,
      "eval_samples_per_second": 231.151,
      "eval_steps_per_second": 28.897,
      "step": 24000
    },
    {
      "epoch": 1.02,
      "learning_rate": 2.0087013988324705e-05,
      "loss": 0.345,
      "step": 25000
    },
    {
      "epoch": 1.02,
      "eval_accuracy": 0.8698930208863984,
      "eval_loss": 0.40802302956581116,
      "eval_runtime": 42.6011,
      "eval_samples_per_second": 230.393,
      "eval_steps_per_second": 28.802,
      "step": 25000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.96739729045049e-05,
      "loss": 0.2885,
      "step": 26000
    },
    {
      "epoch": 1.06,
      "eval_accuracy": 0.8674477840040754,
      "eval_loss": 0.39192140102386475,
      "eval_runtime": 42.4976,
      "eval_samples_per_second": 230.954,
      "eval_steps_per_second": 28.872,
      "step": 26000
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.92609318206851e-05,
      "loss": 0.2858,
      "step": 27000
    },
    {
      "epoch": 1.1,
      "eval_accuracy": 0.8651044319918492,
      "eval_loss": 0.4346281588077545,
      "eval_runtime": 42.3673,
      "eval_samples_per_second": 231.664,
      "eval_steps_per_second": 28.961,
      "step": 27000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.8847890736865293e-05,
      "loss": 0.2872,
      "step": 28000
    },
    {
      "epoch": 1.14,
      "eval_accuracy": 0.8674477840040754,
      "eval_loss": 0.41050681471824646,
      "eval_runtime": 42.4953,
      "eval_samples_per_second": 230.967,
      "eval_steps_per_second": 28.874,
      "step": 28000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.8434849653045488e-05,
      "loss": 0.3002,
      "step": 29000
    },
    {
      "epoch": 1.18,
      "eval_accuracy": 0.8708099847172694,
      "eval_loss": 0.4133119583129883,
      "eval_runtime": 42.4588,
      "eval_samples_per_second": 231.165,
      "eval_steps_per_second": 28.899,
      "step": 29000
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.8021808569225686e-05,
      "loss": 0.2954,
      "step": 30000
    },
    {
      "epoch": 1.22,
      "eval_accuracy": 0.8667345899133979,
      "eval_loss": 0.406183123588562,
      "eval_runtime": 42.2388,
      "eval_samples_per_second": 232.369,
      "eval_steps_per_second": 29.049,
      "step": 30000
    },
    {
      "epoch": 1.26,
      "learning_rate": 1.760876748540588e-05,
      "loss": 0.2912,
      "step": 31000
    },
    {
      "epoch": 1.26,
      "eval_accuracy": 0.8708099847172694,
      "eval_loss": 0.397215336561203,
      "eval_runtime": 42.5529,
      "eval_samples_per_second": 230.654,
      "eval_steps_per_second": 28.835,
      "step": 31000
    },
    {
      "epoch": 1.3,
      "learning_rate": 1.7195726401586076e-05,
      "loss": 0.2958,
      "step": 32000
    },
    {
      "epoch": 1.3,
      "eval_accuracy": 0.8731533367294957,
      "eval_loss": 0.3713410496711731,
      "eval_runtime": 42.8662,
      "eval_samples_per_second": 228.968,
      "eval_steps_per_second": 28.624,
      "step": 32000
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.6782685317766274e-05,
      "loss": 0.293,
      "step": 33000
    },
    {
      "epoch": 1.34,
      "eval_accuracy": 0.871523178807947,
      "eval_loss": 0.3716830015182495,
      "eval_runtime": 42.5402,
      "eval_samples_per_second": 230.723,
      "eval_steps_per_second": 28.843,
      "step": 33000
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.636964423394647e-05,
      "loss": 0.3001,
      "step": 34000
    },
    {
      "epoch": 1.39,
      "eval_accuracy": 0.8716250636780438,
      "eval_loss": 0.3826219141483307,
      "eval_runtime": 42.576,
      "eval_samples_per_second": 230.529,
      "eval_steps_per_second": 28.819,
      "step": 34000
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.5956603150126667e-05,
      "loss": 0.2864,
      "step": 35000
    },
    {
      "epoch": 1.43,
      "eval_accuracy": 0.8693835965359145,
      "eval_loss": 0.41551369428634644,
      "eval_runtime": 42.2749,
      "eval_samples_per_second": 232.171,
      "eval_steps_per_second": 29.024,
      "step": 35000
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.554356206630686e-05,
      "loss": 0.2827,
      "step": 36000
    },
    {
      "epoch": 1.47,
      "eval_accuracy": 0.866632705043301,
      "eval_loss": 0.4223748743534088,
      "eval_runtime": 42.4098,
      "eval_samples_per_second": 231.432,
      "eval_steps_per_second": 28.932,
      "step": 36000
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.5130520982487058e-05,
      "loss": 0.2836,
      "step": 37000
    },
    {
      "epoch": 1.51,
      "eval_accuracy": 0.8743759551706571,
      "eval_loss": 0.3832014799118042,
      "eval_runtime": 42.6737,
      "eval_samples_per_second": 230.001,
      "eval_steps_per_second": 28.753,
      "step": 37000
    },
    {
      "epoch": 1.55,
      "learning_rate": 1.4717479898667254e-05,
      "loss": 0.2844,
      "step": 38000
    },
    {
      "epoch": 1.55,
      "eval_accuracy": 0.8698930208863984,
      "eval_loss": 0.4178868234157562,
      "eval_runtime": 42.5391,
      "eval_samples_per_second": 230.729,
      "eval_steps_per_second": 28.844,
      "step": 38000
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.430443881484745e-05,
      "loss": 0.2866,
      "step": 39000
    },
    {
      "epoch": 1.59,
      "eval_accuracy": 0.8680590932246561,
      "eval_loss": 0.3968764543533325,
      "eval_runtime": 42.6173,
      "eval_samples_per_second": 230.306,
      "eval_steps_per_second": 28.791,
      "step": 39000
    },
    {
      "epoch": 1.63,
      "learning_rate": 1.3891397731027647e-05,
      "loss": 0.2883,
      "step": 40000
    },
    {
      "epoch": 1.63,
      "eval_accuracy": 0.8682628629648497,
      "eval_loss": 0.4000142514705658,
      "eval_runtime": 42.4575,
      "eval_samples_per_second": 231.172,
      "eval_steps_per_second": 28.899,
      "step": 40000
    },
    {
      "epoch": 1.67,
      "learning_rate": 1.3478356647207844e-05,
      "loss": 0.2832,
      "step": 41000
    },
    {
      "epoch": 1.67,
      "eval_accuracy": 0.8687722873153336,
      "eval_loss": 0.3853473365306854,
      "eval_runtime": 42.6575,
      "eval_samples_per_second": 230.088,
      "eval_steps_per_second": 28.764,
      "step": 41000
    },
    {
      "epoch": 1.71,
      "learning_rate": 1.3065315563388038e-05,
      "loss": 0.2876,
      "step": 42000
    },
    {
      "epoch": 1.71,
      "eval_accuracy": 0.8676515537442689,
      "eval_loss": 0.39242836833000183,
      "eval_runtime": 42.1832,
      "eval_samples_per_second": 232.675,
      "eval_steps_per_second": 29.087,
      "step": 42000
    },
    {
      "epoch": 1.75,
      "learning_rate": 1.2652274479568235e-05,
      "loss": 0.2855,
      "step": 43000
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.8719307182883341,
      "eval_loss": 0.4176536798477173,
      "eval_runtime": 42.3591,
      "eval_samples_per_second": 231.709,
      "eval_steps_per_second": 28.967,
      "step": 43000
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.2239233395748431e-05,
      "loss": 0.2845,
      "step": 44000
    },
    {
      "epoch": 1.79,
      "eval_accuracy": 0.8724401426388181,
      "eval_loss": 0.38765597343444824,
      "eval_runtime": 42.3828,
      "eval_samples_per_second": 231.58,
      "eval_steps_per_second": 28.95,
      "step": 44000
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.1826192311928628e-05,
      "loss": 0.2882,
      "step": 45000
    },
    {
      "epoch": 1.83,
      "eval_accuracy": 0.8713194090677534,
      "eval_loss": 0.3961141109466553,
      "eval_runtime": 42.7036,
      "eval_samples_per_second": 229.84,
      "eval_steps_per_second": 28.733,
      "step": 45000
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.1413151228108823e-05,
      "loss": 0.2773,
      "step": 46000
    },
    {
      "epoch": 1.87,
      "eval_accuracy": 0.87396841569027,
      "eval_loss": 0.37906670570373535,
      "eval_runtime": 42.5306,
      "eval_samples_per_second": 230.775,
      "eval_steps_per_second": 28.85,
      "step": 46000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.1000110144289019e-05,
      "loss": 0.2767,
      "step": 47000
    },
    {
      "epoch": 1.91,
      "eval_accuracy": 0.8779419256240448,
      "eval_loss": 0.3877304494380951,
      "eval_runtime": 42.632,
      "eval_samples_per_second": 230.226,
      "eval_steps_per_second": 28.781,
      "step": 47000
    },
    {
      "epoch": 1.96,
      "learning_rate": 1.0587069060469215e-05,
      "loss": 0.2772,
      "step": 48000
    },
    {
      "epoch": 1.96,
      "eval_accuracy": 0.8689760570555273,
      "eval_loss": 0.402159720659256,
      "eval_runtime": 42.3751,
      "eval_samples_per_second": 231.622,
      "eval_steps_per_second": 28.956,
      "step": 48000
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.017402797664941e-05,
      "loss": 0.2816,
      "step": 49000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8731533367294957,
      "eval_loss": 0.3836565613746643,
      "eval_runtime": 42.6247,
      "eval_samples_per_second": 230.265,
      "eval_steps_per_second": 28.786,
      "step": 49000
    },
    {
      "epoch": 2.04,
      "learning_rate": 9.760986892829607e-06,
      "loss": 0.2068,
      "step": 50000
    },
    {
      "epoch": 2.04,
      "eval_accuracy": 0.872032603158431,
      "eval_loss": 0.4643520712852478,
      "eval_runtime": 42.4953,
      "eval_samples_per_second": 230.967,
      "eval_steps_per_second": 28.874,
      "step": 50000
    },
    {
      "epoch": 2.08,
      "learning_rate": 9.347945809009803e-06,
      "loss": 0.1914,
      "step": 51000
    },
    {
      "epoch": 2.08,
      "eval_accuracy": 0.8743759551706571,
      "eval_loss": 0.49194175004959106,
      "eval_runtime": 42.669,
      "eval_samples_per_second": 230.027,
      "eval_steps_per_second": 28.756,
      "step": 51000
    },
    {
      "epoch": 2.12,
      "learning_rate": 8.93490472519e-06,
      "loss": 0.2,
      "step": 52000
    },
    {
      "epoch": 2.12,
      "eval_accuracy": 0.8701986754966887,
      "eval_loss": 0.4869604706764221,
      "eval_runtime": 42.5176,
      "eval_samples_per_second": 230.845,
      "eval_steps_per_second": 28.859,
      "step": 52000
    },
    {
      "epoch": 2.16,
      "learning_rate": 8.521863641370194e-06,
      "loss": 0.1904,
      "step": 53000
    },
    {
      "epoch": 2.16,
      "eval_accuracy": 0.8736627610799796,
      "eval_loss": 0.5038197636604309,
      "eval_runtime": 42.6855,
      "eval_samples_per_second": 229.937,
      "eval_steps_per_second": 28.745,
      "step": 53000
    },
    {
      "epoch": 2.2,
      "learning_rate": 8.10882255755039e-06,
      "loss": 0.1915,
      "step": 54000
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.8711156393275599,
      "eval_loss": 0.5232452750205994,
      "eval_runtime": 42.4057,
      "eval_samples_per_second": 231.455,
      "eval_steps_per_second": 28.935,
      "step": 54000
    },
    {
      "epoch": 2.24,
      "learning_rate": 7.695781473730587e-06,
      "loss": 0.1956,
      "step": 55000
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.8746816097809476,
      "eval_loss": 0.5192070603370667,
      "eval_runtime": 42.5451,
      "eval_samples_per_second": 230.697,
      "eval_steps_per_second": 28.84,
      "step": 55000
    },
    {
      "epoch": 2.28,
      "learning_rate": 7.282740389910783e-06,
      "loss": 0.1911,
      "step": 56000
    },
    {
      "epoch": 2.28,
      "eval_accuracy": 0.8761079979623025,
      "eval_loss": 0.5215316414833069,
      "eval_runtime": 42.8008,
      "eval_samples_per_second": 229.318,
      "eval_steps_per_second": 28.668,
      "step": 56000
    },
    {
      "epoch": 2.32,
      "learning_rate": 6.869699306090979e-06,
      "loss": 0.2053,
      "step": 57000
    },
    {
      "epoch": 2.32,
      "eval_accuracy": 0.8737646459500764,
      "eval_loss": 0.460406094789505,
      "eval_runtime": 42.6406,
      "eval_samples_per_second": 230.18,
      "eval_steps_per_second": 28.775,
      "step": 57000
    },
    {
      "epoch": 2.36,
      "learning_rate": 6.456658222271175e-06,
      "loss": 0.2008,
      "step": 58000
    },
    {
      "epoch": 2.36,
      "eval_accuracy": 0.871523178807947,
      "eval_loss": 0.5162080526351929,
      "eval_runtime": 42.396,
      "eval_samples_per_second": 231.508,
      "eval_steps_per_second": 28.941,
      "step": 58000
    },
    {
      "epoch": 2.4,
      "learning_rate": 6.043617138451371e-06,
      "loss": 0.1971,
      "step": 59000
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.875394803871625,
      "eval_loss": 0.4885903000831604,
      "eval_runtime": 42.2887,
      "eval_samples_per_second": 232.095,
      "eval_steps_per_second": 29.015,
      "step": 59000
    },
    {
      "epoch": 2.44,
      "learning_rate": 5.630576054631567e-06,
      "loss": 0.192,
      "step": 60000
    },
    {
      "epoch": 2.44,
      "eval_accuracy": 0.872542027508915,
      "eval_loss": 0.49207794666290283,
      "eval_runtime": 42.6387,
      "eval_samples_per_second": 230.19,
      "eval_steps_per_second": 28.777,
      "step": 60000
    },
    {
      "epoch": 2.49,
      "learning_rate": 5.217534970811764e-06,
      "loss": 0.1937,
      "step": 61000
    },
    {
      "epoch": 2.49,
      "eval_accuracy": 0.8763117677024962,
      "eval_loss": 0.4916786253452301,
      "eval_runtime": 42.3165,
      "eval_samples_per_second": 231.943,
      "eval_steps_per_second": 28.996,
      "step": 61000
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.80449388699196e-06,
      "loss": 0.1931,
      "step": 62000
    },
    {
      "epoch": 2.53,
      "eval_accuracy": 0.8778400407539481,
      "eval_loss": 0.47893819212913513,
      "eval_runtime": 42.4257,
      "eval_samples_per_second": 231.346,
      "eval_steps_per_second": 28.921,
      "step": 62000
    },
    {
      "epoch": 2.57,
      "learning_rate": 4.391452803172156e-06,
      "loss": 0.1964,
      "step": 63000
    },
    {
      "epoch": 2.57,
      "eval_accuracy": 0.8721344880285278,
      "eval_loss": 0.49971821904182434,
      "eval_runtime": 42.5737,
      "eval_samples_per_second": 230.542,
      "eval_steps_per_second": 28.821,
      "step": 63000
    },
    {
      "epoch": 2.61,
      "learning_rate": 3.978411719352352e-06,
      "loss": 0.2008,
      "step": 64000
    },
    {
      "epoch": 2.61,
      "eval_accuracy": 0.8755985736118187,
      "eval_loss": 0.4747646749019623,
      "eval_runtime": 42.3736,
      "eval_samples_per_second": 231.63,
      "eval_steps_per_second": 28.957,
      "step": 64000
    },
    {
      "epoch": 2.65,
      "learning_rate": 3.5653706355325475e-06,
      "loss": 0.1962,
      "step": 65000
    },
    {
      "epoch": 2.65,
      "eval_accuracy": 0.876413652572593,
      "eval_loss": 0.4839693605899811,
      "eval_runtime": 42.5306,
      "eval_samples_per_second": 230.775,
      "eval_steps_per_second": 28.85,
      "step": 65000
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.1523295517127435e-06,
      "loss": 0.2029,
      "step": 66000
    },
    {
      "epoch": 2.69,
      "eval_accuracy": 0.8767193071828834,
      "eval_loss": 0.48892590403556824,
      "eval_runtime": 42.2422,
      "eval_samples_per_second": 232.351,
      "eval_steps_per_second": 29.047,
      "step": 66000
    },
    {
      "epoch": 2.73,
      "learning_rate": 2.73928846789294e-06,
      "loss": 0.1927,
      "step": 67000
    },
    {
      "epoch": 2.73,
      "eval_accuracy": 0.8758023433520122,
      "eval_loss": 0.4820311963558197,
      "eval_runtime": 42.6573,
      "eval_samples_per_second": 230.089,
      "eval_steps_per_second": 28.764,
      "step": 67000
    },
    {
      "epoch": 2.77,
      "learning_rate": 2.326247384073136e-06,
      "loss": 0.1926,
      "step": 68000
    },
    {
      "epoch": 2.77,
      "eval_accuracy": 0.8762098828323994,
      "eval_loss": 0.48573482036590576,
      "eval_runtime": 42.7141,
      "eval_samples_per_second": 229.783,
      "eval_steps_per_second": 28.726,
      "step": 68000
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.913206300253332e-06,
      "loss": 0.1919,
      "step": 69000
    },
    {
      "epoch": 2.81,
      "eval_accuracy": 0.8748853795211411,
      "eval_loss": 0.4835805296897888,
      "eval_runtime": 42.3433,
      "eval_samples_per_second": 231.796,
      "eval_steps_per_second": 28.977,
      "step": 69000
    },
    {
      "epoch": 2.85,
      "learning_rate": 1.500165216433528e-06,
      "loss": 0.1911,
      "step": 70000
    },
    {
      "epoch": 2.85,
      "eval_accuracy": 0.8741721854304636,
      "eval_loss": 0.48588985204696655,
      "eval_runtime": 42.2277,
      "eval_samples_per_second": 232.43,
      "eval_steps_per_second": 29.057,
      "step": 70000
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.087124132613724e-06,
      "loss": 0.1897,
      "step": 71000
    },
    {
      "epoch": 2.89,
      "eval_accuracy": 0.8766174223127865,
      "eval_loss": 0.48527196049690247,
      "eval_runtime": 42.5077,
      "eval_samples_per_second": 230.899,
      "eval_steps_per_second": 28.865,
      "step": 71000
    },
    {
      "epoch": 2.93,
      "learning_rate": 6.740830487939201e-07,
      "loss": 0.186,
      "step": 72000
    },
    {
      "epoch": 2.93,
      "eval_accuracy": 0.8768211920529801,
      "eval_loss": 0.4946127235889435,
      "eval_runtime": 42.345,
      "eval_samples_per_second": 231.786,
      "eval_steps_per_second": 28.976,
      "step": 72000
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.610419649741161e-07,
      "loss": 0.2011,
      "step": 73000
    },
    {
      "epoch": 2.97,
      "eval_accuracy": 0.8767193071828834,
      "eval_loss": 0.4851147532463074,
      "eval_runtime": 42.2673,
      "eval_samples_per_second": 232.213,
      "eval_steps_per_second": 29.03,
      "step": 73000
    },
    {
      "epoch": 3.0,
      "step": 73632,
      "total_flos": 7.803448964124365e+16,
      "train_loss": 0.3087343405972663,
      "train_runtime": 16507.3954,
      "train_samples_per_second": 71.368,
      "train_steps_per_second": 4.461
    }
  ],
  "max_steps": 73632,
  "num_train_epochs": 3,
  "total_flos": 7.803448964124365e+16,
  "trial_name": null,
  "trial_params": null
}
|