|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.585148334503174, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5413, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7368421052631579, |
|
"eval_f1": 0.6531440162271805, |
|
"eval_loss": 0.49492353200912476, |
|
"eval_precision": 0.6762575228471654, |
|
"eval_recall": 0.6437988725222767, |
|
"eval_runtime": 1.7908, |
|
"eval_samples_per_second": 222.81, |
|
"eval_steps_per_second": 27.921, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.6699981689453125, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4306, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.8018811712631937, |
|
"eval_loss": 0.3954322338104248, |
|
"eval_precision": 0.7902494331065759, |
|
"eval_recall": 0.825877432260411, |
|
"eval_runtime": 1.8012, |
|
"eval_samples_per_second": 221.517, |
|
"eval_steps_per_second": 27.759, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.988449811935425, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3344, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8099340368327992, |
|
"eval_loss": 0.33969220519065857, |
|
"eval_precision": 0.8370422043948378, |
|
"eval_recall": 0.7928714311693035, |
|
"eval_runtime": 1.8043, |
|
"eval_samples_per_second": 221.138, |
|
"eval_steps_per_second": 27.712, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.6507456302642822, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2925, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.8058239259643384, |
|
"eval_loss": 0.32114899158477783, |
|
"eval_precision": 0.8264119601328903, |
|
"eval_recall": 0.7918257865066376, |
|
"eval_runtime": 1.7976, |
|
"eval_samples_per_second": 221.96, |
|
"eval_steps_per_second": 27.814, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.9831173419952393, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2794, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8365204824303285, |
|
"eval_loss": 0.30638387799263, |
|
"eval_precision": 0.8313636363636363, |
|
"eval_recall": 0.8424713584288053, |
|
"eval_runtime": 1.8026, |
|
"eval_samples_per_second": 221.352, |
|
"eval_steps_per_second": 27.738, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.175257444381714, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2464, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8453047161322887, |
|
"eval_loss": 0.2856903374195099, |
|
"eval_precision": 0.835631596867552, |
|
"eval_recall": 0.8585197308601564, |
|
"eval_runtime": 1.8028, |
|
"eval_samples_per_second": 221.325, |
|
"eval_steps_per_second": 27.735, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 3.4121947288513184, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2332, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8536612749904566, |
|
"eval_loss": 0.28461554646492004, |
|
"eval_precision": 0.8496330709593418, |
|
"eval_recall": 0.8581105655573741, |
|
"eval_runtime": 1.8, |
|
"eval_samples_per_second": 221.663, |
|
"eval_steps_per_second": 27.777, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.52989387512207, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2216, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8262195121951219, |
|
"eval_loss": 0.2906416654586792, |
|
"eval_precision": 0.8360165151709128, |
|
"eval_recall": 0.8181942171303873, |
|
"eval_runtime": 1.8056, |
|
"eval_samples_per_second": 220.983, |
|
"eval_steps_per_second": 27.692, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 8.88005256652832, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2123, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8386324041811847, |
|
"eval_loss": 0.2781093120574951, |
|
"eval_precision": 0.8487869670976828, |
|
"eval_recall": 0.830287324968176, |
|
"eval_runtime": 1.8334, |
|
"eval_samples_per_second": 217.634, |
|
"eval_steps_per_second": 27.272, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.3229901790618896, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1911, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8395201930584144, |
|
"eval_loss": 0.2896389365196228, |
|
"eval_precision": 0.8562091503267973, |
|
"eval_recall": 0.8270594653573378, |
|
"eval_runtime": 1.8031, |
|
"eval_samples_per_second": 221.286, |
|
"eval_steps_per_second": 27.73, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.697758138179779, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1878, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8495838108450449, |
|
"eval_loss": 0.28144657611846924, |
|
"eval_precision": 0.8478991596638655, |
|
"eval_recall": 0.8513366066557555, |
|
"eval_runtime": 1.8197, |
|
"eval_samples_per_second": 219.261, |
|
"eval_steps_per_second": 27.476, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.104419231414795, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1797, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8393634395533442, |
|
"eval_loss": 0.28299498558044434, |
|
"eval_precision": 0.8402278542707444, |
|
"eval_recall": 0.8385160938352427, |
|
"eval_runtime": 1.8064, |
|
"eval_samples_per_second": 220.882, |
|
"eval_steps_per_second": 27.679, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.5454129576683044, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1746, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8332268672959993, |
|
"eval_loss": 0.2900232672691345, |
|
"eval_precision": 0.849623687858982, |
|
"eval_recall": 0.8210129114384433, |
|
"eval_runtime": 1.8092, |
|
"eval_samples_per_second": 220.538, |
|
"eval_steps_per_second": 27.636, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.253016471862793, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1677, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.844327731092437, |
|
"eval_loss": 0.27983638644218445, |
|
"eval_precision": 0.8411320530352577, |
|
"eval_recall": 0.8477905073649754, |
|
"eval_runtime": 1.8045, |
|
"eval_samples_per_second": 221.113, |
|
"eval_steps_per_second": 27.708, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.9090829491615295, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1585, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8476882658063937, |
|
"eval_loss": 0.28229421377182007, |
|
"eval_precision": 0.8437296561519796, |
|
"eval_recall": 0.8520640116384797, |
|
"eval_runtime": 1.8037, |
|
"eval_samples_per_second": 221.212, |
|
"eval_steps_per_second": 27.721, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.9292184114456177, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1575, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8511422740140892, |
|
"eval_loss": 0.2815591096878052, |
|
"eval_precision": 0.8412921348314606, |
|
"eval_recall": 0.8645662847790507, |
|
"eval_runtime": 1.8158, |
|
"eval_samples_per_second": 219.738, |
|
"eval_steps_per_second": 27.536, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.232359409332275, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.146, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8359175094431583, |
|
"eval_loss": 0.30267345905303955, |
|
"eval_precision": 0.8376607470912432, |
|
"eval_recall": 0.8342425895617385, |
|
"eval_runtime": 1.8565, |
|
"eval_samples_per_second": 214.919, |
|
"eval_steps_per_second": 26.932, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.907904624938965, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1368, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8424651921601347, |
|
"eval_loss": 0.29612866044044495, |
|
"eval_precision": 0.8372140762463343, |
|
"eval_recall": 0.8485179123476996, |
|
"eval_runtime": 1.8011, |
|
"eval_samples_per_second": 221.536, |
|
"eval_steps_per_second": 27.761, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.788642168045044, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.133, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8333016825553572, |
|
"eval_loss": 0.30237412452697754, |
|
"eval_precision": 0.8341507249908615, |
|
"eval_recall": 0.8324695399163484, |
|
"eval_runtime": 1.8062, |
|
"eval_samples_per_second": 220.905, |
|
"eval_steps_per_second": 27.682, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.534482479095459, |
|
"learning_rate": 0.0, |
|
"loss": 0.1377, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8359175094431583, |
|
"eval_loss": 0.30191537737846375, |
|
"eval_precision": 0.8376607470912432, |
|
"eval_recall": 0.8342425895617385, |
|
"eval_runtime": 1.8052, |
|
"eval_samples_per_second": 221.033, |
|
"eval_steps_per_second": 27.698, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.2281026918379987, |
|
"train_runtime": 636.1271, |
|
"train_samples_per_second": 114.38, |
|
"train_steps_per_second": 3.836 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|