|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.11376428604126, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5647, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.706766917293233, |
|
"eval_f1": 0.6297262783854312, |
|
"eval_loss": 0.5166164636611938, |
|
"eval_precision": 0.6379598662207357, |
|
"eval_recall": 0.6250227314057101, |
|
"eval_runtime": 5.1423, |
|
"eval_samples_per_second": 77.591, |
|
"eval_steps_per_second": 9.723, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.067341327667236, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5067, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.6926129426129426, |
|
"eval_loss": 0.4954279363155365, |
|
"eval_precision": 0.6870370370370371, |
|
"eval_recall": 0.7020367339516276, |
|
"eval_runtime": 5.0536, |
|
"eval_samples_per_second": 78.953, |
|
"eval_steps_per_second": 9.894, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.497439861297607, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4617, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.7496919995465023, |
|
"eval_loss": 0.4390866756439209, |
|
"eval_precision": 0.7490801616502805, |
|
"eval_recall": 0.7503182396799418, |
|
"eval_runtime": 5.0492, |
|
"eval_samples_per_second": 79.022, |
|
"eval_steps_per_second": 9.903, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.5452804565429688, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4044, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8145363408521303, |
|
"eval_f1": 0.7773840400506664, |
|
"eval_loss": 0.39114564657211304, |
|
"eval_precision": 0.7760504201680672, |
|
"eval_recall": 0.7787779596290234, |
|
"eval_runtime": 5.0674, |
|
"eval_samples_per_second": 78.738, |
|
"eval_steps_per_second": 9.867, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.4987263679504395, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.382, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8195488721804511, |
|
"eval_f1": 0.7962206332992849, |
|
"eval_loss": 0.38273975253105164, |
|
"eval_precision": 0.7848639455782312, |
|
"eval_recall": 0.8198308783415167, |
|
"eval_runtime": 5.1217, |
|
"eval_samples_per_second": 77.905, |
|
"eval_steps_per_second": 9.762, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 6.1503801345825195, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3494, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8100071803786705, |
|
"eval_loss": 0.3528314530849457, |
|
"eval_precision": 0.8092466373122624, |
|
"eval_recall": 0.8107837788688852, |
|
"eval_runtime": 5.0631, |
|
"eval_samples_per_second": 78.806, |
|
"eval_steps_per_second": 9.875, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.3256051540374756, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3423, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8255172205802521, |
|
"eval_loss": 0.3441900908946991, |
|
"eval_precision": 0.8239495798319327, |
|
"eval_recall": 0.8271503909801782, |
|
"eval_runtime": 5.0678, |
|
"eval_samples_per_second": 78.732, |
|
"eval_steps_per_second": 9.866, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.73527455329895, |
|
"learning_rate": 3e-05, |
|
"loss": 0.33, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8341632880321839, |
|
"eval_loss": 0.3399864733219147, |
|
"eval_precision": 0.8479139504563233, |
|
"eval_recall": 0.8235133660665576, |
|
"eval_runtime": 5.0619, |
|
"eval_samples_per_second": 78.824, |
|
"eval_steps_per_second": 9.878, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.524757385253906, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3296, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8127815315315315, |
|
"eval_loss": 0.3349246084690094, |
|
"eval_precision": 0.8244897959183674, |
|
"eval_recall": 0.8036006546644845, |
|
"eval_runtime": 5.0712, |
|
"eval_samples_per_second": 78.68, |
|
"eval_steps_per_second": 9.86, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 6.317535877227783, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3074, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8249232119350592, |
|
"eval_loss": 0.33487534523010254, |
|
"eval_precision": 0.8467014712861889, |
|
"eval_recall": 0.8099654482633206, |
|
"eval_runtime": 5.0848, |
|
"eval_samples_per_second": 78.468, |
|
"eval_steps_per_second": 9.833, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.027658224105835, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2911, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8377439939939939, |
|
"eval_loss": 0.32399529218673706, |
|
"eval_precision": 0.8503401360544218, |
|
"eval_recall": 0.8277868703400618, |
|
"eval_runtime": 5.0689, |
|
"eval_samples_per_second": 78.716, |
|
"eval_steps_per_second": 9.864, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.344508171081543, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2855, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8454251965513313, |
|
"eval_loss": 0.3273135721683502, |
|
"eval_precision": 0.8463049835506276, |
|
"eval_recall": 0.8445626477541371, |
|
"eval_runtime": 5.059, |
|
"eval_samples_per_second": 78.87, |
|
"eval_steps_per_second": 9.883, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.517082214355469, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.2903, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8295950648528947, |
|
"eval_loss": 0.32846182584762573, |
|
"eval_precision": 0.8472157618446409, |
|
"eval_recall": 0.816739407164939, |
|
"eval_runtime": 5.0537, |
|
"eval_samples_per_second": 78.952, |
|
"eval_steps_per_second": 9.894, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 8.310128211975098, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2896, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8341632880321839, |
|
"eval_loss": 0.3254058063030243, |
|
"eval_precision": 0.8479139504563233, |
|
"eval_recall": 0.8235133660665576, |
|
"eval_runtime": 5.0704, |
|
"eval_samples_per_second": 78.691, |
|
"eval_steps_per_second": 9.861, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1135729551315308, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2744, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8359175094431583, |
|
"eval_loss": 0.3240545392036438, |
|
"eval_precision": 0.8376607470912432, |
|
"eval_recall": 0.8342425895617385, |
|
"eval_runtime": 5.0666, |
|
"eval_samples_per_second": 78.751, |
|
"eval_steps_per_second": 9.869, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.9109649658203125, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2691, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8263588263588264, |
|
"eval_loss": 0.3209517002105713, |
|
"eval_precision": 0.8289473684210527, |
|
"eval_recall": 0.8239225313693399, |
|
"eval_runtime": 5.0476, |
|
"eval_samples_per_second": 79.047, |
|
"eval_steps_per_second": 9.906, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 6.5280585289001465, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2671, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8377439939939939, |
|
"eval_loss": 0.3208070397377014, |
|
"eval_precision": 0.8503401360544218, |
|
"eval_recall": 0.8277868703400618, |
|
"eval_runtime": 5.0528, |
|
"eval_samples_per_second": 78.966, |
|
"eval_steps_per_second": 9.895, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.9085135459899902, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2736, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8421640488656195, |
|
"eval_loss": 0.31788739562034607, |
|
"eval_precision": 0.8512313860252005, |
|
"eval_recall": 0.8345608292416803, |
|
"eval_runtime": 5.0515, |
|
"eval_samples_per_second": 78.987, |
|
"eval_steps_per_second": 9.898, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.626889705657959, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2662, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8404212771630449, |
|
"eval_loss": 0.3179715573787689, |
|
"eval_precision": 0.854416558018253, |
|
"eval_recall": 0.8295599199854519, |
|
"eval_runtime": 5.0479, |
|
"eval_samples_per_second": 79.043, |
|
"eval_steps_per_second": 9.905, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 9.089592933654785, |
|
"learning_rate": 0.0, |
|
"loss": 0.2664, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8413023981282901, |
|
"eval_loss": 0.3167513608932495, |
|
"eval_precision": 0.8527593534677056, |
|
"eval_recall": 0.8320603746135662, |
|
"eval_runtime": 5.0852, |
|
"eval_samples_per_second": 78.463, |
|
"eval_steps_per_second": 9.833, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.33757916747546585, |
|
"train_runtime": 1953.353, |
|
"train_samples_per_second": 37.249, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|