|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.7894608974456787, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5472, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.6338815789473684, |
|
"eval_loss": 0.4992983341217041, |
|
"eval_precision": 0.6726405580300865, |
|
"eval_recall": 0.6245226404800873, |
|
"eval_runtime": 5.1542, |
|
"eval_samples_per_second": 77.413, |
|
"eval_steps_per_second": 9.701, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.12828254699707, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4484, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7744208494208494, |
|
"eval_loss": 0.4156816005706787, |
|
"eval_precision": 0.765545388374753, |
|
"eval_recall": 0.8096017457719585, |
|
"eval_runtime": 5.1518, |
|
"eval_samples_per_second": 77.449, |
|
"eval_steps_per_second": 9.705, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.4368088245391846, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3338, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8179269882659713, |
|
"eval_loss": 0.32789668440818787, |
|
"eval_precision": 0.8510239760239761, |
|
"eval_recall": 0.7981905801054737, |
|
"eval_runtime": 5.107, |
|
"eval_samples_per_second": 78.127, |
|
"eval_steps_per_second": 9.79, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.854862689971924, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2902, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8359744037230948, |
|
"eval_loss": 0.30365845561027527, |
|
"eval_precision": 0.8448835433371515, |
|
"eval_recall": 0.828514275322786, |
|
"eval_runtime": 5.1136, |
|
"eval_samples_per_second": 78.028, |
|
"eval_steps_per_second": 9.778, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.4754557609558105, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2756, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8480717680029244, |
|
"eval_loss": 0.292182594537735, |
|
"eval_precision": 0.8498775260257195, |
|
"eval_recall": 0.8463356973995272, |
|
"eval_runtime": 5.1797, |
|
"eval_samples_per_second": 77.031, |
|
"eval_steps_per_second": 9.653, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3565938472747803, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2514, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8446181767415888, |
|
"eval_loss": 0.3059082329273224, |
|
"eval_precision": 0.835902201887332, |
|
"eval_recall": 0.8560192762320422, |
|
"eval_runtime": 5.076, |
|
"eval_samples_per_second": 78.606, |
|
"eval_steps_per_second": 9.85, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7644256949424744, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2338, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8347043853938569, |
|
"eval_loss": 0.2969984710216522, |
|
"eval_precision": 0.8277993283927745, |
|
"eval_recall": 0.8431987634115294, |
|
"eval_runtime": 5.1872, |
|
"eval_samples_per_second": 76.92, |
|
"eval_steps_per_second": 9.639, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.6158833503723145, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2205, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8531398028421457, |
|
"eval_loss": 0.29671400785446167, |
|
"eval_precision": 0.8783539291322455, |
|
"eval_recall": 0.835924713584288, |
|
"eval_runtime": 5.0776, |
|
"eval_samples_per_second": 78.581, |
|
"eval_steps_per_second": 9.847, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.319582462310791, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2153, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8401647707947546, |
|
"eval_loss": 0.29821664094924927, |
|
"eval_precision": 0.8393298751432535, |
|
"eval_recall": 0.8410165484633569, |
|
"eval_runtime": 5.1535, |
|
"eval_samples_per_second": 77.423, |
|
"eval_steps_per_second": 9.702, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.4924864768981934, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1969, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8377065410088949, |
|
"eval_loss": 0.2942851483821869, |
|
"eval_precision": 0.8423344947735192, |
|
"eval_recall": 0.8335151845790143, |
|
"eval_runtime": 5.1062, |
|
"eval_samples_per_second": 78.14, |
|
"eval_steps_per_second": 9.792, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3620827198028564, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.185, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8375505157126486, |
|
"eval_loss": 0.29727619886398315, |
|
"eval_precision": 0.8359243697478991, |
|
"eval_recall": 0.8392434988179669, |
|
"eval_runtime": 5.1573, |
|
"eval_samples_per_second": 77.366, |
|
"eval_steps_per_second": 9.695, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.206456661224365, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1733, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8377065410088949, |
|
"eval_loss": 0.3074239492416382, |
|
"eval_precision": 0.8423344947735192, |
|
"eval_recall": 0.8335151845790143, |
|
"eval_runtime": 5.1717, |
|
"eval_samples_per_second": 77.151, |
|
"eval_steps_per_second": 9.668, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.582509994506836, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1616, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8403508771929824, |
|
"eval_loss": 0.31861984729766846, |
|
"eval_precision": 0.8460491741741742, |
|
"eval_recall": 0.8352882342244045, |
|
"eval_runtime": 5.2202, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 9.578, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.573276519775391, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.16, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8347043853938569, |
|
"eval_loss": 0.32218077778816223, |
|
"eval_precision": 0.8277993283927745, |
|
"eval_recall": 0.8431987634115294, |
|
"eval_runtime": 5.1221, |
|
"eval_samples_per_second": 77.898, |
|
"eval_steps_per_second": 9.762, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.897996425628662, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1494, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8464912280701754, |
|
"eval_loss": 0.3260069787502289, |
|
"eval_precision": 0.8522897897897899, |
|
"eval_recall": 0.8413347881432988, |
|
"eval_runtime": 5.1376, |
|
"eval_samples_per_second": 77.663, |
|
"eval_steps_per_second": 9.732, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.7224879264831543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1501, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8375505157126486, |
|
"eval_loss": 0.32325395941734314, |
|
"eval_precision": 0.8359243697478991, |
|
"eval_recall": 0.8392434988179669, |
|
"eval_runtime": 5.1275, |
|
"eval_samples_per_second": 77.816, |
|
"eval_steps_per_second": 9.751, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.4223344326019287, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1468, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8385441718775052, |
|
"eval_loss": 0.32956239581108093, |
|
"eval_precision": 0.8412280701754387, |
|
"eval_recall": 0.8360156392071285, |
|
"eval_runtime": 5.1268, |
|
"eval_samples_per_second": 77.826, |
|
"eval_steps_per_second": 9.753, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.574972152709961, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1423, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8342105263157895, |
|
"eval_loss": 0.3366738557815552, |
|
"eval_precision": 0.8398085585585586, |
|
"eval_recall": 0.82924168030551, |
|
"eval_runtime": 5.1034, |
|
"eval_samples_per_second": 78.183, |
|
"eval_steps_per_second": 9.797, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 7.210375785827637, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1327, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8419946387230413, |
|
"eval_loss": 0.3395027816295624, |
|
"eval_precision": 0.8437691365584814, |
|
"eval_recall": 0.8402891434806329, |
|
"eval_runtime": 5.1186, |
|
"eval_samples_per_second": 77.952, |
|
"eval_steps_per_second": 9.768, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.7272565364837646, |
|
"learning_rate": 0.0, |
|
"loss": 0.1413, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8438308224802573, |
|
"eval_loss": 0.34336015582084656, |
|
"eval_precision": 0.8485409407665505, |
|
"eval_recall": 0.8395617384979087, |
|
"eval_runtime": 5.1504, |
|
"eval_samples_per_second": 77.47, |
|
"eval_steps_per_second": 9.708, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22777412445818793, |
|
"train_runtime": 1952.8082, |
|
"train_samples_per_second": 37.259, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|