|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.7894608974456787, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5472, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7343358395989975, |
|
"eval_f1": 0.6338815789473684, |
|
"eval_loss": 0.4992983341217041, |
|
"eval_precision": 0.6726405580300865, |
|
"eval_recall": 0.6245226404800873, |
|
"eval_runtime": 5.1909, |
|
"eval_samples_per_second": 76.865, |
|
"eval_steps_per_second": 9.632, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.12828254699707, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4484, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7944862155388471, |
|
"eval_f1": 0.7744208494208494, |
|
"eval_loss": 0.4156816005706787, |
|
"eval_precision": 0.765545388374753, |
|
"eval_recall": 0.8096017457719585, |
|
"eval_runtime": 5.1989, |
|
"eval_samples_per_second": 76.746, |
|
"eval_steps_per_second": 9.617, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.4368088245391846, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3338, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8179269882659713, |
|
"eval_loss": 0.32789668440818787, |
|
"eval_precision": 0.8510239760239761, |
|
"eval_recall": 0.7981905801054737, |
|
"eval_runtime": 5.1341, |
|
"eval_samples_per_second": 77.715, |
|
"eval_steps_per_second": 9.739, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.854862689971924, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2902, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8359744037230948, |
|
"eval_loss": 0.30365845561027527, |
|
"eval_precision": 0.8448835433371515, |
|
"eval_recall": 0.828514275322786, |
|
"eval_runtime": 5.0662, |
|
"eval_samples_per_second": 78.758, |
|
"eval_steps_per_second": 9.869, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.4754557609558105, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2756, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8480717680029244, |
|
"eval_loss": 0.292182594537735, |
|
"eval_precision": 0.8498775260257195, |
|
"eval_recall": 0.8463356973995272, |
|
"eval_runtime": 5.091, |
|
"eval_samples_per_second": 78.374, |
|
"eval_steps_per_second": 9.821, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.3565938472747803, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2514, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8446181767415888, |
|
"eval_loss": 0.3059082329273224, |
|
"eval_precision": 0.835902201887332, |
|
"eval_recall": 0.8560192762320422, |
|
"eval_runtime": 5.1647, |
|
"eval_samples_per_second": 77.255, |
|
"eval_steps_per_second": 9.681, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.7644256949424744, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2338, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8347043853938569, |
|
"eval_loss": 0.2969984710216522, |
|
"eval_precision": 0.8277993283927745, |
|
"eval_recall": 0.8431987634115294, |
|
"eval_runtime": 5.1318, |
|
"eval_samples_per_second": 77.751, |
|
"eval_steps_per_second": 9.743, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.6158833503723145, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2205, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8531398028421457, |
|
"eval_loss": 0.29671400785446167, |
|
"eval_precision": 0.8783539291322455, |
|
"eval_recall": 0.835924713584288, |
|
"eval_runtime": 5.1445, |
|
"eval_samples_per_second": 77.558, |
|
"eval_steps_per_second": 9.719, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 7.319582462310791, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2153, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8401647707947546, |
|
"eval_loss": 0.29821664094924927, |
|
"eval_precision": 0.8393298751432535, |
|
"eval_recall": 0.8410165484633569, |
|
"eval_runtime": 5.1158, |
|
"eval_samples_per_second": 77.993, |
|
"eval_steps_per_second": 9.774, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.4924864768981934, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1969, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8377065410088949, |
|
"eval_loss": 0.2942851483821869, |
|
"eval_precision": 0.8423344947735192, |
|
"eval_recall": 0.8335151845790143, |
|
"eval_runtime": 5.1603, |
|
"eval_samples_per_second": 77.322, |
|
"eval_steps_per_second": 9.689, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 2.3620827198028564, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.185, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8375505157126486, |
|
"eval_loss": 0.29727619886398315, |
|
"eval_precision": 0.8359243697478991, |
|
"eval_recall": 0.8392434988179669, |
|
"eval_runtime": 9.0436, |
|
"eval_samples_per_second": 44.12, |
|
"eval_steps_per_second": 5.529, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 5.206456661224365, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1733, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8377065410088949, |
|
"eval_loss": 0.3074239492416382, |
|
"eval_precision": 0.8423344947735192, |
|
"eval_recall": 0.8335151845790143, |
|
"eval_runtime": 5.1279, |
|
"eval_samples_per_second": 77.81, |
|
"eval_steps_per_second": 9.751, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.582509994506836, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1616, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8403508771929824, |
|
"eval_loss": 0.31861984729766846, |
|
"eval_precision": 0.8460491741741742, |
|
"eval_recall": 0.8352882342244045, |
|
"eval_runtime": 5.1383, |
|
"eval_samples_per_second": 77.652, |
|
"eval_steps_per_second": 9.731, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 4.573276519775391, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.16, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8347043853938569, |
|
"eval_loss": 0.32218077778816223, |
|
"eval_precision": 0.8277993283927745, |
|
"eval_recall": 0.8431987634115294, |
|
"eval_runtime": 5.0708, |
|
"eval_samples_per_second": 78.686, |
|
"eval_steps_per_second": 9.86, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 4.897996425628662, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1494, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8464912280701754, |
|
"eval_loss": 0.3260069787502289, |
|
"eval_precision": 0.8522897897897899, |
|
"eval_recall": 0.8413347881432988, |
|
"eval_runtime": 5.1509, |
|
"eval_samples_per_second": 77.462, |
|
"eval_steps_per_second": 9.707, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.7224879264831543, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1501, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8375505157126486, |
|
"eval_loss": 0.32325395941734314, |
|
"eval_precision": 0.8359243697478991, |
|
"eval_recall": 0.8392434988179669, |
|
"eval_runtime": 5.1583, |
|
"eval_samples_per_second": 77.352, |
|
"eval_steps_per_second": 9.693, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.4223344326019287, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1468, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8385441718775052, |
|
"eval_loss": 0.32956239581108093, |
|
"eval_precision": 0.8412280701754387, |
|
"eval_recall": 0.8360156392071285, |
|
"eval_runtime": 5.0878, |
|
"eval_samples_per_second": 78.423, |
|
"eval_steps_per_second": 9.827, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.574972152709961, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1423, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8342105263157895, |
|
"eval_loss": 0.3366738557815552, |
|
"eval_precision": 0.8398085585585586, |
|
"eval_recall": 0.82924168030551, |
|
"eval_runtime": 5.1002, |
|
"eval_samples_per_second": 78.232, |
|
"eval_steps_per_second": 9.804, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 7.210375785827637, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1327, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8419946387230413, |
|
"eval_loss": 0.3395027816295624, |
|
"eval_precision": 0.8437691365584814, |
|
"eval_recall": 0.8402891434806329, |
|
"eval_runtime": 5.143, |
|
"eval_samples_per_second": 77.581, |
|
"eval_steps_per_second": 9.722, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.7272565364837646, |
|
"learning_rate": 0.0, |
|
"loss": 0.1413, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8438308224802573, |
|
"eval_loss": 0.34336015582084656, |
|
"eval_precision": 0.8485409407665505, |
|
"eval_recall": 0.8395617384979087, |
|
"eval_runtime": 5.0731, |
|
"eval_samples_per_second": 78.65, |
|
"eval_steps_per_second": 9.856, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22777412445818793, |
|
"train_runtime": 1962.6343, |
|
"train_samples_per_second": 37.073, |
|
"train_steps_per_second": 1.243 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|