|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.112319469451904, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5509, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7393483709273183, |
|
"eval_f1": 0.6507070707070707, |
|
"eval_loss": 0.4983255863189697, |
|
"eval_precision": 0.6800605637083625, |
|
"eval_recall": 0.6405710129114385, |
|
"eval_runtime": 1.7657, |
|
"eval_samples_per_second": 225.971, |
|
"eval_steps_per_second": 28.317, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.6866044998168945, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4511, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7769423558897243, |
|
"eval_f1": 0.7593078346448687, |
|
"eval_loss": 0.4377373456954956, |
|
"eval_precision": 0.7546743295019157, |
|
"eval_recall": 0.8021913075104565, |
|
"eval_runtime": 1.769, |
|
"eval_samples_per_second": 225.555, |
|
"eval_steps_per_second": 28.265, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.584764242172241, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.368, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8571428571428571, |
|
"eval_f1": 0.8196102381877741, |
|
"eval_loss": 0.32603567838668823, |
|
"eval_precision": 0.8381270903010034, |
|
"eval_recall": 0.8064193489725404, |
|
"eval_runtime": 1.7715, |
|
"eval_samples_per_second": 225.23, |
|
"eval_steps_per_second": 28.224, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.8483095169067383, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3019, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8333281762485303, |
|
"eval_loss": 0.30364951491355896, |
|
"eval_precision": 0.8410471369819678, |
|
"eval_recall": 0.8267412256773959, |
|
"eval_runtime": 1.7702, |
|
"eval_samples_per_second": 225.393, |
|
"eval_steps_per_second": 28.245, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.774143934249878, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2668, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8424651921601347, |
|
"eval_loss": 0.31921207904815674, |
|
"eval_precision": 0.8372140762463343, |
|
"eval_recall": 0.8485179123476996, |
|
"eval_runtime": 1.7714, |
|
"eval_samples_per_second": 225.248, |
|
"eval_steps_per_second": 28.227, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.2327117919921875, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2471, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8380263497804185, |
|
"eval_loss": 0.30589351058006287, |
|
"eval_precision": 0.830503344095941, |
|
"eval_recall": 0.8474722676850337, |
|
"eval_runtime": 1.7732, |
|
"eval_samples_per_second": 225.015, |
|
"eval_steps_per_second": 28.197, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.5115749835968018, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2422, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8524146298159436, |
|
"eval_loss": 0.2949831783771515, |
|
"eval_precision": 0.8451250578971746, |
|
"eval_recall": 0.8613384251682124, |
|
"eval_runtime": 1.7731, |
|
"eval_samples_per_second": 225.024, |
|
"eval_steps_per_second": 28.198, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 1.2918312549591064, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2258, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8454251965513313, |
|
"eval_loss": 0.29280924797058105, |
|
"eval_precision": 0.8463049835506276, |
|
"eval_recall": 0.8445626477541371, |
|
"eval_runtime": 1.7799, |
|
"eval_samples_per_second": 224.171, |
|
"eval_steps_per_second": 28.092, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.160737037658691, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2054, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8533986527862829, |
|
"eval_loss": 0.30492648482322693, |
|
"eval_precision": 0.8572003218020917, |
|
"eval_recall": 0.8498817966903074, |
|
"eval_runtime": 1.779, |
|
"eval_samples_per_second": 224.288, |
|
"eval_steps_per_second": 28.106, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.917464017868042, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2009, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8488361520276414, |
|
"eval_loss": 0.30127042531967163, |
|
"eval_precision": 0.8488361520276414, |
|
"eval_recall": 0.8488361520276414, |
|
"eval_runtime": 1.7757, |
|
"eval_samples_per_second": 224.7, |
|
"eval_steps_per_second": 28.158, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 6.667805194854736, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1755, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.30701279640197754, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 1.7942, |
|
"eval_samples_per_second": 222.38, |
|
"eval_steps_per_second": 27.867, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 8.611730575561523, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1821, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8568221901555235, |
|
"eval_loss": 0.2995355427265167, |
|
"eval_precision": 0.8596491228070176, |
|
"eval_recall": 0.8541553009638116, |
|
"eval_runtime": 1.7796, |
|
"eval_samples_per_second": 224.202, |
|
"eval_steps_per_second": 28.095, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.71295428276062, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1652, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.866029197080292, |
|
"eval_loss": 0.3272043764591217, |
|
"eval_precision": 0.8552631578947368, |
|
"eval_recall": 0.8809328968903437, |
|
"eval_runtime": 1.7775, |
|
"eval_samples_per_second": 224.467, |
|
"eval_steps_per_second": 28.129, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.373868942260742, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1566, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8718540145985401, |
|
"eval_loss": 0.33357149362564087, |
|
"eval_precision": 0.8609022556390977, |
|
"eval_recall": 0.886979450809238, |
|
"eval_runtime": 1.7836, |
|
"eval_samples_per_second": 223.703, |
|
"eval_steps_per_second": 28.033, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 5.369639873504639, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1634, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8622899159663866, |
|
"eval_loss": 0.314995676279068, |
|
"eval_precision": 0.8589244307033712, |
|
"eval_recall": 0.8659301691216585, |
|
"eval_runtime": 1.777, |
|
"eval_samples_per_second": 224.539, |
|
"eval_steps_per_second": 28.138, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.779192924499512, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1496, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8696722245432793, |
|
"eval_loss": 0.3320792317390442, |
|
"eval_precision": 0.8706135006701596, |
|
"eval_recall": 0.8687488634297145, |
|
"eval_runtime": 1.7833, |
|
"eval_samples_per_second": 223.741, |
|
"eval_steps_per_second": 28.038, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 1.1515932083129883, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1355, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8616171059774413, |
|
"eval_loss": 0.32759982347488403, |
|
"eval_precision": 0.859873949579832, |
|
"eval_recall": 0.8634297144935443, |
|
"eval_runtime": 1.7782, |
|
"eval_samples_per_second": 224.387, |
|
"eval_steps_per_second": 28.119, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 1.6571087837219238, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1477, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_f1": 0.8563025210084034, |
|
"eval_loss": 0.33653610944747925, |
|
"eval_precision": 0.8529936381473334, |
|
"eval_recall": 0.8598836152027641, |
|
"eval_runtime": 1.7851, |
|
"eval_samples_per_second": 223.518, |
|
"eval_steps_per_second": 28.01, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 2.6701011657714844, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1317, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.3385031819343567, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 1.7765, |
|
"eval_samples_per_second": 224.597, |
|
"eval_steps_per_second": 28.145, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 3.197312593460083, |
|
"learning_rate": 0.0, |
|
"loss": 0.1267, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.858259325044405, |
|
"eval_loss": 0.3389217257499695, |
|
"eval_precision": 0.8573798178418481, |
|
"eval_recall": 0.8591562102200401, |
|
"eval_runtime": 1.7779, |
|
"eval_samples_per_second": 224.423, |
|
"eval_steps_per_second": 28.123, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.22970127551282038, |
|
"train_runtime": 621.3103, |
|
"train_samples_per_second": 117.107, |
|
"train_steps_per_second": 3.927 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|