{ "best_metric": 0.555886736214605, "best_model_checkpoint": "test/checkpoint-1000", "epoch": 114.28571428571429, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 11.428571428571429, "eval_accuracy": 0.5990491283676703, "eval_f1": 0.0, "eval_loss": 2.2935469150543213, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 8.6961, "eval_samples_per_second": 4.025, "eval_steps_per_second": 2.07, "step": 100 }, { "epoch": 22.857142857142858, "eval_accuracy": 0.6756471209720021, "eval_f1": 0.24677716390423574, "eval_loss": 1.4737745523452759, "eval_precision": 0.3284313725490196, "eval_recall": 0.1976401179941003, "eval_runtime": 8.62, "eval_samples_per_second": 4.06, "eval_steps_per_second": 2.088, "step": 200 }, { "epoch": 34.285714285714285, "eval_accuracy": 0.7221341785525621, "eval_f1": 0.4099378881987578, "eval_loss": 1.2425366640090942, "eval_precision": 0.43278688524590164, "eval_recall": 0.3893805309734513, "eval_runtime": 9.0324, "eval_samples_per_second": 3.875, "eval_steps_per_second": 1.993, "step": 300 }, { "epoch": 45.714285714285715, "eval_accuracy": 0.745905969360803, "eval_f1": 0.46960486322188444, "eval_loss": 1.106683611869812, "eval_precision": 0.4843260188087774, "eval_recall": 0.4557522123893805, "eval_runtime": 8.5893, "eval_samples_per_second": 4.075, "eval_steps_per_second": 2.096, "step": 400 }, { "epoch": 57.142857142857146, "grad_norm": 0.6594957709312439, "learning_rate": 2.777777777777778e-05, "loss": 1.5451, "step": 500 }, { "epoch": 57.142857142857146, "eval_accuracy": 0.7606973058637084, "eval_f1": 0.5052316890881914, "eval_loss": 1.0804147720336914, "eval_precision": 0.5121212121212121, "eval_recall": 0.49852507374631266, "eval_runtime": 8.9633, "eval_samples_per_second": 3.905, "eval_steps_per_second": 2.008, "step": 500 }, { "epoch": 68.57142857142857, "eval_accuracy": 0.768621236133122, "eval_f1": 0.5156847742922723, "eval_loss": 1.0736337900161743, "eval_precision": 0.5357710651828299, "eval_recall": 0.4970501474926254, "eval_runtime": 8.9497, "eval_samples_per_second": 3.911, "eval_steps_per_second": 2.011, "step": 600 }, { "epoch": 80.0, "eval_accuracy": 0.7739038563127311, "eval_f1": 0.5367316341829085, "eval_loss": 1.0631266832351685, "eval_precision": 0.5457317073170732, "eval_recall": 0.528023598820059, "eval_runtime": 8.5483, "eval_samples_per_second": 4.094, "eval_steps_per_second": 2.106, "step": 700 }, { "epoch": 91.42857142857143, "eval_accuracy": 0.780243000528262, "eval_f1": 0.548314606741573, "eval_loss": 1.0633281469345093, "eval_precision": 0.5570776255707762, "eval_recall": 0.5398230088495575, "eval_runtime": 8.5977, "eval_samples_per_second": 4.071, "eval_steps_per_second": 2.094, "step": 800 }, { "epoch": 102.85714285714286, "eval_accuracy": 0.7823560486001057, "eval_f1": 0.5561338289962826, "eval_loss": 1.0636277198791504, "eval_precision": 0.56071964017991, "eval_recall": 0.551622418879056, "eval_runtime": 8.6698, "eval_samples_per_second": 4.037, "eval_steps_per_second": 2.076, "step": 900 }, { "epoch": 114.28571428571429, "grad_norm": 0.5495575666427612, "learning_rate": 0.0, "loss": 0.2997, "step": 1000 }, { "epoch": 114.28571428571429, "eval_accuracy": 0.7834125726360275, "eval_f1": 0.555886736214605, "eval_loss": 1.0617624521255493, "eval_precision": 0.5617469879518072, "eval_recall": 0.5501474926253688, "eval_runtime": 8.6165, "eval_samples_per_second": 4.062, "eval_steps_per_second": 2.089, "step": 1000 } ], "logging_steps": 500, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 125, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.478527859088384e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }