{ "best_metric": 0.9882958046457017, "best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-700", "epoch": 18.6046511627907, "eval_steps": 100, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.1627906976744187, "grad_norm": 0.9607858657836914, "learning_rate": 1.744186046511628e-05, "loss": 0.6124, "step": 50 }, { "epoch": 2.3255813953488373, "grad_norm": 2.0142862796783447, "learning_rate": 2.997578868976474e-05, "loss": 0.3124, "step": 100 }, { "epoch": 2.3255813953488373, "eval_accuracy": 0.9641025641025641, "eval_confusion_matrix": [ [ 948, 22 ], [ 27, 368 ] ], "eval_f1": 0.9640343817214121, "eval_loss": 0.2739163041114807, "eval_precision": 0.9639973701512163, "eval_recall": 0.9641025641025641, "eval_runtime": 3.48, "eval_samples_per_second": 392.238, "eval_steps_per_second": 6.322, "step": 100 }, { "epoch": 3.488372093023256, "grad_norm": 1.6254416704177856, "learning_rate": 2.9496736466239833e-05, "loss": 0.2509, "step": 150 }, { "epoch": 4.651162790697675, "grad_norm": 1.0478523969650269, "learning_rate": 2.8422656841613377e-05, "loss": 0.2337, "step": 200 }, { "epoch": 4.651162790697675, "eval_accuracy": 0.9736263736263736, "eval_confusion_matrix": [ [ 950, 20 ], [ 16, 379 ] ], "eval_f1": 0.9736655604528286, "eval_loss": 0.23845618963241577, "eval_precision": 0.9737247242968067, "eval_recall": 0.9736263736263736, "eval_runtime": 3.3782, "eval_samples_per_second": 404.066, "eval_steps_per_second": 6.512, "step": 200 }, { "epoch": 5.813953488372093, "grad_norm": 0.19202682375907898, "learning_rate": 2.679763606999547e-05, "loss": 0.213, "step": 250 }, { "epoch": 6.976744186046512, "grad_norm": 3.5125396251678467, "learning_rate": 2.4688374121322346e-05, "loss": 0.2064, "step": 300 }, { "epoch": 6.976744186046512, "eval_accuracy": 0.9831501831501831, "eval_confusion_matrix": [ [ 958, 12 ], [ 11, 384 ] ], "eval_f1": 0.9831564891330211, "eval_loss": 0.2294875830411911, "eval_precision": 0.983164066755398, "eval_recall": 0.9831501831501831, "eval_runtime": 3.421, "eval_samples_per_second": 399.01, "eval_steps_per_second": 6.431, "step": 300 }, { "epoch": 8.13953488372093, "grad_norm": 0.06433115154504776, "learning_rate": 2.218144694035814e-05, "loss": 0.2044, "step": 350 }, { "epoch": 9.30232558139535, "grad_norm": 0.1391066163778305, "learning_rate": 1.9379752884171956e-05, "loss": 0.2023, "step": 400 }, { "epoch": 9.30232558139535, "eval_accuracy": 0.9868131868131869, "eval_confusion_matrix": [ [ 957, 13 ], [ 5, 390 ] ], "eval_f1": 0.9868519842367506, "eval_loss": 0.22771421074867249, "eval_precision": 0.986971783497838, "eval_recall": 0.9868131868131869, "eval_runtime": 3.414, "eval_samples_per_second": 399.822, "eval_steps_per_second": 6.444, "step": 400 }, { "epoch": 10.465116279069768, "grad_norm": 0.03317234292626381, "learning_rate": 1.6398289196234188e-05, "loss": 0.201, "step": 450 }, { "epoch": 11.627906976744185, "grad_norm": 0.0141974575817585, "learning_rate": 1.3359431874331888e-05, "loss": 0.2003, "step": 500 }, { "epoch": 11.627906976744185, "eval_accuracy": 0.9875457875457876, "eval_confusion_matrix": [ [ 960, 10 ], [ 7, 388 ] ], "eval_f1": 0.9875597008648797, "eval_loss": 0.22541972994804382, "eval_precision": 0.9875850990732533, "eval_recall": 0.9875457875457876, "eval_runtime": 3.4267, "eval_samples_per_second": 398.347, "eval_steps_per_second": 6.42, "step": 500 }, { "epoch": 12.790697674418604, "grad_norm": 0.020028484985232353, "learning_rate": 1.0387912673007252e-05, "loss": 0.2005, "step": 550 }, { "epoch": 13.953488372093023, "grad_norm": 0.013127407990396023, "learning_rate": 7.605699412521431e-06, "loss": 0.2002, "step": 600 }, { "epoch": 13.953488372093023, "eval_accuracy": 0.9875457875457876, "eval_confusion_matrix": [ [ 959, 11 ], [ 6, 389 ] ], "eval_f1": 0.9875688610221299, "eval_loss": 0.2259330302476883, "eval_precision": 0.9876237449942113, "eval_recall": 0.9875457875457876, "eval_runtime": 3.3216, "eval_samples_per_second": 410.95, "eval_steps_per_second": 6.623, "step": 600 }, { "epoch": 15.116279069767442, "grad_norm": 0.016032038256525993, "learning_rate": 5.126989735188782e-06, "loss": 0.2012, "step": 650 }, { "epoch": 16.27906976744186, "grad_norm": 0.012081542983651161, "learning_rate": 3.0535237934100274e-06, "loss": 0.1994, "step": 700 }, { "epoch": 16.27906976744186, "eval_accuracy": 0.9882783882783883, "eval_confusion_matrix": [ [ 960, 10 ], [ 6, 389 ] ], "eval_f1": 0.9882958046457017, "eval_loss": 0.22554340958595276, "eval_precision": 0.9883336307004172, "eval_recall": 0.9882783882783883, "eval_runtime": 3.3839, "eval_samples_per_second": 403.383, "eval_steps_per_second": 6.501, "step": 700 }, { "epoch": 17.441860465116278, "grad_norm": 0.015530905686318874, "learning_rate": 1.470408262986479e-06, "loss": 0.2008, "step": 750 }, { "epoch": 18.6046511627907, "grad_norm": 0.03649911284446716, "learning_rate": 4.4262308763892087e-07, "loss": 0.1997, "step": 800 }, { "epoch": 18.6046511627907, "eval_accuracy": 0.9882783882783883, "eval_confusion_matrix": [ [ 960, 10 ], [ 6, 389 ] ], "eval_f1": 0.9882958046457017, "eval_loss": 0.22540099918842316, "eval_precision": 0.9883336307004172, "eval_recall": 0.9882783882783883, "eval_runtime": 3.38, "eval_samples_per_second": 403.852, "eval_steps_per_second": 6.509, "step": 800 } ], "logging_steps": 50, "max_steps": 860, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 100, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.001 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.31074598563328e+17, "train_batch_size": 64, "trial_name": null, "trial_params": null }