|
{ |
|
"best_metric": 0.9882958046457017, |
|
"best_model_checkpoint": "distilhubert-finetuned-cry-detector/checkpoint-700", |
|
"epoch": 20.0, |
|
"eval_steps": 100, |
|
"global_step": 860, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.1627906976744187, |
|
"grad_norm": 0.9607858657836914, |
|
"learning_rate": 1.744186046511628e-05, |
|
"loss": 0.6124, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"grad_norm": 2.0142862796783447, |
|
"learning_rate": 2.997578868976474e-05, |
|
"loss": 0.3124, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.3255813953488373, |
|
"eval_accuracy": 0.9641025641025641, |
|
"eval_confusion_matrix": [ |
|
[ |
|
948, |
|
22 |
|
], |
|
[ |
|
27, |
|
368 |
|
] |
|
], |
|
"eval_f1": 0.9640343817214121, |
|
"eval_loss": 0.2739163041114807, |
|
"eval_precision": 0.9639973701512163, |
|
"eval_recall": 0.9641025641025641, |
|
"eval_runtime": 3.48, |
|
"eval_samples_per_second": 392.238, |
|
"eval_steps_per_second": 6.322, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.488372093023256, |
|
"grad_norm": 1.6254416704177856, |
|
"learning_rate": 2.9496736466239833e-05, |
|
"loss": 0.2509, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.651162790697675, |
|
"grad_norm": 1.0478523969650269, |
|
"learning_rate": 2.8422656841613377e-05, |
|
"loss": 0.2337, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.651162790697675, |
|
"eval_accuracy": 0.9736263736263736, |
|
"eval_confusion_matrix": [ |
|
[ |
|
950, |
|
20 |
|
], |
|
[ |
|
16, |
|
379 |
|
] |
|
], |
|
"eval_f1": 0.9736655604528286, |
|
"eval_loss": 0.23845618963241577, |
|
"eval_precision": 0.9737247242968067, |
|
"eval_recall": 0.9736263736263736, |
|
"eval_runtime": 3.3782, |
|
"eval_samples_per_second": 404.066, |
|
"eval_steps_per_second": 6.512, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.813953488372093, |
|
"grad_norm": 0.19202682375907898, |
|
"learning_rate": 2.679763606999547e-05, |
|
"loss": 0.213, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.976744186046512, |
|
"grad_norm": 3.5125396251678467, |
|
"learning_rate": 2.4688374121322346e-05, |
|
"loss": 0.2064, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.976744186046512, |
|
"eval_accuracy": 0.9831501831501831, |
|
"eval_confusion_matrix": [ |
|
[ |
|
958, |
|
12 |
|
], |
|
[ |
|
11, |
|
384 |
|
] |
|
], |
|
"eval_f1": 0.9831564891330211, |
|
"eval_loss": 0.2294875830411911, |
|
"eval_precision": 0.983164066755398, |
|
"eval_recall": 0.9831501831501831, |
|
"eval_runtime": 3.421, |
|
"eval_samples_per_second": 399.01, |
|
"eval_steps_per_second": 6.431, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.13953488372093, |
|
"grad_norm": 0.06433115154504776, |
|
"learning_rate": 2.218144694035814e-05, |
|
"loss": 0.2044, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 9.30232558139535, |
|
"grad_norm": 0.1391066163778305, |
|
"learning_rate": 1.9379752884171956e-05, |
|
"loss": 0.2023, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.30232558139535, |
|
"eval_accuracy": 0.9868131868131869, |
|
"eval_confusion_matrix": [ |
|
[ |
|
957, |
|
13 |
|
], |
|
[ |
|
5, |
|
390 |
|
] |
|
], |
|
"eval_f1": 0.9868519842367506, |
|
"eval_loss": 0.22771421074867249, |
|
"eval_precision": 0.986971783497838, |
|
"eval_recall": 0.9868131868131869, |
|
"eval_runtime": 3.414, |
|
"eval_samples_per_second": 399.822, |
|
"eval_steps_per_second": 6.444, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.465116279069768, |
|
"grad_norm": 0.03317234292626381, |
|
"learning_rate": 1.6398289196234188e-05, |
|
"loss": 0.201, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 11.627906976744185, |
|
"grad_norm": 0.0141974575817585, |
|
"learning_rate": 1.3359431874331888e-05, |
|
"loss": 0.2003, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.627906976744185, |
|
"eval_accuracy": 0.9875457875457876, |
|
"eval_confusion_matrix": [ |
|
[ |
|
960, |
|
10 |
|
], |
|
[ |
|
7, |
|
388 |
|
] |
|
], |
|
"eval_f1": 0.9875597008648797, |
|
"eval_loss": 0.22541972994804382, |
|
"eval_precision": 0.9875850990732533, |
|
"eval_recall": 0.9875457875457876, |
|
"eval_runtime": 3.4267, |
|
"eval_samples_per_second": 398.347, |
|
"eval_steps_per_second": 6.42, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 12.790697674418604, |
|
"grad_norm": 0.020028484985232353, |
|
"learning_rate": 1.0387912673007252e-05, |
|
"loss": 0.2005, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 13.953488372093023, |
|
"grad_norm": 0.013127407990396023, |
|
"learning_rate": 7.605699412521431e-06, |
|
"loss": 0.2002, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 13.953488372093023, |
|
"eval_accuracy": 0.9875457875457876, |
|
"eval_confusion_matrix": [ |
|
[ |
|
959, |
|
11 |
|
], |
|
[ |
|
6, |
|
389 |
|
] |
|
], |
|
"eval_f1": 0.9875688610221299, |
|
"eval_loss": 0.2259330302476883, |
|
"eval_precision": 0.9876237449942113, |
|
"eval_recall": 0.9875457875457876, |
|
"eval_runtime": 3.3216, |
|
"eval_samples_per_second": 410.95, |
|
"eval_steps_per_second": 6.623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.116279069767442, |
|
"grad_norm": 0.016032038256525993, |
|
"learning_rate": 5.126989735188782e-06, |
|
"loss": 0.2012, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 16.27906976744186, |
|
"grad_norm": 0.012081542983651161, |
|
"learning_rate": 3.0535237934100274e-06, |
|
"loss": 0.1994, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 16.27906976744186, |
|
"eval_accuracy": 0.9882783882783883, |
|
"eval_confusion_matrix": [ |
|
[ |
|
960, |
|
10 |
|
], |
|
[ |
|
6, |
|
389 |
|
] |
|
], |
|
"eval_f1": 0.9882958046457017, |
|
"eval_loss": 0.22554340958595276, |
|
"eval_precision": 0.9883336307004172, |
|
"eval_recall": 0.9882783882783883, |
|
"eval_runtime": 3.3839, |
|
"eval_samples_per_second": 403.383, |
|
"eval_steps_per_second": 6.501, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 17.441860465116278, |
|
"grad_norm": 0.015530905686318874, |
|
"learning_rate": 1.470408262986479e-06, |
|
"loss": 0.2008, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 18.6046511627907, |
|
"grad_norm": 0.03649911284446716, |
|
"learning_rate": 4.4262308763892087e-07, |
|
"loss": 0.1997, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.6046511627907, |
|
"eval_accuracy": 0.9882783882783883, |
|
"eval_confusion_matrix": [ |
|
[ |
|
960, |
|
10 |
|
], |
|
[ |
|
6, |
|
389 |
|
] |
|
], |
|
"eval_f1": 0.9882958046457017, |
|
"eval_loss": 0.22540099918842316, |
|
"eval_precision": 0.9883336307004172, |
|
"eval_recall": 0.9882783882783883, |
|
"eval_runtime": 3.38, |
|
"eval_samples_per_second": 403.852, |
|
"eval_steps_per_second": 6.509, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 19.767441860465116, |
|
"grad_norm": 0.17561723291873932, |
|
"learning_rate": 1.2354337535766536e-08, |
|
"loss": 0.2008, |
|
"step": 850 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.001 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.483401519872e+17, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|