{ "best_metric": 0.012786287814378738, "best_model_checkpoint": "convnext-tiny-224-finetuned-barkley\\checkpoint-1102", "epoch": 30.0, "eval_steps": 500, "global_step": 1140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "train_accuracy": 0.2138157894736842 }, { "epoch": 1.0, "grad_norm": 2.7230629920959473, "learning_rate": 8.365843306988109e-07, "loss": 1.6288, "step": 38 }, { "epoch": 1.0, "eval_accuracy": 0.2371292106586224, "eval_error_rate": 0.7628707893413775, "eval_f1": 0.20430842832369756, "eval_loss": 1.6004831790924072, "eval_precision": 0.2132792246940375, "eval_recall": 0.26973684210526316, "eval_runtime": 41.666, "eval_samples_per_second": 3.648, "eval_steps_per_second": 0.456, "eval_top1_accuracy": 0.26973684210526316, "step": 38 }, { "epoch": 2.0, "train_accuracy": 0.25 }, { "epoch": 2.0, "grad_norm": 2.8333628177642822, "learning_rate": 9.460584867009364e-07, "loss": 1.6059, "step": 76 }, { "epoch": 2.0, "eval_accuracy": 0.24728506787330323, "eval_error_rate": 0.7527149321266968, "eval_f1": 0.2243479907737179, "eval_loss": 1.5801552534103394, "eval_precision": 0.2383676582761251, "eval_recall": 0.27631578947368424, "eval_runtime": 40.3741, "eval_samples_per_second": 3.765, "eval_steps_per_second": 0.471, "eval_top1_accuracy": 0.27631578947368424, "step": 76 }, { "epoch": 3.0, "train_accuracy": 0.2894736842105263 }, { "epoch": 3.0, "grad_norm": 2.8583333492279053, "learning_rate": 1.1275880849384341e-06, "loss": 1.5808, "step": 114 }, { "epoch": 3.0, "eval_accuracy": 0.2744343891402715, "eval_error_rate": 0.7255656108597285, "eval_f1": 0.25945030547058157, "eval_loss": 1.5570068359375, "eval_precision": 0.2778035204289794, "eval_recall": 0.3026315789473684, "eval_runtime": 25.2172, "eval_samples_per_second": 6.028, "eval_steps_per_second": 0.753, "eval_top1_accuracy": 0.3026315789473684, "step": 114 }, { "epoch": 4.0, "train_accuracy": 0.32383040935672514 }, { "epoch": 4.0, "grad_norm": 2.166118860244751, "learning_rate": 1.3797895548168056e-06, "loss": 1.5555, "step": 152 }, { "epoch": 4.0, "eval_accuracy": 0.3510859728506787, "eval_error_rate": 0.6489140271493212, "eval_f1": 0.3490548373858623, "eval_loss": 1.5290976762771606, "eval_precision": 0.3831372574793627, "eval_recall": 0.375, "eval_runtime": 28.1947, "eval_samples_per_second": 5.391, "eval_steps_per_second": 0.674, "eval_top1_accuracy": 0.375, "step": 152 }, { "epoch": 5.0, "train_accuracy": 0.38742690058479534 }, { "epoch": 5.0, "grad_norm": 2.677858591079712, "learning_rate": 1.7007406834242322e-06, "loss": 1.5232, "step": 190 }, { "epoch": 5.0, "eval_accuracy": 0.41472599296128704, "eval_error_rate": 0.585274007038713, "eval_f1": 0.41544045031957555, "eval_loss": 1.4932990074157715, "eval_precision": 0.4251577933805101, "eval_recall": 0.4407894736842105, "eval_runtime": 25.5314, "eval_samples_per_second": 5.953, "eval_steps_per_second": 0.744, "eval_top1_accuracy": 0.4407894736842105, "step": 190 }, { "epoch": 6.0, "train_accuracy": 0.47076023391812866 }, { "epoch": 6.0, "grad_norm": 2.4222543239593506, "learning_rate": 2.087995266130065e-06, "loss": 1.4784, "step": 228 }, { "epoch": 6.0, "eval_accuracy": 0.49721468074409253, "eval_error_rate": 0.5027853192559075, "eval_f1": 0.4925881060058483, "eval_loss": 1.4484349489212036, "eval_precision": 0.5076020884114232, "eval_recall": 0.5197368421052632, "eval_runtime": 56.6448, "eval_samples_per_second": 2.683, "eval_steps_per_second": 0.335, "eval_top1_accuracy": 0.5197368421052632, "step": 228 }, { "epoch": 7.0, "train_accuracy": 0.5701754385964912 }, { "epoch": 7.0, "grad_norm": 3.0168983936309814, "learning_rate": 2.5386017509096417e-06, "loss": 1.4242, "step": 266 }, { "epoch": 7.0, "eval_accuracy": 0.6248768225238813, "eval_error_rate": 0.3751231774761187, "eval_f1": 0.6307132735050356, "eval_loss": 1.390175223350525, "eval_precision": 0.6856800923548139, "eval_recall": 0.6381578947368421, "eval_runtime": 59.0359, "eval_samples_per_second": 2.575, "eval_steps_per_second": 0.322, "eval_top1_accuracy": 0.6381578947368421, "step": 266 }, { "epoch": 8.0, "train_accuracy": 0.6498538011695907 }, { "epoch": 8.0, "grad_norm": 2.808101177215576, "learning_rate": 3.049125734293339e-06, "loss": 1.3586, "step": 304 }, { "epoch": 8.0, "eval_accuracy": 0.7134087481146305, "eval_error_rate": 0.2865912518853695, "eval_f1": 0.7165677244674166, "eval_loss": 1.318568229675293, "eval_precision": 0.7728468899521532, "eval_recall": 0.7171052631578947, "eval_runtime": 55.4154, "eval_samples_per_second": 2.743, "eval_steps_per_second": 0.343, "eval_top1_accuracy": 0.7171052631578947, "step": 304 }, { "epoch": 9.0, "train_accuracy": 0.7149122807017544 }, { "epoch": 9.0, "grad_norm": 2.8706772327423096, "learning_rate": 3.6156761374816205e-06, "loss": 1.276, "step": 342 }, { "epoch": 9.0, "eval_accuracy": 0.805972850678733, "eval_error_rate": 0.19402714932126697, "eval_f1": 0.8108964691671459, "eval_loss": 1.223597526550293, "eval_precision": 0.8547017577806745, "eval_recall": 0.8026315789473685, "eval_runtime": 55.5305, "eval_samples_per_second": 2.737, "eval_steps_per_second": 0.342, "eval_top1_accuracy": 0.8026315789473685, "step": 342 }, { "epoch": 10.0, "train_accuracy": 0.8033625730994152 }, { "epoch": 10.0, "grad_norm": 3.2001399993896484, "learning_rate": 4.233934863118696e-06, "loss": 1.1778, "step": 380 }, { "epoch": 10.0, "eval_accuracy": 0.8600754147812971, "eval_error_rate": 0.13992458521870288, "eval_f1": 0.8609113468479812, "eval_loss": 1.1122019290924072, "eval_precision": 0.8898553355968974, "eval_recall": 0.8552631578947368, "eval_runtime": 53.869, "eval_samples_per_second": 2.822, "eval_steps_per_second": 0.353, "eval_top1_accuracy": 0.8552631578947368, "step": 380 }, { "epoch": 11.0, "train_accuracy": 0.868421052631579 }, { "epoch": 11.0, "grad_norm": 3.1884777545928955, "learning_rate": 4.899189706688707e-06, "loss": 1.0543, "step": 418 }, { "epoch": 11.0, "eval_accuracy": 0.9004725992961287, "eval_error_rate": 0.09952740070387134, "eval_f1": 0.8958336111023342, "eval_loss": 0.9839252829551697, "eval_precision": 0.9063624095203042, "eval_recall": 0.8947368421052632, "eval_runtime": 54.1079, "eval_samples_per_second": 2.809, "eval_steps_per_second": 0.351, "eval_top1_accuracy": 0.8947368421052632, "step": 418 }, { "epoch": 12.0, "train_accuracy": 0.902046783625731 }, { "epoch": 12.0, "grad_norm": 3.692692279815674, "learning_rate": 5.6063702716924525e-06, "loss": 0.921, "step": 456 }, { "epoch": 12.0, "eval_accuracy": 0.9575364504776269, "eval_error_rate": 0.04246354952237308, "eval_f1": 0.9537314326645565, "eval_loss": 0.8418065309524536, "eval_precision": 0.9540851522650184, "eval_recall": 0.9539473684210527, "eval_runtime": 54.166, "eval_samples_per_second": 2.806, "eval_steps_per_second": 0.351, "eval_top1_accuracy": 0.9539473684210527, "step": 456 }, { "epoch": 13.0, "train_accuracy": 0.9371345029239766 }, { "epoch": 13.0, "grad_norm": 3.067211389541626, "learning_rate": 6.350086614868708e-06, "loss": 0.773, "step": 494 }, { "epoch": 13.0, "eval_accuracy": 0.9652287581699346, "eval_error_rate": 0.034771241830065414, "eval_f1": 0.9604621710987601, "eval_loss": 0.6935167908668518, "eval_precision": 0.9623944201691405, "eval_recall": 0.9605263157894737, "eval_runtime": 67.8683, "eval_samples_per_second": 2.24, "eval_steps_per_second": 0.28, "eval_top1_accuracy": 0.9605263157894737, "step": 494 }, { "epoch": 14.0, "train_accuracy": 0.9576023391812866 }, { "epoch": 14.0, "grad_norm": 2.8400356769561768, "learning_rate": 7.124670326916465e-06, "loss": 0.6204, "step": 532 }, { "epoch": 14.0, "eval_accuracy": 0.9707843137254901, "eval_error_rate": 0.029215686274509878, "eval_f1": 0.96721850419808, "eval_loss": 0.5515281558036804, "eval_precision": 0.9687600085406214, "eval_recall": 0.9671052631578947, "eval_runtime": 75.408, "eval_samples_per_second": 2.016, "eval_steps_per_second": 0.252, "eval_top1_accuracy": 0.9671052631578947, "step": 532 }, { "epoch": 15.0, "train_accuracy": 0.9707602339181286 }, { "epoch": 15.0, "grad_norm": 2.3800432682037354, "learning_rate": 7.924217735611663e-06, "loss": 0.4835, "step": 570 }, { "epoch": 15.0, "eval_accuracy": 0.9696732026143791, "eval_error_rate": 0.03032679738562094, "eval_f1": 0.9676301640599072, "eval_loss": 0.4145788848400116, "eval_precision": 0.9704260651629072, "eval_recall": 0.9671052631578947, "eval_runtime": 43.8505, "eval_samples_per_second": 3.466, "eval_steps_per_second": 0.433, "eval_top1_accuracy": 0.9671052631578947, "step": 570 }, { "epoch": 16.0, "train_accuracy": 0.9736842105263158 }, { "epoch": 16.0, "grad_norm": 3.108427047729492, "learning_rate": 8.742634902035743e-06, "loss": 0.3641, "step": 608 }, { "epoch": 16.0, "eval_accuracy": 0.9830065359477125, "eval_error_rate": 0.01699346405228752, "eval_f1": 0.980191060766086, "eval_loss": 0.3042638599872589, "eval_precision": 0.9805310767959324, "eval_recall": 0.9802631578947368, "eval_runtime": 32.0995, "eval_samples_per_second": 4.735, "eval_steps_per_second": 0.592, "eval_top1_accuracy": 0.9802631578947368, "step": 608 }, { "epoch": 17.0, "train_accuracy": 0.9788011695906432 }, { "epoch": 17.0, "grad_norm": 2.5407485961914062, "learning_rate": 9.573684066966612e-06, "loss": 0.2706, "step": 646 }, { "epoch": 17.0, "eval_accuracy": 0.9830065359477125, "eval_error_rate": 0.01699346405228752, "eval_f1": 0.980191060766086, "eval_loss": 0.22474148869514465, "eval_precision": 0.9805310767959324, "eval_recall": 0.9802631578947368, "eval_runtime": 44.9328, "eval_samples_per_second": 3.383, "eval_steps_per_second": 0.423, "eval_top1_accuracy": 0.9802631578947368, "step": 646 }, { "epoch": 18.0, "train_accuracy": 0.9817251461988304 }, { "epoch": 18.0, "grad_norm": 3.097386598587036, "learning_rate": 1.0411031193429937e-05, "loss": 0.1998, "step": 684 }, { "epoch": 18.0, "eval_accuracy": 0.9888888888888889, "eval_error_rate": 0.011111111111111072, "eval_f1": 0.9867701266776593, "eval_loss": 0.17049287259578705, "eval_precision": 0.9872979940891655, "eval_recall": 0.9868421052631579, "eval_runtime": 39.0056, "eval_samples_per_second": 3.897, "eval_steps_per_second": 0.487, "eval_top1_accuracy": 0.9868421052631579, "step": 684 }, { "epoch": 19.0, "train_accuracy": 0.9890350877192983 }, { "epoch": 19.0, "grad_norm": 2.5862479209899902, "learning_rate": 1.1248294243054004e-05, "loss": 0.1446, "step": 722 }, { "epoch": 19.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.12706294655799866, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 35.4207, "eval_samples_per_second": 4.291, "eval_steps_per_second": 0.536, "eval_top1_accuracy": 0.993421052631579, "step": 722 }, { "epoch": 20.0, "train_accuracy": 0.9897660818713451 }, { "epoch": 20.0, "grad_norm": 4.520482540130615, "learning_rate": 1.2079091818278531e-05, "loss": 0.1106, "step": 760 }, { "epoch": 20.0, "eval_accuracy": 0.9888888888888889, "eval_error_rate": 0.011111111111111072, "eval_f1": 0.9867701266776593, "eval_loss": 0.1046518012881279, "eval_precision": 0.9872979940891655, "eval_recall": 0.9868421052631579, "eval_runtime": 41.2892, "eval_samples_per_second": 3.681, "eval_steps_per_second": 0.46, "eval_top1_accuracy": 0.9868421052631579, "step": 760 }, { "epoch": 21.0, "train_accuracy": 0.9883040935672515 }, { "epoch": 21.0, "grad_norm": 9.188246726989746, "learning_rate": 1.2897091799679402e-05, "loss": 0.0872, "step": 798 }, { "epoch": 21.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.0779847577214241, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 68.0671, "eval_samples_per_second": 2.233, "eval_steps_per_second": 0.279, "eval_top1_accuracy": 0.993421052631579, "step": 798 }, { "epoch": 22.0, "train_accuracy": 0.993421052631579 }, { "epoch": 22.0, "grad_norm": 1.4848785400390625, "learning_rate": 1.3696059607708444e-05, "loss": 0.0614, "step": 836 }, { "epoch": 22.0, "eval_accuracy": 0.9888888888888889, "eval_error_rate": 0.011111111111111072, "eval_f1": 0.9867701266776593, "eval_loss": 0.07387673109769821, "eval_precision": 0.9872979940891655, "eval_recall": 0.9868421052631579, "eval_runtime": 57.9913, "eval_samples_per_second": 2.621, "eval_steps_per_second": 0.328, "eval_top1_accuracy": 0.9868421052631579, "step": 836 }, { "epoch": 23.0, "train_accuracy": 0.9948830409356725 }, { "epoch": 23.0, "grad_norm": 0.7044534683227539, "learning_rate": 1.4469905721010107e-05, "loss": 0.0491, "step": 874 }, { "epoch": 23.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.05167735368013382, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 43.1178, "eval_samples_per_second": 3.525, "eval_steps_per_second": 0.441, "eval_top1_accuracy": 0.993421052631579, "step": 874 }, { "epoch": 24.0, "train_accuracy": 0.9956140350877193 }, { "epoch": 24.0, "grad_norm": 4.517460346221924, "learning_rate": 1.5212732089142938e-05, "loss": 0.0365, "step": 912 }, { "epoch": 24.0, "eval_accuracy": 0.9877777777777779, "eval_error_rate": 0.012222222222222134, "eval_f1": 0.9868484170131115, "eval_loss": 0.04010358452796936, "eval_precision": 0.9870857699805068, "eval_recall": 0.9868421052631579, "eval_runtime": 37.7666, "eval_samples_per_second": 4.025, "eval_steps_per_second": 0.503, "eval_top1_accuracy": 0.9868421052631579, "step": 912 }, { "epoch": 25.0, "train_accuracy": 0.9956140350877193 }, { "epoch": 25.0, "grad_norm": 0.25067076086997986, "learning_rate": 1.590081029331129e-05, "loss": 0.0255, "step": 950 }, { "epoch": 25.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.033598948270082474, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 41.2598, "eval_samples_per_second": 3.684, "eval_steps_per_second": 0.46, "eval_top1_accuracy": 0.993421052631579, "step": 950 }, { "epoch": 26.0, "train_accuracy": 0.9963450292397661 }, { "epoch": 26.0, "grad_norm": 3.2572007179260254, "learning_rate": 1.65660651368813e-05, "loss": 0.0212, "step": 988 }, { "epoch": 26.0, "eval_accuracy": 0.9888888888888889, "eval_error_rate": 0.011111111111111072, "eval_f1": 0.9867701266776593, "eval_loss": 0.037725575268268585, "eval_precision": 0.9872979940891655, "eval_recall": 0.9868421052631579, "eval_runtime": 39.6757, "eval_samples_per_second": 3.831, "eval_steps_per_second": 0.479, "eval_top1_accuracy": 0.9868421052631579, "step": 988 }, { "epoch": 27.0, "train_accuracy": 0.9956140350877193 }, { "epoch": 27.0, "grad_norm": 0.9294455051422119, "learning_rate": 1.7184323862518377e-05, "loss": 0.0175, "step": 1026 }, { "epoch": 27.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.019463175907731056, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 41.079, "eval_samples_per_second": 3.7, "eval_steps_per_second": 0.463, "eval_top1_accuracy": 0.993421052631579, "step": 1026 }, { "epoch": 28.0, "train_accuracy": 0.9978070175438597 }, { "epoch": 28.0, "grad_norm": 3.1146209239959717, "learning_rate": 1.775087426570666e-05, "loss": 0.0125, "step": 1064 }, { "epoch": 28.0, "eval_accuracy": 0.9933333333333334, "eval_error_rate": 0.006666666666666599, "eval_f1": 0.9934103601236665, "eval_loss": 0.02142958901822567, "eval_precision": 0.9935988620199147, "eval_recall": 0.993421052631579, "eval_runtime": 71.7993, "eval_samples_per_second": 2.117, "eval_steps_per_second": 0.265, "eval_top1_accuracy": 0.993421052631579, "step": 1064 }, { "epoch": 29.0, "train_accuracy": 0.9948830409356725 }, { "epoch": 29.0, "grad_norm": 0.6761703491210938, "learning_rate": 1.826139824909036e-05, "loss": 0.0155, "step": 1102 }, { "epoch": 29.0, "eval_accuracy": 1.0, "eval_error_rate": 0.0, "eval_f1": 1.0, "eval_loss": 0.012786287814378738, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 70.5861, "eval_samples_per_second": 2.153, "eval_steps_per_second": 0.269, "eval_top1_accuracy": 1.0, "step": 1102 }, { "epoch": 30.0, "train_accuracy": 0.9985380116959064 }, { "epoch": 30.0, "grad_norm": 0.24630075693130493, "learning_rate": 1.8712004733869936e-05, "loss": 0.0104, "step": 1140 }, { "epoch": 30.0, "eval_accuracy": 0.9944444444444445, "eval_error_rate": 0.005555555555555536, "eval_f1": 0.99343678755752, "eval_loss": 0.0159451961517334, "eval_precision": 0.9936647173489279, "eval_recall": 0.993421052631579, "eval_runtime": 68.8635, "eval_samples_per_second": 2.207, "eval_steps_per_second": 0.276, "eval_top1_accuracy": 0.993421052631579, "step": 1140 }, { "epoch": 30.0, "step": 1140, "total_flos": 9.167322602550067e+17, "train_loss": 0.6629255272840199, "train_runtime": 13519.4546, "train_samples_per_second": 2.698, "train_steps_per_second": 0.084 } ], "logging_steps": 10, "max_steps": 1140, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 2, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.167322602550067e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }