{ "best_metric": 0.9906542056074766, "best_model_checkpoint": "resnet-50-finetuned-FBark\\checkpoint-198", "epoch": 34.339622641509436, "eval_steps": 500, "global_step": 455, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "step": 13, "train_accuracy": 0.18867924528301888, "train_f1": 0.07866239279216843, "train_loss": 1.6040071249008179, "train_precision": 0.10334148329258355, "train_recall": 0.20698380566801616, "train_runtime": 132.5495, "train_samples_per_second": 3.199, "train_steps_per_second": 0.4 }, { "epoch": 0.98, "grad_norm": 0.8319346904754639, "learning_rate": 8.478260869565217e-05, "loss": 1.6424, "step": 13 }, { "epoch": 0.98, "eval_accuracy": 0.2336448598130841, "eval_f1": 0.10987810004203447, "eval_loss": 1.5959796905517578, "eval_precision": 0.1749174917491749, "eval_recall": 0.22424242424242422, "eval_runtime": 39.1296, "eval_samples_per_second": 2.735, "eval_steps_per_second": 0.358, "step": 13 }, { "epoch": 1.96, "step": 26, "train_accuracy": 0.33962264150943394, "train_f1": 0.24495658674026793, "train_loss": 1.5701098442077637, "train_precision": 0.3021095248242063, "train_recall": 0.3113901059286784, "train_runtime": 127.6225, "train_samples_per_second": 3.322, "train_steps_per_second": 0.415 }, { "epoch": 1.96, "grad_norm": 1.3721247911453247, "learning_rate": 0.00016956521739130433, "loss": 1.621, "step": 26 }, { "epoch": 1.96, "eval_accuracy": 0.411214953271028, "eval_f1": 0.26165216896924215, "eval_loss": 1.546158790588379, "eval_precision": 0.3088888888888889, "eval_recall": 0.3116883116883117, "eval_runtime": 39.3684, "eval_samples_per_second": 2.718, "eval_steps_per_second": 0.356, "step": 26 }, { "epoch": 2.94, "step": 39, "train_accuracy": 0.37264150943396224, "train_f1": 0.2637971738836966, "train_loss": 1.5183203220367432, "train_precision": 0.35881109762129587, "train_recall": 0.34996256447229773, "train_runtime": 132.7347, "train_samples_per_second": 3.194, "train_steps_per_second": 0.399 }, { "epoch": 2.94, "grad_norm": 1.4928213357925415, "learning_rate": 0.00025434782608695647, "loss": 1.567, "step": 39 }, { "epoch": 2.94, "eval_accuracy": 0.4766355140186916, "eval_f1": 0.30495432955791013, "eval_loss": 1.4607292413711548, "eval_precision": 0.3638922888616891, "eval_recall": 0.3748917748917749, "eval_runtime": 41.5534, "eval_samples_per_second": 2.575, "eval_steps_per_second": 0.337, "step": 39 }, { "epoch": 4.0, "step": 53, "train_accuracy": 0.4386792452830189, "train_f1": 0.3491466500711846, "train_loss": 1.346737027168274, "train_precision": 0.7565600797484855, "train_recall": 0.41960074270933767, "train_runtime": 137.4133, "train_samples_per_second": 3.086, "train_steps_per_second": 0.386 }, { "epoch": 4.0, "grad_norm": 1.9480384588241577, "learning_rate": 0.00029486552567237163, "loss": 1.357, "step": 53 }, { "epoch": 4.0, "eval_accuracy": 0.514018691588785, "eval_f1": 0.38211575211575216, "eval_loss": 1.2584657669067383, "eval_precision": 0.8378205128205127, "eval_recall": 0.4251082251082251, "eval_runtime": 43.051, "eval_samples_per_second": 2.485, "eval_steps_per_second": 0.325, "step": 53 }, { "epoch": 4.98, "step": 66, "train_accuracy": 0.6957547169811321, "train_f1": 0.6891574214359025, "train_loss": 1.138260841369629, "train_precision": 0.8253012477718361, "train_recall": 0.6805251227537071, "train_runtime": 126.2306, "train_samples_per_second": 3.359, "train_steps_per_second": 0.42 }, { "epoch": 4.98, "grad_norm": 1.9529035091400146, "learning_rate": 0.0002853300733496332, "loss": 1.3203, "step": 66 }, { "epoch": 4.98, "eval_accuracy": 0.7476635514018691, "eval_f1": 0.7225305110805734, "eval_loss": 1.0523829460144043, "eval_precision": 0.8176507936507935, "eval_recall": 0.7064935064935065, "eval_runtime": 39.1299, "eval_samples_per_second": 2.734, "eval_steps_per_second": 0.358, "step": 66 }, { "epoch": 5.96, "step": 79, "train_accuracy": 0.7594339622641509, "train_f1": 0.7384580979394952, "train_loss": 0.8829485774040222, "train_precision": 0.86050056869729, "train_recall": 0.7432874367985035, "train_runtime": 133.9565, "train_samples_per_second": 3.165, "train_steps_per_second": 0.396 }, { "epoch": 5.96, "grad_norm": 2.4536993503570557, "learning_rate": 0.00027579462102689484, "loss": 1.1706, "step": 79 }, { "epoch": 5.96, "eval_accuracy": 0.794392523364486, "eval_f1": 0.7846877954646693, "eval_loss": 0.8008124828338623, "eval_precision": 0.8854145854145855, "eval_recall": 0.767965367965368, "eval_runtime": 40.731, "eval_samples_per_second": 2.627, "eval_steps_per_second": 0.344, "step": 79 }, { "epoch": 6.94, "step": 92, "train_accuracy": 0.8632075471698113, "train_f1": 0.8590685733770105, "train_loss": 0.7464644312858582, "train_precision": 0.892835269329224, "train_recall": 0.8548507199297516, "train_runtime": 130.0886, "train_samples_per_second": 3.259, "train_steps_per_second": 0.407 }, { "epoch": 6.94, "grad_norm": 2.5925121307373047, "learning_rate": 0.00026625916870415647, "loss": 0.9929, "step": 92 }, { "epoch": 6.94, "eval_accuracy": 0.9158878504672897, "eval_f1": 0.9236075036075035, "eval_loss": 0.6253050565719604, "eval_precision": 0.9368429298864083, "eval_recall": 0.9212121212121211, "eval_runtime": 38.4775, "eval_samples_per_second": 2.781, "eval_steps_per_second": 0.364, "step": 92 }, { "epoch": 8.0, "step": 106, "train_accuracy": 0.9080188679245284, "train_f1": 0.9066600370668294, "train_loss": 0.5274814963340759, "train_precision": 0.9124982372811825, "train_recall": 0.9042126570890489, "train_runtime": 129.4721, "train_samples_per_second": 3.275, "train_steps_per_second": 0.409 }, { "epoch": 8.0, "grad_norm": 2.86348032951355, "learning_rate": 0.0002559902200488997, "loss": 0.7633, "step": 106 }, { "epoch": 8.0, "eval_accuracy": 0.9345794392523364, "eval_f1": 0.9342456932845948, "eval_loss": 0.46806150674819946, "eval_precision": 0.9339420289855072, "eval_recall": 0.9437229437229437, "eval_runtime": 39.4048, "eval_samples_per_second": 2.715, "eval_steps_per_second": 0.355, "step": 106 }, { "epoch": 8.98, "step": 119, "train_accuracy": 0.9363207547169812, "train_f1": 0.9358357324202157, "train_loss": 0.46290820837020874, "train_precision": 0.938202392067757, "train_recall": 0.9342575372358033, "train_runtime": 133.0687, "train_samples_per_second": 3.186, "train_steps_per_second": 0.398 }, { "epoch": 8.98, "grad_norm": 3.642646074295044, "learning_rate": 0.00024645476772616135, "loss": 0.6367, "step": 119 }, { "epoch": 8.98, "eval_accuracy": 0.9158878504672897, "eval_f1": 0.9145165945165944, "eval_loss": 0.3800387382507324, "eval_precision": 0.912056277056277, "eval_recall": 0.9194805194805195, "eval_runtime": 48.5414, "eval_samples_per_second": 2.204, "eval_steps_per_second": 0.288, "step": 119 }, { "epoch": 9.96, "step": 132, "train_accuracy": 0.9410377358490566, "train_f1": 0.9413105716667156, "train_loss": 0.36001139879226685, "train_precision": 0.941837807815579, "train_recall": 0.9410602738360391, "train_runtime": 133.0768, "train_samples_per_second": 3.186, "train_steps_per_second": 0.398 }, { "epoch": 9.96, "grad_norm": 2.934262275695801, "learning_rate": 0.00023691931540342298, "loss": 0.5834, "step": 132 }, { "epoch": 9.96, "eval_accuracy": 0.9532710280373832, "eval_f1": 0.9551136455716718, "eval_loss": 0.26904991269111633, "eval_precision": 0.959457478005865, "eval_recall": 0.9541125541125541, "eval_runtime": 42.7878, "eval_samples_per_second": 2.501, "eval_steps_per_second": 0.327, "step": 132 }, { "epoch": 10.94, "step": 145, "train_accuracy": 0.9504716981132075, "train_f1": 0.9503020748526174, "train_loss": 0.25279200077056885, "train_precision": 0.9508748114630468, "train_recall": 0.9503638914618925, "train_runtime": 134.3359, "train_samples_per_second": 3.156, "train_steps_per_second": 0.395 }, { "epoch": 10.94, "grad_norm": 3.635103464126587, "learning_rate": 0.00022738386308068459, "loss": 0.4842, "step": 145 }, { "epoch": 10.94, "eval_accuracy": 0.9813084112149533, "eval_f1": 0.984659090909091, "eval_loss": 0.16999471187591553, "eval_precision": 0.9826086956521738, "eval_recall": 0.9878787878787879, "eval_runtime": 49.1618, "eval_samples_per_second": 2.176, "eval_steps_per_second": 0.285, "step": 145 }, { "epoch": 12.0, "step": 159, "train_accuracy": 0.9693396226415094, "train_f1": 0.9695757850179305, "train_loss": 0.1881800889968872, "train_precision": 0.9693384564611929, "train_recall": 0.9709818221559601, "train_runtime": 128.8721, "train_samples_per_second": 3.29, "train_steps_per_second": 0.411 }, { "epoch": 12.0, "grad_norm": 2.419642925262451, "learning_rate": 0.00021711491442542784, "loss": 0.4302, "step": 159 }, { "epoch": 12.0, "eval_accuracy": 0.9626168224299065, "eval_f1": 0.9676432095036744, "eval_loss": 0.17427879571914673, "eval_precision": 0.9648221343873518, "eval_recall": 0.9722943722943723, "eval_runtime": 41.3417, "eval_samples_per_second": 2.588, "eval_steps_per_second": 0.339, "step": 159 }, { "epoch": 12.98, "step": 172, "train_accuracy": 0.964622641509434, "train_f1": 0.9651368628644732, "train_loss": 0.1646902710199356, "train_precision": 0.9645891898165841, "train_recall": 0.9665469535253202, "train_runtime": 134.1065, "train_samples_per_second": 3.162, "train_steps_per_second": 0.395 }, { "epoch": 12.98, "grad_norm": 4.919209003448486, "learning_rate": 0.00020757946210268947, "loss": 0.4422, "step": 172 }, { "epoch": 12.98, "eval_accuracy": 0.9719626168224299, "eval_f1": 0.9771428571428572, "eval_loss": 0.13857078552246094, "eval_precision": 0.975, "eval_recall": 0.9818181818181818, "eval_runtime": 39.4778, "eval_samples_per_second": 2.71, "eval_steps_per_second": 0.355, "step": 172 }, { "epoch": 13.96, "step": 185, "train_accuracy": 0.9716981132075472, "train_f1": 0.9709900945487153, "train_loss": 0.13838660717010498, "train_precision": 0.9710717151425976, "train_recall": 0.9710504067284639, "train_runtime": 130.0755, "train_samples_per_second": 3.26, "train_steps_per_second": 0.407 }, { "epoch": 13.96, "grad_norm": 4.021721839904785, "learning_rate": 0.0001980440097799511, "loss": 0.4237, "step": 185 }, { "epoch": 13.96, "eval_accuracy": 0.9626168224299065, "eval_f1": 0.9697054698457223, "eval_loss": 0.12292856723070145, "eval_precision": 0.968, "eval_recall": 0.9757575757575758, "eval_runtime": 39.3533, "eval_samples_per_second": 2.719, "eval_steps_per_second": 0.356, "step": 185 }, { "epoch": 14.94, "step": 198, "train_accuracy": 0.9716981132075472, "train_f1": 0.9707363445629333, "train_loss": 0.15636524558067322, "train_precision": 0.9719248605013513, "train_recall": 0.9701030873944789, "train_runtime": 128.9206, "train_samples_per_second": 3.289, "train_steps_per_second": 0.411 }, { "epoch": 14.94, "grad_norm": 2.6144134998321533, "learning_rate": 0.00018850855745721268, "loss": 0.367, "step": 198 }, { "epoch": 14.94, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.1049351617693901, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.5629, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.354, "step": 198 }, { "epoch": 16.0, "step": 212, "train_accuracy": 0.9858490566037735, "train_f1": 0.9859794210341276, "train_loss": 0.11216574162244797, "train_precision": 0.9859128049064834, "train_recall": 0.986104018607261, "train_runtime": 132.2515, "train_samples_per_second": 3.206, "train_steps_per_second": 0.401 }, { "epoch": 16.0, "grad_norm": 2.7048161029815674, "learning_rate": 0.000178239608801956, "loss": 0.4376, "step": 212 }, { "epoch": 16.0, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.08710027486085892, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 42.0413, "eval_samples_per_second": 2.545, "eval_steps_per_second": 0.333, "step": 212 }, { "epoch": 16.98, "step": 225, "train_accuracy": 0.9787735849056604, "train_f1": 0.978423027691737, "train_loss": 0.10880015045404434, "train_precision": 0.9783562367864693, "train_recall": 0.9791313538827833, "train_runtime": 131.0592, "train_samples_per_second": 3.235, "train_steps_per_second": 0.404 }, { "epoch": 16.98, "grad_norm": 2.9348771572113037, "learning_rate": 0.0001687041564792176, "loss": 0.3638, "step": 225 }, { "epoch": 16.98, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.07979033887386322, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 38.9013, "eval_samples_per_second": 2.751, "eval_steps_per_second": 0.36, "step": 225 }, { "epoch": 17.96, "step": 238, "train_accuracy": 0.9811320754716981, "train_f1": 0.9813568397733909, "train_loss": 0.12247739732265472, "train_precision": 0.9824647159390165, "train_recall": 0.9805006998510924, "train_runtime": 135.9878, "train_samples_per_second": 3.118, "train_steps_per_second": 0.39 }, { "epoch": 17.96, "grad_norm": 2.9127988815307617, "learning_rate": 0.0001591687041564792, "loss": 0.3758, "step": 238 }, { "epoch": 17.96, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.05758798122406006, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 40.1837, "eval_samples_per_second": 2.663, "eval_steps_per_second": 0.348, "step": 238 }, { "epoch": 18.94, "step": 251, "train_accuracy": 0.9858490566037735, "train_f1": 0.9849505768779323, "train_loss": 0.08141080290079117, "train_precision": 0.9856198097123687, "train_recall": 0.9845945870999945, "train_runtime": 131.3411, "train_samples_per_second": 3.228, "train_steps_per_second": 0.404 }, { "epoch": 18.94, "grad_norm": 2.887089252471924, "learning_rate": 0.00014963325183374083, "loss": 0.2759, "step": 251 }, { "epoch": 18.94, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.06044730544090271, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 56.9665, "eval_samples_per_second": 1.878, "eval_steps_per_second": 0.246, "step": 251 }, { "epoch": 20.0, "step": 265, "train_accuracy": 0.9834905660377359, "train_f1": 0.9841577997732367, "train_loss": 0.09728587418794632, "train_precision": 0.9841397108638489, "train_recall": 0.9842617289830912, "train_runtime": 131.2678, "train_samples_per_second": 3.23, "train_steps_per_second": 0.404 }, { "epoch": 20.0, "grad_norm": 2.2641186714172363, "learning_rate": 0.00013936430317848408, "loss": 0.3212, "step": 265 }, { "epoch": 20.0, "eval_accuracy": 0.9813084112149533, "eval_f1": 0.984659090909091, "eval_loss": 0.09081904590129852, "eval_precision": 0.9826086956521738, "eval_recall": 0.9878787878787879, "eval_runtime": 39.7053, "eval_samples_per_second": 2.695, "eval_steps_per_second": 0.353, "step": 265 }, { "epoch": 20.98, "step": 278, "train_accuracy": 0.9740566037735849, "train_f1": 0.9730482239763667, "train_loss": 0.12885905802249908, "train_precision": 0.9755676794807229, "train_recall": 0.9715263957551142, "train_runtime": 129.4684, "train_samples_per_second": 3.275, "train_steps_per_second": 0.409 }, { "epoch": 20.98, "grad_norm": 3.7218384742736816, "learning_rate": 0.00012982885085574571, "loss": 0.3215, "step": 278 }, { "epoch": 20.98, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.08540945500135422, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.1648, "eval_samples_per_second": 2.732, "eval_steps_per_second": 0.357, "step": 278 }, { "epoch": 21.96, "step": 291, "train_accuracy": 0.9882075471698113, "train_f1": 0.987640276713749, "train_loss": 0.08325836062431335, "train_precision": 0.9890280836661814, "train_recall": 0.9866258111031001, "train_runtime": 124.9541, "train_samples_per_second": 3.393, "train_steps_per_second": 0.424 }, { "epoch": 21.96, "grad_norm": 5.685765743255615, "learning_rate": 0.00012029339853300733, "loss": 0.3545, "step": 291 }, { "epoch": 21.96, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.07166730612516403, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.2775, "eval_samples_per_second": 2.724, "eval_steps_per_second": 0.356, "step": 291 }, { "epoch": 22.94, "step": 304, "train_accuracy": 0.9834905660377359, "train_f1": 0.9840178843462146, "train_loss": 0.09864702820777893, "train_precision": 0.9840027079631041, "train_recall": 0.9844752990764272, "train_runtime": 132.2941, "train_samples_per_second": 3.205, "train_steps_per_second": 0.401 }, { "epoch": 22.94, "grad_norm": 4.6724138259887695, "learning_rate": 0.00011075794621026893, "loss": 0.3085, "step": 304 }, { "epoch": 22.94, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.08209435641765594, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.8399, "eval_samples_per_second": 2.686, "eval_steps_per_second": 0.351, "step": 304 }, { "epoch": 24.0, "step": 318, "train_accuracy": 0.9858490566037735, "train_f1": 0.9856773749159446, "train_loss": 0.08332642912864685, "train_precision": 0.9871632432676922, "train_recall": 0.9844707203986204, "train_runtime": 130.1118, "train_samples_per_second": 3.259, "train_steps_per_second": 0.407 }, { "epoch": 24.0, "grad_norm": 4.654536724090576, "learning_rate": 0.00010048899755501222, "loss": 0.2637, "step": 318 }, { "epoch": 24.0, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.0654672160744667, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.6356, "eval_samples_per_second": 2.7, "eval_steps_per_second": 0.353, "step": 318 }, { "epoch": 24.98, "step": 331, "train_accuracy": 0.9858490566037735, "train_f1": 0.986589461820231, "train_loss": 0.07109413295984268, "train_precision": 0.9871650821089023, "train_recall": 0.98621216568729, "train_runtime": 128.7284, "train_samples_per_second": 3.294, "train_steps_per_second": 0.412 }, { "epoch": 24.98, "grad_norm": 4.322335243225098, "learning_rate": 9.095354523227383e-05, "loss": 0.2723, "step": 331 }, { "epoch": 24.98, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.06855478882789612, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 38.4718, "eval_samples_per_second": 2.781, "eval_steps_per_second": 0.364, "step": 331 }, { "epoch": 25.96, "step": 344, "train_accuracy": 0.9764150943396226, "train_f1": 0.9756975326292678, "train_loss": 0.09106432646512985, "train_precision": 0.9760480054398313, "train_recall": 0.9753730488175062, "train_runtime": 132.4151, "train_samples_per_second": 3.202, "train_steps_per_second": 0.4 }, { "epoch": 25.96, "grad_norm": 4.0115766525268555, "learning_rate": 8.141809290953544e-05, "loss": 0.36, "step": 344 }, { "epoch": 25.96, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.0726209431886673, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 40.7509, "eval_samples_per_second": 2.626, "eval_steps_per_second": 0.344, "step": 344 }, { "epoch": 26.94, "step": 357, "train_accuracy": 0.9740566037735849, "train_f1": 0.9741101148833959, "train_loss": 0.08721727132797241, "train_precision": 0.9743783993783992, "train_recall": 0.9741037543564772, "train_runtime": 133.2307, "train_samples_per_second": 3.182, "train_steps_per_second": 0.398 }, { "epoch": 26.94, "grad_norm": 2.0709145069122314, "learning_rate": 7.188264058679705e-05, "loss": 0.2535, "step": 357 }, { "epoch": 26.94, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.06701695173978806, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.1128, "eval_samples_per_second": 2.736, "eval_steps_per_second": 0.358, "step": 357 }, { "epoch": 28.0, "step": 371, "train_accuracy": 0.9811320754716981, "train_f1": 0.9811807715855693, "train_loss": 0.08411888033151627, "train_precision": 0.9816060153438795, "train_recall": 0.9810544308058603, "train_runtime": 138.1723, "train_samples_per_second": 3.069, "train_steps_per_second": 0.384 }, { "epoch": 28.0, "grad_norm": 2.612853527069092, "learning_rate": 6.161369193154034e-05, "loss": 0.2551, "step": 371 }, { "epoch": 28.0, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.0589648000895977, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.9978, "eval_samples_per_second": 2.675, "eval_steps_per_second": 0.35, "step": 371 }, { "epoch": 28.98, "step": 384, "train_accuracy": 0.9764150943396226, "train_f1": 0.9760814191422504, "train_loss": 0.08149362355470657, "train_precision": 0.9773233573176615, "train_recall": 0.9755777028177874, "train_runtime": 158.8587, "train_samples_per_second": 2.669, "train_steps_per_second": 0.334 }, { "epoch": 28.98, "grad_norm": 3.303999662399292, "learning_rate": 5.207823960880195e-05, "loss": 0.3202, "step": 384 }, { "epoch": 28.98, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.05450604483485222, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 40.1368, "eval_samples_per_second": 2.666, "eval_steps_per_second": 0.349, "step": 384 }, { "epoch": 29.96, "step": 397, "train_accuracy": 0.9976415094339622, "train_f1": 0.9975194273245798, "train_loss": 0.05407993122935295, "train_precision": 0.9977011494252874, "train_recall": 0.9973684210526315, "train_runtime": 137.913, "train_samples_per_second": 3.074, "train_steps_per_second": 0.384 }, { "epoch": 29.96, "grad_norm": 2.247615098953247, "learning_rate": 4.2542787286063565e-05, "loss": 0.2714, "step": 397 }, { "epoch": 29.96, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.04375358670949936, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 40.7987, "eval_samples_per_second": 2.623, "eval_steps_per_second": 0.343, "step": 397 }, { "epoch": 30.94, "step": 410, "train_accuracy": 0.9834905660377359, "train_f1": 0.9834196702554923, "train_loss": 0.06718786805868149, "train_precision": 0.9847436600428245, "train_recall": 0.9826715101769175, "train_runtime": 130.6854, "train_samples_per_second": 3.244, "train_steps_per_second": 0.406 }, { "epoch": 30.94, "grad_norm": 4.1858415603637695, "learning_rate": 3.300733496332518e-05, "loss": 0.2362, "step": 410 }, { "epoch": 30.94, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.05211889371275902, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.7771, "eval_samples_per_second": 2.69, "eval_steps_per_second": 0.352, "step": 410 }, { "epoch": 32.0, "step": 424, "train_accuracy": 0.9834905660377359, "train_f1": 0.9831986473322998, "train_loss": 0.09282960742712021, "train_precision": 0.9842538190364276, "train_recall": 0.9827365778544793, "train_runtime": 128.6901, "train_samples_per_second": 3.295, "train_steps_per_second": 0.412 }, { "epoch": 32.0, "grad_norm": 3.333651542663574, "learning_rate": 2.273838630806846e-05, "loss": 0.2693, "step": 424 }, { "epoch": 32.0, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.03920552134513855, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.2034, "eval_samples_per_second": 2.729, "eval_steps_per_second": 0.357, "step": 424 }, { "epoch": 32.98, "step": 437, "train_accuracy": 0.9834905660377359, "train_f1": 0.9830305559415786, "train_loss": 0.06464195251464844, "train_precision": 0.9849462365591398, "train_recall": 0.9821793023126837, "train_runtime": 132.3464, "train_samples_per_second": 3.204, "train_steps_per_second": 0.4 }, { "epoch": 32.98, "grad_norm": 4.870348930358887, "learning_rate": 1.3202933985330072e-05, "loss": 0.2644, "step": 437 }, { "epoch": 32.98, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.06383071094751358, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.8229, "eval_samples_per_second": 2.687, "eval_steps_per_second": 0.352, "step": 437 }, { "epoch": 33.96, "step": 450, "train_accuracy": 0.9834905660377359, "train_f1": 0.983306080394328, "train_loss": 0.09585532546043396, "train_precision": 0.9838161838161839, "train_recall": 0.983380012201209, "train_runtime": 133.1315, "train_samples_per_second": 3.185, "train_steps_per_second": 0.398 }, { "epoch": 33.96, "grad_norm": 3.289733409881592, "learning_rate": 3.667481662591687e-06, "loss": 0.2516, "step": 450 }, { "epoch": 33.96, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.04781457036733627, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 40.0245, "eval_samples_per_second": 2.673, "eval_steps_per_second": 0.35, "step": 450 }, { "epoch": 34.34, "step": 455, "train_accuracy": 0.9740566037735849, "train_f1": 0.9731839886499556, "train_loss": 0.0982045829296112, "train_precision": 0.9733410852713178, "train_recall": 0.9733575444357457, "train_runtime": 130.0436, "train_samples_per_second": 3.26, "train_steps_per_second": 0.408 }, { "epoch": 34.34, "grad_norm": 2.205134630203247, "learning_rate": 0.0, "loss": 0.2652, "step": 455 }, { "epoch": 34.34, "eval_accuracy": 0.9906542056074766, "eval_f1": 0.9922719141323793, "eval_loss": 0.0579226091504097, "eval_precision": 0.990909090909091, "eval_recall": 0.9939393939393939, "eval_runtime": 39.7063, "eval_samples_per_second": 2.695, "eval_steps_per_second": 0.353, "step": 455 }, { "epoch": 34.34, "step": 455, "total_flos": 3.0926830773436416e+17, "train_loss": 0.571298942723117, "train_runtime": 12111.8963, "train_samples_per_second": 1.225, "train_steps_per_second": 0.038 }, { "epoch": 34.34, "step": 455, "total_flos": 3.0926830773436416e+17, "train_loss": 0.0, "train_runtime": 0.0155, "train_samples_per_second": 955313.811, "train_steps_per_second": 29290.282 } ], "logging_steps": 50, "max_steps": 455, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 500, "total_flos": 3.0926830773436416e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }