resnet-50-finetuned-FBark / trainer_state.json
alyzbane's picture
End of training
cc2baeb verified
raw
history blame
33.6 kB
{
"best_metric": 0.9906542056074766,
"best_model_checkpoint": "resnet-50-finetuned-FBark\\checkpoint-198",
"epoch": 34.339622641509436,
"eval_steps": 500,
"global_step": 455,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.98,
"step": 13,
"train_accuracy": 0.18867924528301888,
"train_f1": 0.07866239279216843,
"train_loss": 1.6040071249008179,
"train_precision": 0.10334148329258355,
"train_recall": 0.20698380566801616,
"train_runtime": 132.5495,
"train_samples_per_second": 3.199,
"train_steps_per_second": 0.4
},
{
"epoch": 0.98,
"grad_norm": 0.8319346904754639,
"learning_rate": 8.478260869565217e-05,
"loss": 1.6424,
"step": 13
},
{
"epoch": 0.98,
"eval_accuracy": 0.2336448598130841,
"eval_f1": 0.10987810004203447,
"eval_loss": 1.5959796905517578,
"eval_precision": 0.1749174917491749,
"eval_recall": 0.22424242424242422,
"eval_runtime": 39.1296,
"eval_samples_per_second": 2.735,
"eval_steps_per_second": 0.358,
"step": 13
},
{
"epoch": 1.96,
"step": 26,
"train_accuracy": 0.33962264150943394,
"train_f1": 0.24495658674026793,
"train_loss": 1.5701098442077637,
"train_precision": 0.3021095248242063,
"train_recall": 0.3113901059286784,
"train_runtime": 127.6225,
"train_samples_per_second": 3.322,
"train_steps_per_second": 0.415
},
{
"epoch": 1.96,
"grad_norm": 1.3721247911453247,
"learning_rate": 0.00016956521739130433,
"loss": 1.621,
"step": 26
},
{
"epoch": 1.96,
"eval_accuracy": 0.411214953271028,
"eval_f1": 0.26165216896924215,
"eval_loss": 1.546158790588379,
"eval_precision": 0.3088888888888889,
"eval_recall": 0.3116883116883117,
"eval_runtime": 39.3684,
"eval_samples_per_second": 2.718,
"eval_steps_per_second": 0.356,
"step": 26
},
{
"epoch": 2.94,
"step": 39,
"train_accuracy": 0.37264150943396224,
"train_f1": 0.2637971738836966,
"train_loss": 1.5183203220367432,
"train_precision": 0.35881109762129587,
"train_recall": 0.34996256447229773,
"train_runtime": 132.7347,
"train_samples_per_second": 3.194,
"train_steps_per_second": 0.399
},
{
"epoch": 2.94,
"grad_norm": 1.4928213357925415,
"learning_rate": 0.00025434782608695647,
"loss": 1.567,
"step": 39
},
{
"epoch": 2.94,
"eval_accuracy": 0.4766355140186916,
"eval_f1": 0.30495432955791013,
"eval_loss": 1.4607292413711548,
"eval_precision": 0.3638922888616891,
"eval_recall": 0.3748917748917749,
"eval_runtime": 41.5534,
"eval_samples_per_second": 2.575,
"eval_steps_per_second": 0.337,
"step": 39
},
{
"epoch": 4.0,
"step": 53,
"train_accuracy": 0.4386792452830189,
"train_f1": 0.3491466500711846,
"train_loss": 1.346737027168274,
"train_precision": 0.7565600797484855,
"train_recall": 0.41960074270933767,
"train_runtime": 137.4133,
"train_samples_per_second": 3.086,
"train_steps_per_second": 0.386
},
{
"epoch": 4.0,
"grad_norm": 1.9480384588241577,
"learning_rate": 0.00029486552567237163,
"loss": 1.357,
"step": 53
},
{
"epoch": 4.0,
"eval_accuracy": 0.514018691588785,
"eval_f1": 0.38211575211575216,
"eval_loss": 1.2584657669067383,
"eval_precision": 0.8378205128205127,
"eval_recall": 0.4251082251082251,
"eval_runtime": 43.051,
"eval_samples_per_second": 2.485,
"eval_steps_per_second": 0.325,
"step": 53
},
{
"epoch": 4.98,
"step": 66,
"train_accuracy": 0.6957547169811321,
"train_f1": 0.6891574214359025,
"train_loss": 1.138260841369629,
"train_precision": 0.8253012477718361,
"train_recall": 0.6805251227537071,
"train_runtime": 126.2306,
"train_samples_per_second": 3.359,
"train_steps_per_second": 0.42
},
{
"epoch": 4.98,
"grad_norm": 1.9529035091400146,
"learning_rate": 0.0002853300733496332,
"loss": 1.3203,
"step": 66
},
{
"epoch": 4.98,
"eval_accuracy": 0.7476635514018691,
"eval_f1": 0.7225305110805734,
"eval_loss": 1.0523829460144043,
"eval_precision": 0.8176507936507935,
"eval_recall": 0.7064935064935065,
"eval_runtime": 39.1299,
"eval_samples_per_second": 2.734,
"eval_steps_per_second": 0.358,
"step": 66
},
{
"epoch": 5.96,
"step": 79,
"train_accuracy": 0.7594339622641509,
"train_f1": 0.7384580979394952,
"train_loss": 0.8829485774040222,
"train_precision": 0.86050056869729,
"train_recall": 0.7432874367985035,
"train_runtime": 133.9565,
"train_samples_per_second": 3.165,
"train_steps_per_second": 0.396
},
{
"epoch": 5.96,
"grad_norm": 2.4536993503570557,
"learning_rate": 0.00027579462102689484,
"loss": 1.1706,
"step": 79
},
{
"epoch": 5.96,
"eval_accuracy": 0.794392523364486,
"eval_f1": 0.7846877954646693,
"eval_loss": 0.8008124828338623,
"eval_precision": 0.8854145854145855,
"eval_recall": 0.767965367965368,
"eval_runtime": 40.731,
"eval_samples_per_second": 2.627,
"eval_steps_per_second": 0.344,
"step": 79
},
{
"epoch": 6.94,
"step": 92,
"train_accuracy": 0.8632075471698113,
"train_f1": 0.8590685733770105,
"train_loss": 0.7464644312858582,
"train_precision": 0.892835269329224,
"train_recall": 0.8548507199297516,
"train_runtime": 130.0886,
"train_samples_per_second": 3.259,
"train_steps_per_second": 0.407
},
{
"epoch": 6.94,
"grad_norm": 2.5925121307373047,
"learning_rate": 0.00026625916870415647,
"loss": 0.9929,
"step": 92
},
{
"epoch": 6.94,
"eval_accuracy": 0.9158878504672897,
"eval_f1": 0.9236075036075035,
"eval_loss": 0.6253050565719604,
"eval_precision": 0.9368429298864083,
"eval_recall": 0.9212121212121211,
"eval_runtime": 38.4775,
"eval_samples_per_second": 2.781,
"eval_steps_per_second": 0.364,
"step": 92
},
{
"epoch": 8.0,
"step": 106,
"train_accuracy": 0.9080188679245284,
"train_f1": 0.9066600370668294,
"train_loss": 0.5274814963340759,
"train_precision": 0.9124982372811825,
"train_recall": 0.9042126570890489,
"train_runtime": 129.4721,
"train_samples_per_second": 3.275,
"train_steps_per_second": 0.409
},
{
"epoch": 8.0,
"grad_norm": 2.86348032951355,
"learning_rate": 0.0002559902200488997,
"loss": 0.7633,
"step": 106
},
{
"epoch": 8.0,
"eval_accuracy": 0.9345794392523364,
"eval_f1": 0.9342456932845948,
"eval_loss": 0.46806150674819946,
"eval_precision": 0.9339420289855072,
"eval_recall": 0.9437229437229437,
"eval_runtime": 39.4048,
"eval_samples_per_second": 2.715,
"eval_steps_per_second": 0.355,
"step": 106
},
{
"epoch": 8.98,
"step": 119,
"train_accuracy": 0.9363207547169812,
"train_f1": 0.9358357324202157,
"train_loss": 0.46290820837020874,
"train_precision": 0.938202392067757,
"train_recall": 0.9342575372358033,
"train_runtime": 133.0687,
"train_samples_per_second": 3.186,
"train_steps_per_second": 0.398
},
{
"epoch": 8.98,
"grad_norm": 3.642646074295044,
"learning_rate": 0.00024645476772616135,
"loss": 0.6367,
"step": 119
},
{
"epoch": 8.98,
"eval_accuracy": 0.9158878504672897,
"eval_f1": 0.9145165945165944,
"eval_loss": 0.3800387382507324,
"eval_precision": 0.912056277056277,
"eval_recall": 0.9194805194805195,
"eval_runtime": 48.5414,
"eval_samples_per_second": 2.204,
"eval_steps_per_second": 0.288,
"step": 119
},
{
"epoch": 9.96,
"step": 132,
"train_accuracy": 0.9410377358490566,
"train_f1": 0.9413105716667156,
"train_loss": 0.36001139879226685,
"train_precision": 0.941837807815579,
"train_recall": 0.9410602738360391,
"train_runtime": 133.0768,
"train_samples_per_second": 3.186,
"train_steps_per_second": 0.398
},
{
"epoch": 9.96,
"grad_norm": 2.934262275695801,
"learning_rate": 0.00023691931540342298,
"loss": 0.5834,
"step": 132
},
{
"epoch": 9.96,
"eval_accuracy": 0.9532710280373832,
"eval_f1": 0.9551136455716718,
"eval_loss": 0.26904991269111633,
"eval_precision": 0.959457478005865,
"eval_recall": 0.9541125541125541,
"eval_runtime": 42.7878,
"eval_samples_per_second": 2.501,
"eval_steps_per_second": 0.327,
"step": 132
},
{
"epoch": 10.94,
"step": 145,
"train_accuracy": 0.9504716981132075,
"train_f1": 0.9503020748526174,
"train_loss": 0.25279200077056885,
"train_precision": 0.9508748114630468,
"train_recall": 0.9503638914618925,
"train_runtime": 134.3359,
"train_samples_per_second": 3.156,
"train_steps_per_second": 0.395
},
{
"epoch": 10.94,
"grad_norm": 3.635103464126587,
"learning_rate": 0.00022738386308068459,
"loss": 0.4842,
"step": 145
},
{
"epoch": 10.94,
"eval_accuracy": 0.9813084112149533,
"eval_f1": 0.984659090909091,
"eval_loss": 0.16999471187591553,
"eval_precision": 0.9826086956521738,
"eval_recall": 0.9878787878787879,
"eval_runtime": 49.1618,
"eval_samples_per_second": 2.176,
"eval_steps_per_second": 0.285,
"step": 145
},
{
"epoch": 12.0,
"step": 159,
"train_accuracy": 0.9693396226415094,
"train_f1": 0.9695757850179305,
"train_loss": 0.1881800889968872,
"train_precision": 0.9693384564611929,
"train_recall": 0.9709818221559601,
"train_runtime": 128.8721,
"train_samples_per_second": 3.29,
"train_steps_per_second": 0.411
},
{
"epoch": 12.0,
"grad_norm": 2.419642925262451,
"learning_rate": 0.00021711491442542784,
"loss": 0.4302,
"step": 159
},
{
"epoch": 12.0,
"eval_accuracy": 0.9626168224299065,
"eval_f1": 0.9676432095036744,
"eval_loss": 0.17427879571914673,
"eval_precision": 0.9648221343873518,
"eval_recall": 0.9722943722943723,
"eval_runtime": 41.3417,
"eval_samples_per_second": 2.588,
"eval_steps_per_second": 0.339,
"step": 159
},
{
"epoch": 12.98,
"step": 172,
"train_accuracy": 0.964622641509434,
"train_f1": 0.9651368628644732,
"train_loss": 0.1646902710199356,
"train_precision": 0.9645891898165841,
"train_recall": 0.9665469535253202,
"train_runtime": 134.1065,
"train_samples_per_second": 3.162,
"train_steps_per_second": 0.395
},
{
"epoch": 12.98,
"grad_norm": 4.919209003448486,
"learning_rate": 0.00020757946210268947,
"loss": 0.4422,
"step": 172
},
{
"epoch": 12.98,
"eval_accuracy": 0.9719626168224299,
"eval_f1": 0.9771428571428572,
"eval_loss": 0.13857078552246094,
"eval_precision": 0.975,
"eval_recall": 0.9818181818181818,
"eval_runtime": 39.4778,
"eval_samples_per_second": 2.71,
"eval_steps_per_second": 0.355,
"step": 172
},
{
"epoch": 13.96,
"step": 185,
"train_accuracy": 0.9716981132075472,
"train_f1": 0.9709900945487153,
"train_loss": 0.13838660717010498,
"train_precision": 0.9710717151425976,
"train_recall": 0.9710504067284639,
"train_runtime": 130.0755,
"train_samples_per_second": 3.26,
"train_steps_per_second": 0.407
},
{
"epoch": 13.96,
"grad_norm": 4.021721839904785,
"learning_rate": 0.0001980440097799511,
"loss": 0.4237,
"step": 185
},
{
"epoch": 13.96,
"eval_accuracy": 0.9626168224299065,
"eval_f1": 0.9697054698457223,
"eval_loss": 0.12292856723070145,
"eval_precision": 0.968,
"eval_recall": 0.9757575757575758,
"eval_runtime": 39.3533,
"eval_samples_per_second": 2.719,
"eval_steps_per_second": 0.356,
"step": 185
},
{
"epoch": 14.94,
"step": 198,
"train_accuracy": 0.9716981132075472,
"train_f1": 0.9707363445629333,
"train_loss": 0.15636524558067322,
"train_precision": 0.9719248605013513,
"train_recall": 0.9701030873944789,
"train_runtime": 128.9206,
"train_samples_per_second": 3.289,
"train_steps_per_second": 0.411
},
{
"epoch": 14.94,
"grad_norm": 2.6144134998321533,
"learning_rate": 0.00018850855745721268,
"loss": 0.367,
"step": 198
},
{
"epoch": 14.94,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.1049351617693901,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.5629,
"eval_samples_per_second": 2.705,
"eval_steps_per_second": 0.354,
"step": 198
},
{
"epoch": 16.0,
"step": 212,
"train_accuracy": 0.9858490566037735,
"train_f1": 0.9859794210341276,
"train_loss": 0.11216574162244797,
"train_precision": 0.9859128049064834,
"train_recall": 0.986104018607261,
"train_runtime": 132.2515,
"train_samples_per_second": 3.206,
"train_steps_per_second": 0.401
},
{
"epoch": 16.0,
"grad_norm": 2.7048161029815674,
"learning_rate": 0.000178239608801956,
"loss": 0.4376,
"step": 212
},
{
"epoch": 16.0,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.08710027486085892,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 42.0413,
"eval_samples_per_second": 2.545,
"eval_steps_per_second": 0.333,
"step": 212
},
{
"epoch": 16.98,
"step": 225,
"train_accuracy": 0.9787735849056604,
"train_f1": 0.978423027691737,
"train_loss": 0.10880015045404434,
"train_precision": 0.9783562367864693,
"train_recall": 0.9791313538827833,
"train_runtime": 131.0592,
"train_samples_per_second": 3.235,
"train_steps_per_second": 0.404
},
{
"epoch": 16.98,
"grad_norm": 2.9348771572113037,
"learning_rate": 0.0001687041564792176,
"loss": 0.3638,
"step": 225
},
{
"epoch": 16.98,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.07979033887386322,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 38.9013,
"eval_samples_per_second": 2.751,
"eval_steps_per_second": 0.36,
"step": 225
},
{
"epoch": 17.96,
"step": 238,
"train_accuracy": 0.9811320754716981,
"train_f1": 0.9813568397733909,
"train_loss": 0.12247739732265472,
"train_precision": 0.9824647159390165,
"train_recall": 0.9805006998510924,
"train_runtime": 135.9878,
"train_samples_per_second": 3.118,
"train_steps_per_second": 0.39
},
{
"epoch": 17.96,
"grad_norm": 2.9127988815307617,
"learning_rate": 0.0001591687041564792,
"loss": 0.3758,
"step": 238
},
{
"epoch": 17.96,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.05758798122406006,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 40.1837,
"eval_samples_per_second": 2.663,
"eval_steps_per_second": 0.348,
"step": 238
},
{
"epoch": 18.94,
"step": 251,
"train_accuracy": 0.9858490566037735,
"train_f1": 0.9849505768779323,
"train_loss": 0.08141080290079117,
"train_precision": 0.9856198097123687,
"train_recall": 0.9845945870999945,
"train_runtime": 131.3411,
"train_samples_per_second": 3.228,
"train_steps_per_second": 0.404
},
{
"epoch": 18.94,
"grad_norm": 2.887089252471924,
"learning_rate": 0.00014963325183374083,
"loss": 0.2759,
"step": 251
},
{
"epoch": 18.94,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.06044730544090271,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 56.9665,
"eval_samples_per_second": 1.878,
"eval_steps_per_second": 0.246,
"step": 251
},
{
"epoch": 20.0,
"step": 265,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.9841577997732367,
"train_loss": 0.09728587418794632,
"train_precision": 0.9841397108638489,
"train_recall": 0.9842617289830912,
"train_runtime": 131.2678,
"train_samples_per_second": 3.23,
"train_steps_per_second": 0.404
},
{
"epoch": 20.0,
"grad_norm": 2.2641186714172363,
"learning_rate": 0.00013936430317848408,
"loss": 0.3212,
"step": 265
},
{
"epoch": 20.0,
"eval_accuracy": 0.9813084112149533,
"eval_f1": 0.984659090909091,
"eval_loss": 0.09081904590129852,
"eval_precision": 0.9826086956521738,
"eval_recall": 0.9878787878787879,
"eval_runtime": 39.7053,
"eval_samples_per_second": 2.695,
"eval_steps_per_second": 0.353,
"step": 265
},
{
"epoch": 20.98,
"step": 278,
"train_accuracy": 0.9740566037735849,
"train_f1": 0.9730482239763667,
"train_loss": 0.12885905802249908,
"train_precision": 0.9755676794807229,
"train_recall": 0.9715263957551142,
"train_runtime": 129.4684,
"train_samples_per_second": 3.275,
"train_steps_per_second": 0.409
},
{
"epoch": 20.98,
"grad_norm": 3.7218384742736816,
"learning_rate": 0.00012982885085574571,
"loss": 0.3215,
"step": 278
},
{
"epoch": 20.98,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.08540945500135422,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.1648,
"eval_samples_per_second": 2.732,
"eval_steps_per_second": 0.357,
"step": 278
},
{
"epoch": 21.96,
"step": 291,
"train_accuracy": 0.9882075471698113,
"train_f1": 0.987640276713749,
"train_loss": 0.08325836062431335,
"train_precision": 0.9890280836661814,
"train_recall": 0.9866258111031001,
"train_runtime": 124.9541,
"train_samples_per_second": 3.393,
"train_steps_per_second": 0.424
},
{
"epoch": 21.96,
"grad_norm": 5.685765743255615,
"learning_rate": 0.00012029339853300733,
"loss": 0.3545,
"step": 291
},
{
"epoch": 21.96,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.07166730612516403,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.2775,
"eval_samples_per_second": 2.724,
"eval_steps_per_second": 0.356,
"step": 291
},
{
"epoch": 22.94,
"step": 304,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.9840178843462146,
"train_loss": 0.09864702820777893,
"train_precision": 0.9840027079631041,
"train_recall": 0.9844752990764272,
"train_runtime": 132.2941,
"train_samples_per_second": 3.205,
"train_steps_per_second": 0.401
},
{
"epoch": 22.94,
"grad_norm": 4.6724138259887695,
"learning_rate": 0.00011075794621026893,
"loss": 0.3085,
"step": 304
},
{
"epoch": 22.94,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.08209435641765594,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.8399,
"eval_samples_per_second": 2.686,
"eval_steps_per_second": 0.351,
"step": 304
},
{
"epoch": 24.0,
"step": 318,
"train_accuracy": 0.9858490566037735,
"train_f1": 0.9856773749159446,
"train_loss": 0.08332642912864685,
"train_precision": 0.9871632432676922,
"train_recall": 0.9844707203986204,
"train_runtime": 130.1118,
"train_samples_per_second": 3.259,
"train_steps_per_second": 0.407
},
{
"epoch": 24.0,
"grad_norm": 4.654536724090576,
"learning_rate": 0.00010048899755501222,
"loss": 0.2637,
"step": 318
},
{
"epoch": 24.0,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.0654672160744667,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.6356,
"eval_samples_per_second": 2.7,
"eval_steps_per_second": 0.353,
"step": 318
},
{
"epoch": 24.98,
"step": 331,
"train_accuracy": 0.9858490566037735,
"train_f1": 0.986589461820231,
"train_loss": 0.07109413295984268,
"train_precision": 0.9871650821089023,
"train_recall": 0.98621216568729,
"train_runtime": 128.7284,
"train_samples_per_second": 3.294,
"train_steps_per_second": 0.412
},
{
"epoch": 24.98,
"grad_norm": 4.322335243225098,
"learning_rate": 9.095354523227383e-05,
"loss": 0.2723,
"step": 331
},
{
"epoch": 24.98,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.06855478882789612,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 38.4718,
"eval_samples_per_second": 2.781,
"eval_steps_per_second": 0.364,
"step": 331
},
{
"epoch": 25.96,
"step": 344,
"train_accuracy": 0.9764150943396226,
"train_f1": 0.9756975326292678,
"train_loss": 0.09106432646512985,
"train_precision": 0.9760480054398313,
"train_recall": 0.9753730488175062,
"train_runtime": 132.4151,
"train_samples_per_second": 3.202,
"train_steps_per_second": 0.4
},
{
"epoch": 25.96,
"grad_norm": 4.0115766525268555,
"learning_rate": 8.141809290953544e-05,
"loss": 0.36,
"step": 344
},
{
"epoch": 25.96,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.0726209431886673,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 40.7509,
"eval_samples_per_second": 2.626,
"eval_steps_per_second": 0.344,
"step": 344
},
{
"epoch": 26.94,
"step": 357,
"train_accuracy": 0.9740566037735849,
"train_f1": 0.9741101148833959,
"train_loss": 0.08721727132797241,
"train_precision": 0.9743783993783992,
"train_recall": 0.9741037543564772,
"train_runtime": 133.2307,
"train_samples_per_second": 3.182,
"train_steps_per_second": 0.398
},
{
"epoch": 26.94,
"grad_norm": 2.0709145069122314,
"learning_rate": 7.188264058679705e-05,
"loss": 0.2535,
"step": 357
},
{
"epoch": 26.94,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.06701695173978806,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.1128,
"eval_samples_per_second": 2.736,
"eval_steps_per_second": 0.358,
"step": 357
},
{
"epoch": 28.0,
"step": 371,
"train_accuracy": 0.9811320754716981,
"train_f1": 0.9811807715855693,
"train_loss": 0.08411888033151627,
"train_precision": 0.9816060153438795,
"train_recall": 0.9810544308058603,
"train_runtime": 138.1723,
"train_samples_per_second": 3.069,
"train_steps_per_second": 0.384
},
{
"epoch": 28.0,
"grad_norm": 2.612853527069092,
"learning_rate": 6.161369193154034e-05,
"loss": 0.2551,
"step": 371
},
{
"epoch": 28.0,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.0589648000895977,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.9978,
"eval_samples_per_second": 2.675,
"eval_steps_per_second": 0.35,
"step": 371
},
{
"epoch": 28.98,
"step": 384,
"train_accuracy": 0.9764150943396226,
"train_f1": 0.9760814191422504,
"train_loss": 0.08149362355470657,
"train_precision": 0.9773233573176615,
"train_recall": 0.9755777028177874,
"train_runtime": 158.8587,
"train_samples_per_second": 2.669,
"train_steps_per_second": 0.334
},
{
"epoch": 28.98,
"grad_norm": 3.303999662399292,
"learning_rate": 5.207823960880195e-05,
"loss": 0.3202,
"step": 384
},
{
"epoch": 28.98,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.05450604483485222,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 40.1368,
"eval_samples_per_second": 2.666,
"eval_steps_per_second": 0.349,
"step": 384
},
{
"epoch": 29.96,
"step": 397,
"train_accuracy": 0.9976415094339622,
"train_f1": 0.9975194273245798,
"train_loss": 0.05407993122935295,
"train_precision": 0.9977011494252874,
"train_recall": 0.9973684210526315,
"train_runtime": 137.913,
"train_samples_per_second": 3.074,
"train_steps_per_second": 0.384
},
{
"epoch": 29.96,
"grad_norm": 2.247615098953247,
"learning_rate": 4.2542787286063565e-05,
"loss": 0.2714,
"step": 397
},
{
"epoch": 29.96,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.04375358670949936,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 40.7987,
"eval_samples_per_second": 2.623,
"eval_steps_per_second": 0.343,
"step": 397
},
{
"epoch": 30.94,
"step": 410,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.9834196702554923,
"train_loss": 0.06718786805868149,
"train_precision": 0.9847436600428245,
"train_recall": 0.9826715101769175,
"train_runtime": 130.6854,
"train_samples_per_second": 3.244,
"train_steps_per_second": 0.406
},
{
"epoch": 30.94,
"grad_norm": 4.1858415603637695,
"learning_rate": 3.300733496332518e-05,
"loss": 0.2362,
"step": 410
},
{
"epoch": 30.94,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.05211889371275902,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.7771,
"eval_samples_per_second": 2.69,
"eval_steps_per_second": 0.352,
"step": 410
},
{
"epoch": 32.0,
"step": 424,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.9831986473322998,
"train_loss": 0.09282960742712021,
"train_precision": 0.9842538190364276,
"train_recall": 0.9827365778544793,
"train_runtime": 128.6901,
"train_samples_per_second": 3.295,
"train_steps_per_second": 0.412
},
{
"epoch": 32.0,
"grad_norm": 3.333651542663574,
"learning_rate": 2.273838630806846e-05,
"loss": 0.2693,
"step": 424
},
{
"epoch": 32.0,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.03920552134513855,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.2034,
"eval_samples_per_second": 2.729,
"eval_steps_per_second": 0.357,
"step": 424
},
{
"epoch": 32.98,
"step": 437,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.9830305559415786,
"train_loss": 0.06464195251464844,
"train_precision": 0.9849462365591398,
"train_recall": 0.9821793023126837,
"train_runtime": 132.3464,
"train_samples_per_second": 3.204,
"train_steps_per_second": 0.4
},
{
"epoch": 32.98,
"grad_norm": 4.870348930358887,
"learning_rate": 1.3202933985330072e-05,
"loss": 0.2644,
"step": 437
},
{
"epoch": 32.98,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.06383071094751358,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.8229,
"eval_samples_per_second": 2.687,
"eval_steps_per_second": 0.352,
"step": 437
},
{
"epoch": 33.96,
"step": 450,
"train_accuracy": 0.9834905660377359,
"train_f1": 0.983306080394328,
"train_loss": 0.09585532546043396,
"train_precision": 0.9838161838161839,
"train_recall": 0.983380012201209,
"train_runtime": 133.1315,
"train_samples_per_second": 3.185,
"train_steps_per_second": 0.398
},
{
"epoch": 33.96,
"grad_norm": 3.289733409881592,
"learning_rate": 3.667481662591687e-06,
"loss": 0.2516,
"step": 450
},
{
"epoch": 33.96,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.04781457036733627,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 40.0245,
"eval_samples_per_second": 2.673,
"eval_steps_per_second": 0.35,
"step": 450
},
{
"epoch": 34.34,
"step": 455,
"train_accuracy": 0.9740566037735849,
"train_f1": 0.9731839886499556,
"train_loss": 0.0982045829296112,
"train_precision": 0.9733410852713178,
"train_recall": 0.9733575444357457,
"train_runtime": 130.0436,
"train_samples_per_second": 3.26,
"train_steps_per_second": 0.408
},
{
"epoch": 34.34,
"grad_norm": 2.205134630203247,
"learning_rate": 0.0,
"loss": 0.2652,
"step": 455
},
{
"epoch": 34.34,
"eval_accuracy": 0.9906542056074766,
"eval_f1": 0.9922719141323793,
"eval_loss": 0.0579226091504097,
"eval_precision": 0.990909090909091,
"eval_recall": 0.9939393939393939,
"eval_runtime": 39.7063,
"eval_samples_per_second": 2.695,
"eval_steps_per_second": 0.353,
"step": 455
},
{
"epoch": 34.34,
"step": 455,
"total_flos": 3.0926830773436416e+17,
"train_loss": 0.571298942723117,
"train_runtime": 12111.8963,
"train_samples_per_second": 1.225,
"train_steps_per_second": 0.038
},
{
"epoch": 34.34,
"step": 455,
"total_flos": 3.0926830773436416e+17,
"train_loss": 0.0,
"train_runtime": 0.0155,
"train_samples_per_second": 955313.811,
"train_steps_per_second": 29290.282
}
],
"logging_steps": 50,
"max_steps": 455,
"num_input_tokens_seen": 0,
"num_train_epochs": 35,
"save_steps": 500,
"total_flos": 3.0926830773436416e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}