{ "best_metric": 0.022257013246417046, "best_model_checkpoint": "/kaggle/working/output/checkpoint-150", "epoch": 42.857142857142854, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8571428571428571, "eval_LCC": -0.38242422405954946, "eval_SROCC": -0.3890036014405762, "eval_loss": 0.2948198914527893, "eval_runtime": 37.5549, "eval_samples_per_second": 1.331, "eval_steps_per_second": 0.053, "step": 3 }, { "epoch": 2.0, "eval_LCC": -0.3732136136633078, "eval_SROCC": -0.36653061224489797, "eval_loss": 0.11433681845664978, "eval_runtime": 35.6391, "eval_samples_per_second": 1.403, "eval_steps_per_second": 0.056, "step": 7 }, { "epoch": 2.857142857142857, "grad_norm": 4.934920310974121, "learning_rate": 6.666666666666667e-06, "loss": 0.1552, "step": 10 }, { "epoch": 2.857142857142857, "eval_LCC": -0.3657486170203663, "eval_SROCC": -0.3477070828331333, "eval_loss": 0.07677865773439407, "eval_runtime": 35.6897, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 10 }, { "epoch": 4.0, "eval_LCC": -0.35044377450614, "eval_SROCC": -0.3395438175270108, "eval_loss": 0.07478620857000351, "eval_runtime": 35.8511, "eval_samples_per_second": 1.395, "eval_steps_per_second": 0.056, "step": 14 }, { "epoch": 4.857142857142857, "eval_LCC": -0.33219308626687694, "eval_SROCC": -0.34981992797118844, "eval_loss": 0.05174265429377556, "eval_runtime": 35.8384, "eval_samples_per_second": 1.395, "eval_steps_per_second": 0.056, "step": 17 }, { "epoch": 5.714285714285714, "grad_norm": 2.3777573108673096, "learning_rate": 9.966191788709716e-06, "loss": 0.0657, "step": 20 }, { "epoch": 6.0, "eval_LCC": -0.30599681663050493, "eval_SROCC": -0.33368547418967587, "eval_loss": 0.05527381971478462, "eval_runtime": 35.8215, "eval_samples_per_second": 1.396, "eval_steps_per_second": 0.056, "step": 21 }, { "epoch": 6.857142857142857, "eval_LCC": -0.28100937429201095, "eval_SROCC": -0.2921008403361345, "eval_loss": 0.043372660875320435, "eval_runtime": 35.795, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.056, "step": 24 }, { "epoch": 8.0, "eval_LCC": -0.25699939917329884, "eval_SROCC": -0.24811524609843938, "eval_loss": 0.040563274174928665, "eval_runtime": 35.9877, "eval_samples_per_second": 1.389, "eval_steps_per_second": 0.056, "step": 28 }, { "epoch": 8.571428571428571, "grad_norm": 1.0520217418670654, "learning_rate": 9.698463103929542e-06, "loss": 0.0249, "step": 30 }, { "epoch": 8.857142857142858, "eval_LCC": -0.2477874480763097, "eval_SROCC": -0.23457382953181274, "eval_loss": 0.040173906832933426, "eval_runtime": 35.9073, "eval_samples_per_second": 1.392, "eval_steps_per_second": 0.056, "step": 31 }, { "epoch": 10.0, "eval_LCC": -0.21817464996358082, "eval_SROCC": -0.20758703481392557, "eval_loss": 0.0384274497628212, "eval_runtime": 35.6916, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 35 }, { "epoch": 10.857142857142858, "eval_LCC": -0.19225659084922117, "eval_SROCC": -0.19193277310924367, "eval_loss": 0.03174906224012375, "eval_runtime": 35.716, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 38 }, { "epoch": 11.428571428571429, "grad_norm": 1.398345947265625, "learning_rate": 9.177439057064684e-06, "loss": 0.0215, "step": 40 }, { "epoch": 12.0, "eval_LCC": -0.16355167219890276, "eval_SROCC": -0.15178871548619446, "eval_loss": 0.031038017943501472, "eval_runtime": 35.6946, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 42 }, { "epoch": 12.857142857142858, "eval_LCC": -0.1548916268794662, "eval_SROCC": -0.1291236494597839, "eval_loss": 0.03165186941623688, "eval_runtime": 35.6768, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 45 }, { "epoch": 14.0, "eval_LCC": -0.12920387967048147, "eval_SROCC": -0.09752701080432173, "eval_loss": 0.03006185218691826, "eval_runtime": 35.8955, "eval_samples_per_second": 1.393, "eval_steps_per_second": 0.056, "step": 49 }, { "epoch": 14.285714285714286, "grad_norm": 0.4825093746185303, "learning_rate": 8.43120818934367e-06, "loss": 0.0154, "step": 50 }, { "epoch": 14.857142857142858, "eval_LCC": -0.10573621549419017, "eval_SROCC": -0.08043217286914765, "eval_loss": 0.0284834336489439, "eval_runtime": 35.7717, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.056, "step": 52 }, { "epoch": 16.0, "eval_LCC": -0.07621053426257447, "eval_SROCC": -0.04614645858343337, "eval_loss": 0.027657881379127502, "eval_runtime": 35.7321, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.056, "step": 56 }, { "epoch": 16.857142857142858, "eval_LCC": -0.04854793231941595, "eval_SROCC": -0.03567827130852341, "eval_loss": 0.02630411647260189, "eval_runtime": 35.6683, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.056, "step": 59 }, { "epoch": 17.142857142857142, "grad_norm": 0.7504790425300598, "learning_rate": 7.500000000000001e-06, "loss": 0.0128, "step": 60 }, { "epoch": 18.0, "eval_LCC": -0.03166657911247084, "eval_SROCC": -0.01714285714285714, "eval_loss": 0.026295064017176628, "eval_runtime": 35.6163, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.056, "step": 63 }, { "epoch": 18.857142857142858, "eval_LCC": -0.023641716533156595, "eval_SROCC": -0.003985594237695078, "eval_loss": 0.026547763496637344, "eval_runtime": 35.7061, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 66 }, { "epoch": 20.0, "grad_norm": 0.4742915630340576, "learning_rate": 6.434016163555452e-06, "loss": 0.0113, "step": 70 }, { "epoch": 20.0, "eval_LCC": -0.008938391820749465, "eval_SROCC": 0.022713085234093634, "eval_loss": 0.026299767196178436, "eval_runtime": 35.6413, "eval_samples_per_second": 1.403, "eval_steps_per_second": 0.056, "step": 70 }, { "epoch": 20.857142857142858, "eval_LCC": 0.008109004168265264, "eval_SROCC": 0.025402160864345734, "eval_loss": 0.025580281391739845, "eval_runtime": 35.724, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 73 }, { "epoch": 22.0, "eval_LCC": 0.023345863441218237, "eval_SROCC": 0.049315726290516206, "eval_loss": 0.02492944523692131, "eval_runtime": 35.624, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.056, "step": 77 }, { "epoch": 22.857142857142858, "grad_norm": 0.43411004543304443, "learning_rate": 5.290724144552379e-06, "loss": 0.0104, "step": 80 }, { "epoch": 22.857142857142858, "eval_LCC": 0.033023044716641976, "eval_SROCC": 0.06160864345738295, "eval_loss": 0.024564068764448166, "eval_runtime": 35.6925, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 80 }, { "epoch": 24.0, "eval_LCC": 0.04347957438577821, "eval_SROCC": 0.06909963985594238, "eval_loss": 0.02423253282904625, "eval_runtime": 35.9004, "eval_samples_per_second": 1.393, "eval_steps_per_second": 0.056, "step": 84 }, { "epoch": 24.857142857142858, "eval_LCC": 0.051818984749542364, "eval_SROCC": 0.07956782713085234, "eval_loss": 0.024022720754146576, "eval_runtime": 35.8318, "eval_samples_per_second": 1.395, "eval_steps_per_second": 0.056, "step": 87 }, { "epoch": 25.714285714285715, "grad_norm": 0.443155437707901, "learning_rate": 4.131759111665349e-06, "loss": 0.0095, "step": 90 }, { "epoch": 26.0, "eval_LCC": 0.0679416905737761, "eval_SROCC": 0.0830252100840336, "eval_loss": 0.02380475588142872, "eval_runtime": 35.6776, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 91 }, { "epoch": 26.857142857142858, "eval_LCC": 0.0746956496870511, "eval_SROCC": 0.09291716686674668, "eval_loss": 0.023518024012446404, "eval_runtime": 35.6658, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.056, "step": 94 }, { "epoch": 28.0, "eval_LCC": 0.08622063268749575, "eval_SROCC": 0.10031212484993997, "eval_loss": 0.02321736514568329, "eval_runtime": 35.7144, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 98 }, { "epoch": 28.571428571428573, "grad_norm": 0.6047067642211914, "learning_rate": 3.019601169804216e-06, "loss": 0.009, "step": 100 }, { "epoch": 28.857142857142858, "eval_LCC": 0.09545687497338319, "eval_SROCC": 0.10501800720288115, "eval_loss": 0.022862296551465988, "eval_runtime": 35.7409, "eval_samples_per_second": 1.399, "eval_steps_per_second": 0.056, "step": 101 }, { "epoch": 30.0, "eval_LCC": 0.10522076553867485, "eval_SROCC": 0.10722689075630251, "eval_loss": 0.02260303497314453, "eval_runtime": 35.8268, "eval_samples_per_second": 1.396, "eval_steps_per_second": 0.056, "step": 105 }, { "epoch": 30.857142857142858, "eval_LCC": 0.11099832718077456, "eval_SROCC": 0.11769507803121249, "eval_loss": 0.022564733400940895, "eval_runtime": 35.6873, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 108 }, { "epoch": 31.428571428571427, "grad_norm": 0.3605582118034363, "learning_rate": 2.0142070414860704e-06, "loss": 0.0084, "step": 110 }, { "epoch": 32.0, "eval_LCC": 0.11518191832516576, "eval_SROCC": 0.12864345738295319, "eval_loss": 0.022482411935925484, "eval_runtime": 35.5295, "eval_samples_per_second": 1.407, "eval_steps_per_second": 0.056, "step": 112 }, { "epoch": 32.857142857142854, "eval_LCC": 0.11672118384335717, "eval_SROCC": 0.12960384153661464, "eval_loss": 0.02244633622467518, "eval_runtime": 35.6625, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.056, "step": 115 }, { "epoch": 34.0, "eval_LCC": 0.11853509722750845, "eval_SROCC": 0.12960384153661464, "eval_loss": 0.022433871403336525, "eval_runtime": 35.775, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.056, "step": 119 }, { "epoch": 34.285714285714285, "grad_norm": 0.37335312366485596, "learning_rate": 1.1697777844051105e-06, "loss": 0.0085, "step": 120 }, { "epoch": 34.857142857142854, "eval_LCC": 0.11996998016040515, "eval_SROCC": 0.13104441776710685, "eval_loss": 0.022421473637223244, "eval_runtime": 35.8151, "eval_samples_per_second": 1.396, "eval_steps_per_second": 0.056, "step": 122 }, { "epoch": 36.0, "eval_LCC": 0.12212764115994604, "eval_SROCC": 0.12633853541416568, "eval_loss": 0.02237151563167572, "eval_runtime": 35.6186, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.056, "step": 126 }, { "epoch": 36.857142857142854, "eval_LCC": 0.12332180896765454, "eval_SROCC": 0.12489795918367347, "eval_loss": 0.022355427965521812, "eval_runtime": 35.9066, "eval_samples_per_second": 1.393, "eval_steps_per_second": 0.056, "step": 129 }, { "epoch": 37.142857142857146, "grad_norm": 0.5681003332138062, "learning_rate": 5.318367983829393e-07, "loss": 0.0082, "step": 130 }, { "epoch": 38.0, "eval_LCC": 0.12470767155506596, "eval_SROCC": 0.12720288115246098, "eval_loss": 0.022325601428747177, "eval_runtime": 35.6817, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 133 }, { "epoch": 38.857142857142854, "eval_LCC": 0.1255198227036719, "eval_SROCC": 0.12720288115246098, "eval_loss": 0.02230682410299778, "eval_runtime": 35.758, "eval_samples_per_second": 1.398, "eval_steps_per_second": 0.056, "step": 136 }, { "epoch": 40.0, "grad_norm": 0.49957460165023804, "learning_rate": 1.3477564710088097e-07, "loss": 0.008, "step": 140 }, { "epoch": 40.0, "eval_LCC": 0.12651835944328998, "eval_SROCC": 0.1291236494597839, "eval_loss": 0.022274091839790344, "eval_runtime": 35.7033, "eval_samples_per_second": 1.4, "eval_steps_per_second": 0.056, "step": 140 }, { "epoch": 40.857142857142854, "eval_LCC": 0.1269013775632479, "eval_SROCC": 0.1291236494597839, "eval_loss": 0.022262830287218094, "eval_runtime": 35.6563, "eval_samples_per_second": 1.402, "eval_steps_per_second": 0.056, "step": 143 }, { "epoch": 42.0, "eval_LCC": 0.12710778753633592, "eval_SROCC": 0.1291236494597839, "eval_loss": 0.022257346659898758, "eval_runtime": 35.6941, "eval_samples_per_second": 1.401, "eval_steps_per_second": 0.056, "step": 147 }, { "epoch": 42.857142857142854, "grad_norm": 0.33458444476127625, "learning_rate": 0.0, "loss": 0.0078, "step": 150 }, { "epoch": 42.857142857142854, "eval_LCC": 0.12712191056512775, "eval_SROCC": 0.1291236494597839, "eval_loss": 0.022257013246417046, "eval_runtime": 35.8013, "eval_samples_per_second": 1.397, "eval_steps_per_second": 0.056, "step": 150 }, { "epoch": 42.857142857142854, "step": 150, "total_flos": 2.1207924866757427e+18, "train_loss": 0.02509542241692543, "train_runtime": 7697.1048, "train_samples_per_second": 1.39, "train_steps_per_second": 0.019 } ], "logging_steps": 10, "max_steps": 150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1207924866757427e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }