{ "best_metric": 0.15990132093429565, "best_model_checkpoint": "strategydisofrisksv1/checkpoint-205", "epoch": 5.0, "eval_steps": 500, "global_step": 205, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04878048780487805, "grad_norm": 3.658454656600952, "learning_rate": 9.523809523809525e-07, "loss": 1.4175, "step": 2 }, { "epoch": 0.0975609756097561, "grad_norm": 3.2751364707946777, "learning_rate": 1.904761904761905e-06, "loss": 1.4034, "step": 4 }, { "epoch": 0.14634146341463414, "grad_norm": 2.307509422302246, "learning_rate": 2.8571428571428573e-06, "loss": 1.4033, "step": 6 }, { "epoch": 0.1951219512195122, "grad_norm": 7.359793186187744, "learning_rate": 3.80952380952381e-06, "loss": 1.5018, "step": 8 }, { "epoch": 0.24390243902439024, "grad_norm": 5.59666633605957, "learning_rate": 4.761904761904762e-06, "loss": 1.4805, "step": 10 }, { "epoch": 0.2926829268292683, "grad_norm": 4.317787170410156, "learning_rate": 5.7142857142857145e-06, "loss": 1.4057, "step": 12 }, { "epoch": 0.34146341463414637, "grad_norm": 5.5472493171691895, "learning_rate": 6.666666666666667e-06, "loss": 1.3436, "step": 14 }, { "epoch": 0.3902439024390244, "grad_norm": 3.424031972885132, "learning_rate": 7.61904761904762e-06, "loss": 1.377, "step": 16 }, { "epoch": 0.43902439024390244, "grad_norm": 3.460782051086426, "learning_rate": 8.571428571428571e-06, "loss": 1.3405, "step": 18 }, { "epoch": 0.4878048780487805, "grad_norm": 3.272519588470459, "learning_rate": 9.523809523809525e-06, "loss": 1.3732, "step": 20 }, { "epoch": 0.5365853658536586, "grad_norm": 2.9090654850006104, "learning_rate": 9.945652173913044e-06, "loss": 1.3401, "step": 22 }, { "epoch": 0.5853658536585366, "grad_norm": 2.4508166313171387, "learning_rate": 9.836956521739131e-06, "loss": 1.3892, "step": 24 }, { "epoch": 0.6341463414634146, "grad_norm": 3.865966320037842, "learning_rate": 9.728260869565218e-06, "loss": 1.3603, "step": 26 }, { "epoch": 0.6829268292682927, "grad_norm": 3.798243999481201, "learning_rate": 9.619565217391305e-06, "loss": 1.3379, "step": 28 }, { "epoch": 0.7317073170731707, "grad_norm": 3.441756248474121, "learning_rate": 9.510869565217392e-06, "loss": 1.3618, "step": 30 }, { "epoch": 0.7804878048780488, "grad_norm": 3.554654121398926, "learning_rate": 9.402173913043479e-06, "loss": 1.3416, "step": 32 }, { "epoch": 0.8292682926829268, "grad_norm": 2.5521254539489746, "learning_rate": 9.293478260869566e-06, "loss": 1.3339, "step": 34 }, { "epoch": 0.8780487804878049, "grad_norm": 3.268244743347168, "learning_rate": 9.184782608695653e-06, "loss": 1.299, "step": 36 }, { "epoch": 0.926829268292683, "grad_norm": 3.396571636199951, "learning_rate": 9.07608695652174e-06, "loss": 1.3283, "step": 38 }, { "epoch": 0.975609756097561, "grad_norm": 5.777693748474121, "learning_rate": 8.967391304347827e-06, "loss": 1.3184, "step": 40 }, { "epoch": 1.0, "eval_accuracy": 0.5487804878048781, "eval_f1_macro": 0.5079113924050633, "eval_f1_micro": 0.5487804878048781, "eval_f1_weighted": 0.5020067922198209, "eval_loss": 1.3049376010894775, "eval_precision_macro": 0.5905172413793103, "eval_precision_micro": 0.5487804878048781, "eval_precision_weighted": 0.5805298570227082, "eval_recall_macro": 0.55, "eval_recall_micro": 0.5487804878048781, "eval_recall_weighted": 0.5487804878048781, "eval_runtime": 0.4986, "eval_samples_per_second": 164.454, "eval_steps_per_second": 12.033, "step": 41 }, { "epoch": 1.024390243902439, "grad_norm": 3.4227399826049805, "learning_rate": 8.858695652173914e-06, "loss": 1.2624, "step": 42 }, { "epoch": 1.0731707317073171, "grad_norm": 3.7791168689727783, "learning_rate": 8.750000000000001e-06, "loss": 1.2806, "step": 44 }, { "epoch": 1.1219512195121952, "grad_norm": 5.1954498291015625, "learning_rate": 8.641304347826088e-06, "loss": 1.3604, "step": 46 }, { "epoch": 1.170731707317073, "grad_norm": 6.211756229400635, "learning_rate": 8.532608695652175e-06, "loss": 1.2671, "step": 48 }, { "epoch": 1.2195121951219512, "grad_norm": 3.809525489807129, "learning_rate": 8.423913043478262e-06, "loss": 1.2143, "step": 50 }, { "epoch": 1.2682926829268293, "grad_norm": 4.3469696044921875, "learning_rate": 8.315217391304349e-06, "loss": 1.2812, "step": 52 }, { "epoch": 1.3170731707317074, "grad_norm": 5.45401668548584, "learning_rate": 8.206521739130436e-06, "loss": 1.302, "step": 54 }, { "epoch": 1.3658536585365852, "grad_norm": 4.719168663024902, "learning_rate": 8.097826086956523e-06, "loss": 1.2044, "step": 56 }, { "epoch": 1.4146341463414633, "grad_norm": 5.400396823883057, "learning_rate": 7.98913043478261e-06, "loss": 1.2528, "step": 58 }, { "epoch": 1.4634146341463414, "grad_norm": 4.081637859344482, "learning_rate": 7.880434782608695e-06, "loss": 1.0832, "step": 60 }, { "epoch": 1.5121951219512195, "grad_norm": 8.081562995910645, "learning_rate": 7.771739130434784e-06, "loss": 1.1357, "step": 62 }, { "epoch": 1.5609756097560976, "grad_norm": 5.839077949523926, "learning_rate": 7.66304347826087e-06, "loss": 1.0947, "step": 64 }, { "epoch": 1.6097560975609757, "grad_norm": 5.829254627227783, "learning_rate": 7.5543478260869576e-06, "loss": 1.0257, "step": 66 }, { "epoch": 1.6585365853658538, "grad_norm": 5.4526448249816895, "learning_rate": 7.445652173913044e-06, "loss": 1.0797, "step": 68 }, { "epoch": 1.7073170731707317, "grad_norm": 6.3645524978637695, "learning_rate": 7.3369565217391315e-06, "loss": 0.9097, "step": 70 }, { "epoch": 1.7560975609756098, "grad_norm": 5.510509490966797, "learning_rate": 7.228260869565218e-06, "loss": 1.0205, "step": 72 }, { "epoch": 1.8048780487804879, "grad_norm": 6.244691371917725, "learning_rate": 7.119565217391305e-06, "loss": 1.0144, "step": 74 }, { "epoch": 1.8536585365853657, "grad_norm": 7.428989887237549, "learning_rate": 7.0108695652173915e-06, "loss": 0.9632, "step": 76 }, { "epoch": 1.9024390243902438, "grad_norm": 6.228334426879883, "learning_rate": 6.9021739130434785e-06, "loss": 0.8049, "step": 78 }, { "epoch": 1.951219512195122, "grad_norm": 4.784652233123779, "learning_rate": 6.793478260869566e-06, "loss": 0.7772, "step": 80 }, { "epoch": 2.0, "grad_norm": 6.2926740646362305, "learning_rate": 6.6847826086956524e-06, "loss": 0.8149, "step": 82 }, { "epoch": 2.0, "eval_accuracy": 0.926829268292683, "eval_f1_macro": 0.9260584103480483, "eval_f1_micro": 0.926829268292683, "eval_f1_weighted": 0.9262945592194588, "eval_loss": 0.7190239429473877, "eval_precision_macro": 0.9312770562770563, "eval_precision_micro": 0.926829268292683, "eval_precision_weighted": 0.9323989019110971, "eval_recall_macro": 0.9273809523809523, "eval_recall_micro": 0.926829268292683, "eval_recall_weighted": 0.926829268292683, "eval_runtime": 0.4833, "eval_samples_per_second": 169.67, "eval_steps_per_second": 12.415, "step": 82 }, { "epoch": 2.048780487804878, "grad_norm": 5.493005275726318, "learning_rate": 6.57608695652174e-06, "loss": 0.7457, "step": 84 }, { "epoch": 2.097560975609756, "grad_norm": 6.086998462677002, "learning_rate": 6.521739130434783e-06, "loss": 0.7334, "step": 86 }, { "epoch": 2.1463414634146343, "grad_norm": 5.742645740509033, "learning_rate": 6.41304347826087e-06, "loss": 0.8067, "step": 88 }, { "epoch": 2.1951219512195124, "grad_norm": 7.06817626953125, "learning_rate": 6.304347826086958e-06, "loss": 0.6018, "step": 90 }, { "epoch": 2.2439024390243905, "grad_norm": 5.059027194976807, "learning_rate": 6.195652173913044e-06, "loss": 0.6361, "step": 92 }, { "epoch": 2.292682926829268, "grad_norm": 7.772392272949219, "learning_rate": 6.086956521739132e-06, "loss": 0.7838, "step": 94 }, { "epoch": 2.341463414634146, "grad_norm": 4.42880916595459, "learning_rate": 5.978260869565218e-06, "loss": 0.6915, "step": 96 }, { "epoch": 2.3902439024390243, "grad_norm": 4.635205268859863, "learning_rate": 5.8695652173913055e-06, "loss": 0.5535, "step": 98 }, { "epoch": 2.4390243902439024, "grad_norm": 4.7791666984558105, "learning_rate": 5.760869565217392e-06, "loss": 0.5392, "step": 100 }, { "epoch": 2.4878048780487805, "grad_norm": 6.933748722076416, "learning_rate": 5.652173913043479e-06, "loss": 0.5618, "step": 102 }, { "epoch": 2.5365853658536586, "grad_norm": 5.073132514953613, "learning_rate": 5.543478260869566e-06, "loss": 0.4763, "step": 104 }, { "epoch": 2.5853658536585367, "grad_norm": 5.315598487854004, "learning_rate": 5.4347826086956525e-06, "loss": 0.4308, "step": 106 }, { "epoch": 2.6341463414634148, "grad_norm": 5.840494632720947, "learning_rate": 5.3260869565217395e-06, "loss": 0.4109, "step": 108 }, { "epoch": 2.682926829268293, "grad_norm": 5.477383613586426, "learning_rate": 5.2173913043478265e-06, "loss": 0.5133, "step": 110 }, { "epoch": 2.7317073170731705, "grad_norm": 4.808891296386719, "learning_rate": 5.108695652173914e-06, "loss": 0.4048, "step": 112 }, { "epoch": 2.7804878048780486, "grad_norm": 6.304122447967529, "learning_rate": 5e-06, "loss": 0.4002, "step": 114 }, { "epoch": 2.8292682926829267, "grad_norm": 6.4951348304748535, "learning_rate": 4.891304347826087e-06, "loss": 0.3788, "step": 116 }, { "epoch": 2.8780487804878048, "grad_norm": 3.5770864486694336, "learning_rate": 4.782608695652174e-06, "loss": 0.3845, "step": 118 }, { "epoch": 2.926829268292683, "grad_norm": 6.861332893371582, "learning_rate": 4.673913043478261e-06, "loss": 0.6693, "step": 120 }, { "epoch": 2.975609756097561, "grad_norm": 6.302731513977051, "learning_rate": 4.565217391304348e-06, "loss": 0.3862, "step": 122 }, { "epoch": 3.0, "eval_accuracy": 0.975609756097561, "eval_f1_macro": 0.974930590799176, "eval_f1_micro": 0.975609756097561, "eval_f1_weighted": 0.9752584323168365, "eval_loss": 0.28084805607795715, "eval_precision_macro": 0.9767316017316017, "eval_precision_micro": 0.975609756097561, "eval_precision_weighted": 0.9767447999155316, "eval_recall_macro": 0.975, "eval_recall_micro": 0.975609756097561, "eval_recall_weighted": 0.975609756097561, "eval_runtime": 0.496, "eval_samples_per_second": 165.313, "eval_steps_per_second": 12.096, "step": 123 }, { "epoch": 3.024390243902439, "grad_norm": 5.4799089431762695, "learning_rate": 4.456521739130435e-06, "loss": 0.4782, "step": 124 }, { "epoch": 3.073170731707317, "grad_norm": 4.1216254234313965, "learning_rate": 4.347826086956522e-06, "loss": 0.2859, "step": 126 }, { "epoch": 3.1219512195121952, "grad_norm": 2.9940977096557617, "learning_rate": 4.239130434782609e-06, "loss": 0.2228, "step": 128 }, { "epoch": 3.1707317073170733, "grad_norm": 5.191917419433594, "learning_rate": 4.130434782608696e-06, "loss": 0.4608, "step": 130 }, { "epoch": 3.2195121951219514, "grad_norm": 8.951691627502441, "learning_rate": 4.021739130434783e-06, "loss": 0.3534, "step": 132 }, { "epoch": 3.2682926829268295, "grad_norm": 3.320030450820923, "learning_rate": 3.91304347826087e-06, "loss": 0.3589, "step": 134 }, { "epoch": 3.317073170731707, "grad_norm": 5.8095927238464355, "learning_rate": 3.804347826086957e-06, "loss": 0.3599, "step": 136 }, { "epoch": 3.3658536585365852, "grad_norm": 4.727943420410156, "learning_rate": 3.6956521739130436e-06, "loss": 0.394, "step": 138 }, { "epoch": 3.4146341463414633, "grad_norm": 3.2468807697296143, "learning_rate": 3.5869565217391305e-06, "loss": 0.2072, "step": 140 }, { "epoch": 3.4634146341463414, "grad_norm": 2.6724114418029785, "learning_rate": 3.4782608695652175e-06, "loss": 0.2368, "step": 142 }, { "epoch": 3.5121951219512195, "grad_norm": 7.218781471252441, "learning_rate": 3.3695652173913045e-06, "loss": 0.3322, "step": 144 }, { "epoch": 3.5609756097560976, "grad_norm": 4.017630577087402, "learning_rate": 3.2608695652173914e-06, "loss": 0.2232, "step": 146 }, { "epoch": 3.6097560975609757, "grad_norm": 3.567613363265991, "learning_rate": 3.152173913043479e-06, "loss": 0.3167, "step": 148 }, { "epoch": 3.658536585365854, "grad_norm": 5.940097332000732, "learning_rate": 3.043478260869566e-06, "loss": 0.3129, "step": 150 }, { "epoch": 3.7073170731707314, "grad_norm": 1.8391088247299194, "learning_rate": 2.9347826086956528e-06, "loss": 0.2177, "step": 152 }, { "epoch": 3.7560975609756095, "grad_norm": 6.80277681350708, "learning_rate": 2.8260869565217393e-06, "loss": 0.2086, "step": 154 }, { "epoch": 3.8048780487804876, "grad_norm": 5.100131511688232, "learning_rate": 2.7173913043478263e-06, "loss": 0.2072, "step": 156 }, { "epoch": 3.8536585365853657, "grad_norm": 7.363709449768066, "learning_rate": 2.6086956521739132e-06, "loss": 0.4223, "step": 158 }, { "epoch": 3.902439024390244, "grad_norm": 2.784132480621338, "learning_rate": 2.5e-06, "loss": 0.2342, "step": 160 }, { "epoch": 3.951219512195122, "grad_norm": 10.512962341308594, "learning_rate": 2.391304347826087e-06, "loss": 0.3363, "step": 162 }, { "epoch": 4.0, "grad_norm": 2.996764659881592, "learning_rate": 2.282608695652174e-06, "loss": 0.1728, "step": 164 }, { "epoch": 4.0, "eval_accuracy": 0.975609756097561, "eval_f1_macro": 0.974930590799176, "eval_f1_micro": 0.975609756097561, "eval_f1_weighted": 0.9752584323168365, "eval_loss": 0.17828890681266785, "eval_precision_macro": 0.9767316017316017, "eval_precision_micro": 0.975609756097561, "eval_precision_weighted": 0.9767447999155316, "eval_recall_macro": 0.975, "eval_recall_micro": 0.975609756097561, "eval_recall_weighted": 0.975609756097561, "eval_runtime": 0.486, "eval_samples_per_second": 168.741, "eval_steps_per_second": 12.347, "step": 164 }, { "epoch": 4.048780487804878, "grad_norm": 11.200080871582031, "learning_rate": 2.173913043478261e-06, "loss": 0.3192, "step": 166 }, { "epoch": 4.097560975609756, "grad_norm": 4.50062370300293, "learning_rate": 2.065217391304348e-06, "loss": 0.3224, "step": 168 }, { "epoch": 4.146341463414634, "grad_norm": 2.1641335487365723, "learning_rate": 1.956521739130435e-06, "loss": 0.2588, "step": 170 }, { "epoch": 4.195121951219512, "grad_norm": 6.210737705230713, "learning_rate": 1.8478260869565218e-06, "loss": 0.1391, "step": 172 }, { "epoch": 4.2439024390243905, "grad_norm": 4.513918876647949, "learning_rate": 1.7391304347826088e-06, "loss": 0.1949, "step": 174 }, { "epoch": 4.2926829268292686, "grad_norm": 5.105907917022705, "learning_rate": 1.6304347826086957e-06, "loss": 0.4274, "step": 176 }, { "epoch": 4.341463414634147, "grad_norm": 13.60393238067627, "learning_rate": 1.521739130434783e-06, "loss": 0.3062, "step": 178 }, { "epoch": 4.390243902439025, "grad_norm": 2.7764010429382324, "learning_rate": 1.4130434782608697e-06, "loss": 0.1415, "step": 180 }, { "epoch": 4.439024390243903, "grad_norm": 5.599649906158447, "learning_rate": 1.3043478260869566e-06, "loss": 0.308, "step": 182 }, { "epoch": 4.487804878048781, "grad_norm": 11.585519790649414, "learning_rate": 1.1956521739130436e-06, "loss": 0.3944, "step": 184 }, { "epoch": 4.536585365853659, "grad_norm": 2.6623806953430176, "learning_rate": 1.0869565217391306e-06, "loss": 0.1589, "step": 186 }, { "epoch": 4.585365853658536, "grad_norm": 12.680092811584473, "learning_rate": 9.782608695652175e-07, "loss": 0.236, "step": 188 }, { "epoch": 4.634146341463414, "grad_norm": 2.8459999561309814, "learning_rate": 8.695652173913044e-07, "loss": 0.284, "step": 190 }, { "epoch": 4.682926829268292, "grad_norm": 2.673713207244873, "learning_rate": 7.608695652173914e-07, "loss": 0.1493, "step": 192 }, { "epoch": 4.7317073170731705, "grad_norm": 2.6639437675476074, "learning_rate": 6.521739130434783e-07, "loss": 0.1627, "step": 194 }, { "epoch": 4.780487804878049, "grad_norm": 1.504746913909912, "learning_rate": 5.434782608695653e-07, "loss": 0.1257, "step": 196 }, { "epoch": 4.829268292682927, "grad_norm": 7.770730018615723, "learning_rate": 4.347826086956522e-07, "loss": 0.1999, "step": 198 }, { "epoch": 4.878048780487805, "grad_norm": 2.6138458251953125, "learning_rate": 3.2608695652173915e-07, "loss": 0.1497, "step": 200 }, { "epoch": 4.926829268292683, "grad_norm": 4.859775066375732, "learning_rate": 2.173913043478261e-07, "loss": 0.1766, "step": 202 }, { "epoch": 4.975609756097561, "grad_norm": 4.779507160186768, "learning_rate": 1.0869565217391305e-07, "loss": 0.1548, "step": 204 }, { "epoch": 5.0, "eval_accuracy": 0.975609756097561, "eval_f1_macro": 0.974930590799176, "eval_f1_micro": 0.975609756097561, "eval_f1_weighted": 0.9752584323168365, "eval_loss": 0.15990132093429565, "eval_precision_macro": 0.9767316017316017, "eval_precision_micro": 0.975609756097561, "eval_precision_weighted": 0.9767447999155316, "eval_recall_macro": 0.975, "eval_recall_micro": 0.975609756097561, "eval_recall_weighted": 0.975609756097561, "eval_runtime": 0.4995, "eval_samples_per_second": 164.175, "eval_steps_per_second": 12.013, "step": 205 } ], "logging_steps": 2, "max_steps": 205, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 215929561128960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }