{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.3807365439093484,
  "global_step": 228,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.01, "learning_rate": 0.00016, "loss": 2.092, "step": 2 },
    { "epoch": 0.02, "learning_rate": 0.00032, "loss": 2.0773, "step": 4 },
    { "epoch": 0.04, "learning_rate": 0.0003999958893916732, "loss": 1.9915, "step": 6 },
    { "epoch": 0.05, "learning_rate": 0.00039996300553887804, "loss": 2.1242, "step": 8 },
    { "epoch": 0.06, "learning_rate": 0.0003998972432401376, "loss": 2.0798, "step": 10 },
    { "epoch": 0.07, "learning_rate": 0.00039979861330826294, "loss": 2.099, "step": 12 },
    { "epoch": 0.08, "learning_rate": 0.0003996671319602482, "loss": 2.0562, "step": 14 },
    { "epoch": 0.1, "learning_rate": 0.00039950282081460415, "loss": 2.0909, "step": 16 },
    { "epoch": 0.11, "learning_rate": 0.0003993057068878037, "loss": 2.0784, "step": 18 },
    { "epoch": 0.12, "learning_rate": 0.00039907582258983965, "loss": 2.0465, "step": 20 },
    { "epoch": 0.13, "learning_rate": 0.00039881320571889605, "loss": 2.173, "step": 22 },
    { "epoch": 0.15, "learning_rate": 0.000398517899455133, "loss": 2.0897, "step": 24 },
    { "epoch": 0.16, "learning_rate": 0.00039818995235358696, "loss": 2.057, "step": 26 },
    { "epoch": 0.17, "learning_rate": 0.00039782941833618724, "loss": 2.0833, "step": 28 },
    { "epoch": 0.18, "learning_rate": 0.00039743635668289007, "loss": 2.0256, "step": 30 },
    { "epoch": 0.19, "learning_rate": 0.0003970108320219314, "loss": 2.0925, "step": 32 },
    { "epoch": 0.21, "learning_rate": 0.0003965529143192008, "loss": 2.0337, "step": 34 },
    { "epoch": 0.22, "learning_rate": 0.0003960626788667375, "loss": 2.0865, "step": 36 },
    { "epoch": 0.23, "learning_rate": 0.00039554020627035035, "loss": 2.164, "step": 38 },
    { "epoch": 0.24, "learning_rate": 0.0003949855824363647, "loss": 2.1327, "step": 40 },
    { "epoch": 0.25, "learning_rate": 0.00039439889855749763, "loss": 2.171, "step": 42 },
    { "epoch": 0.27, "learning_rate": 0.0003937802510978631, "loss": 2.0629, "step": 44 },
    { "epoch": 0.28, "learning_rate": 0.00039312974177711183, "loss": 2.0344, "step": 46 },
    { "epoch": 0.29, "learning_rate": 0.0003924474775537058, "loss": 2.071, "step": 48 },
    { "epoch": 0.3, "learning_rate": 0.0003917335706073321, "loss": 2.11, "step": 50 },
    { "epoch": 0.31, "learning_rate": 0.0003909881383204581, "loss": 2.1477, "step": 52 },
    { "epoch": 0.33, "learning_rate": 0.00039021130325903074, "loss": 2.087, "step": 54 },
    { "epoch": 0.34, "learning_rate": 0.0003894031931523243, "loss": 2.0943, "step": 56 },
    { "epoch": 0.35, "learning_rate": 0.0003885639408719386, "loss": 2.0061, "step": 58 },
    { "epoch": 0.36, "learning_rate": 0.0003876936844099521, "loss": 2.0888, "step": 60 },
    { "epoch": 0.37, "learning_rate": 0.00038679256685623264, "loss": 2.1795, "step": 62 },
    { "epoch": 0.39, "learning_rate": 0.0003858607363749104, "loss": 2.1248, "step": 64 },
    { "epoch": 0.4, "learning_rate": 0.00038489834618001635, "loss": 2.0761, "step": 66 },
    { "epoch": 0.41, "learning_rate": 0.0003839055545102902, "loss": 2.1293, "step": 68 },
    { "epoch": 0.42, "learning_rate": 0.00038288252460316253, "loss": 2.0532, "step": 70 },
    { "epoch": 0.44, "learning_rate": 0.000381829424667915, "loss": 2.1937, "step": 72 },
    { "epoch": 0.45, "learning_rate": 0.0003807464278580227, "loss": 2.1357, "step": 74 },
    { "epoch": 0.46, "learning_rate": 0.0003796337122426838, "loss": 2.132, "step": 76 },
    { "epoch": 0.47, "learning_rate": 0.0003784914607775412, "loss": 2.0785, "step": 78 },
    { "epoch": 0.48, "learning_rate": 0.00037731986127460007, "loss": 2.1671, "step": 80 },
    { "epoch": 0.5, "learning_rate": 0.0003761191063713476, "loss": 2.1016, "step": 82 },
    { "epoch": 0.51, "learning_rate": 0.00037488939349907913, "loss": 2.1284, "step": 84 },
    { "epoch": 0.52, "learning_rate": 0.0003736309248504357, "loss": 2.1275, "step": 86 },
    { "epoch": 0.53, "learning_rate": 0.000372343907346159, "loss": 2.0667, "step": 88 },
    { "epoch": 0.54, "learning_rate": 0.00037102855260106926, "loss": 2.1596, "step": 90 },
    { "epoch": 0.56, "learning_rate": 0.0003696850768892705, "loss": 2.1802, "step": 92 },
    { "epoch": 0.57, "learning_rate": 0.00036831370110859067, "loss": 2.0789, "step": 94 },
    { "epoch": 0.58, "learning_rate": 0.00036691465074426054, "loss": 2.1284, "step": 96 },
    { "epoch": 0.59, "learning_rate": 0.0003654881558318393, "loss": 2.1499, "step": 98 },
    { "epoch": 0.6, "learning_rate": 0.0003640344509193912, "loss": 2.063, "step": 100 },
    { "epoch": 0.62, "learning_rate": 0.00036255377502892054, "loss": 2.0843, "step": 102 },
    { "epoch": 0.63, "learning_rate": 0.0003610463716170712, "loss": 2.0807, "step": 104 },
    { "epoch": 0.64, "learning_rate": 0.00035951248853509696, "loss": 2.0838, "step": 106 },
    { "epoch": 0.65, "learning_rate": 0.00035795237798810846, "loss": 2.1357, "step": 108 },
    { "epoch": 0.66, "learning_rate": 0.000356366296493606, "loss": 2.1482, "step": 110 },
    { "epoch": 0.68, "learning_rate": 0.00035475450483930137, "loss": 2.0468, "step": 112 },
    { "epoch": 0.69, "learning_rate": 0.00035311726804023894, "loss": 2.0996, "step": 114 },
    { "epoch": 0.7, "learning_rate": 0.0003514548552952211, "loss": 2.1049, "step": 116 },
    { "epoch": 0.71, "learning_rate": 0.00034976753994254566, "loss": 2.07, "step": 118 },
    { "epoch": 0.73, "learning_rate": 0.0003480555994150631, "loss": 2.1217, "step": 120 },
    { "epoch": 0.74, "learning_rate": 0.0003463193151945603, "loss": 2.0953, "step": 122 },
    { "epoch": 0.75, "learning_rate": 0.0003445589727654783, "loss": 2.1044, "step": 124 },
    { "epoch": 0.76, "learning_rate": 0.0003427748615679726, "loss": 2.0992, "step": 126 },
    { "epoch": 0.77, "learning_rate": 0.00034096727495032235, "loss": 2.1416, "step": 128 },
    { "epoch": 0.79, "learning_rate": 0.00033913651012069734, "loss": 2.1709, "step": 130 },
    { "epoch": 0.8, "learning_rate": 0.0003372828680982901, "loss": 2.1202, "step": 132 },
    { "epoch": 0.81, "learning_rate": 0.0003354066536638219, "loss": 2.145, "step": 134 },
    { "epoch": 0.82, "learning_rate": 0.0003335081753094296, "loss": 2.1667, "step": 136 },
    { "epoch": 0.83, "learning_rate": 0.00033158774518794254, "loss": 2.0619, "step": 138 },
    { "epoch": 0.85, "learning_rate": 0.00032964567906155774, "loss": 2.0791, "step": 140 },
    { "epoch": 0.86, "learning_rate": 0.0003276822962499211, "loss": 2.1484, "step": 142 },
    { "epoch": 0.87, "learning_rate": 0.0003256979195776247, "loss": 2.1358, "step": 144 },
    { "epoch": 0.88, "learning_rate": 0.00032369287532112634, "loss": 2.0854, "step": 146 },
    { "epoch": 0.89, "learning_rate": 0.0003216674931551027, "loss": 2.1297, "step": 148 },
    { "epoch": 0.91, "learning_rate": 0.00031962210609824324, "loss": 2.0643, "step": 150 },
    { "epoch": 0.92, "learning_rate": 0.00031755705045849464, "loss": 2.1189, "step": 152 },
    { "epoch": 0.93, "learning_rate": 0.00031547266577776394, "loss": 2.1361, "step": 154 },
    { "epoch": 0.94, "learning_rate": 0.00031336929477609, "loss": 2.0426, "step": 156 },
    { "epoch": 0.95, "learning_rate": 0.000311247283295293, "loss": 2.0954, "step": 158 },
    { "epoch": 0.97, "learning_rate": 0.0003091069802421097, "loss": 2.1072, "step": 160 },
    { "epoch": 0.98, "learning_rate": 0.00030694873753082594, "loss": 2.1506, "step": 162 },
    { "epoch": 0.99, "learning_rate": 0.0003047729100254131, "loss": 2.1293, "step": 164 },
    { "epoch": 1.01, "learning_rate": 0.00030257985548118126, "loss": 2.5226, "step": 166 },
    { "epoch": 1.02, "learning_rate": 0.00030036993448595556, "loss": 2.0338, "step": 168 },
    { "epoch": 1.03, "learning_rate": 0.0002981435104007876, "loss": 2.0049, "step": 170 },
    { "epoch": 1.04, "learning_rate": 0.0002959009493002108, "loss": 2.0443, "step": 172 },
    { "epoch": 1.05, "learning_rate": 0.0002936426199120492, "loss": 2.0638, "step": 174 },
    { "epoch": 1.07, "learning_rate": 0.0002913688935567903, "loss": 2.039, "step": 176 },
    { "epoch": 1.08, "learning_rate": 0.0002890801440865318, "loss": 1.9961, "step": 178 },
    { "epoch": 1.09, "learning_rate": 0.00028677674782351165, "loss": 1.9829, "step": 180 },
    { "epoch": 1.1, "learning_rate": 0.000284459083498232, "loss": 1.9802, "step": 182 },
    { "epoch": 1.11, "learning_rate": 0.0002821275321871876, "loss": 2.012, "step": 184 },
    { "epoch": 1.13, "learning_rate": 0.0002797824772502084, "loss": 1.9602, "step": 186 },
    { "epoch": 1.14, "learning_rate": 0.00027742430426742567, "loss": 2.1046, "step": 188 },
    { "epoch": 1.15, "learning_rate": 0.0002750534009758749, "loss": 2.046, "step": 190 },
    { "epoch": 1.16, "learning_rate": 0.0002726701572057423, "loss": 2.0335, "step": 192 },
    { "epoch": 1.18, "learning_rate": 0.0002702749648162686, "loss": 1.9656, "step": 194 },
    { "epoch": 1.19, "learning_rate": 0.0002678682176313175, "loss": 1.9843, "step": 196 },
    { "epoch": 1.2, "learning_rate": 0.0002654503113746234, "loss": 2.0132, "step": 198 },
    { "epoch": 1.21, "learning_rate": 0.0002630216436047242, "loss": 2.0522, "step": 200 },
    { "epoch": 1.22, "learning_rate": 0.00026058261364959444, "loss": 2.0108, "step": 202 },
    { "epoch": 1.24, "learning_rate": 0.00025813362254098677, "loss": 2.0582, "step": 204 },
    { "epoch": 1.25, "learning_rate": 0.0002556750729484927, "loss": 1.9982, "step": 206 },
    { "epoch": 1.26, "learning_rate": 0.00025320736911333503, "loss": 2.0753, "step": 208 },
    { "epoch": 1.27, "learning_rate": 0.0002507309167819015, "loss": 2.04, "step": 210 },
    { "epoch": 1.28, "learning_rate": 0.00024824612313903053, "loss": 1.9574, "step": 212 },
    { "epoch": 1.3, "learning_rate": 0.0002457533967410611, "loss": 2.06, "step": 214 },
    { "epoch": 1.31, "learning_rate": 0.00024325314744865672, "loss": 2.0338, "step": 216 },
    { "epoch": 1.32, "learning_rate": 0.00024074578635941514, "loss": 2.012, "step": 218 },
    { "epoch": 1.33, "learning_rate": 0.0002382317257402745, "loss": 2.0156, "step": 220 },
    { "epoch": 1.34, "learning_rate": 0.00023571137895972733, "loss": 2.0225, "step": 222 },
    { "epoch": 1.36, "learning_rate": 0.0002331851604198536, "loss": 2.0278, "step": 224 },
    { "epoch": 1.37, "learning_rate": 0.00023065348548818317, "loss": 2.1048, "step": 226 },
    { "epoch": 1.38, "learning_rate": 0.00022811677042940058, "loss": 1.9557, "step": 228 }
  ],
  "max_steps": 495,
  "num_train_epochs": 3,
  "total_flos": 1.0784042558129439e+18,
  "trial_name": null,
  "trial_params": null
}