|
{ |
|
"best_metric": 0.87, |
|
"best_model_checkpoint": "distilhubert-finetuned-gtzan/checkpoint-1356", |
|
"epoch": 12.0, |
|
"global_step": 1356, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.1061946902654867e-06, |
|
"loss": 2.3091, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.2123893805309734e-06, |
|
"loss": 2.3013, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.3185840707964607e-06, |
|
"loss": 2.304, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.424778761061947e-06, |
|
"loss": 2.307, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.5309734513274346e-06, |
|
"loss": 2.3035, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 6.6371681415929215e-06, |
|
"loss": 2.2975, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.743362831858407e-06, |
|
"loss": 2.2995, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 8.849557522123894e-06, |
|
"loss": 2.2945, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.95575221238938e-06, |
|
"loss": 2.2868, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1061946902654869e-05, |
|
"loss": 2.2764, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.2168141592920354e-05, |
|
"loss": 2.2802, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.3274336283185843e-05, |
|
"loss": 2.2647, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.4380530973451328e-05, |
|
"loss": 2.2792, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.5486725663716813e-05, |
|
"loss": 2.2499, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6592920353982302e-05, |
|
"loss": 2.2809, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.7699115044247787e-05, |
|
"loss": 2.2683, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.8805309734513272e-05, |
|
"loss": 2.2492, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.991150442477876e-05, |
|
"loss": 2.239, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.101769911504425e-05, |
|
"loss": 2.2239, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.2123893805309738e-05, |
|
"loss": 2.1822, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.3230088495575223e-05, |
|
"loss": 2.1901, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.433628318584071e-05, |
|
"loss": 2.2018, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.37, |
|
"eval_loss": 2.124462842941284, |
|
"eval_runtime": 37.2852, |
|
"eval_samples_per_second": 2.682, |
|
"eval_steps_per_second": 0.349, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.5442477876106197e-05, |
|
"loss": 2.1462, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.6548672566371686e-05, |
|
"loss": 2.1083, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.7654867256637168e-05, |
|
"loss": 2.0828, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.8761061946902656e-05, |
|
"loss": 2.0594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.9867256637168145e-05, |
|
"loss": 2.0813, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.097345132743363e-05, |
|
"loss": 2.0167, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.185840707964602e-05, |
|
"loss": 1.9729, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.296460176991151e-05, |
|
"loss": 1.9886, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.407079646017699e-05, |
|
"loss": 1.9951, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.517699115044248e-05, |
|
"loss": 1.9228, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.628318584070797e-05, |
|
"loss": 1.9038, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.7389380530973455e-05, |
|
"loss": 1.857, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 3.849557522123894e-05, |
|
"loss": 1.922, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.9601769911504426e-05, |
|
"loss": 1.8138, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.0707964601769914e-05, |
|
"loss": 1.7358, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.1814159292035396e-05, |
|
"loss": 1.6802, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 4.2920353982300885e-05, |
|
"loss": 1.6438, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.4026548672566373e-05, |
|
"loss": 1.67, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.491150442477876e-05, |
|
"loss": 1.8038, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.579646017699115e-05, |
|
"loss": 1.7205, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.690265486725664e-05, |
|
"loss": 1.7188, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.800884955752213e-05, |
|
"loss": 1.5285, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.911504424778761e-05, |
|
"loss": 1.5528, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.56, |
|
"eval_loss": 1.5200048685073853, |
|
"eval_runtime": 36.9619, |
|
"eval_samples_per_second": 2.705, |
|
"eval_steps_per_second": 0.352, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.997541789577188e-05, |
|
"loss": 1.5776, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.985250737463127e-05, |
|
"loss": 1.5497, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.972959685349066e-05, |
|
"loss": 1.4853, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.960668633235005e-05, |
|
"loss": 1.5263, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.9483775811209444e-05, |
|
"loss": 1.4766, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.936086529006883e-05, |
|
"loss": 1.4554, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 4.923795476892822e-05, |
|
"loss": 1.3716, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.911504424778761e-05, |
|
"loss": 1.3669, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.8992133726647006e-05, |
|
"loss": 1.4348, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.8869223205506394e-05, |
|
"loss": 1.275, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 4.874631268436578e-05, |
|
"loss": 1.4206, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 4.862340216322517e-05, |
|
"loss": 1.4962, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 4.850049164208457e-05, |
|
"loss": 1.2934, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 4.8377581120943956e-05, |
|
"loss": 1.263, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.825467059980335e-05, |
|
"loss": 1.2592, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.813176007866273e-05, |
|
"loss": 1.1302, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.800884955752213e-05, |
|
"loss": 1.242, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 4.788593903638152e-05, |
|
"loss": 1.2154, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.776302851524091e-05, |
|
"loss": 1.1873, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 4.7640117994100294e-05, |
|
"loss": 1.2688, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 4.751720747295968e-05, |
|
"loss": 1.1488, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.739429695181908e-05, |
|
"loss": 1.2506, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 1.1110644340515137, |
|
"eval_runtime": 37.2461, |
|
"eval_samples_per_second": 2.685, |
|
"eval_steps_per_second": 0.349, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 4.727138643067847e-05, |
|
"loss": 1.2124, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.714847590953786e-05, |
|
"loss": 1.1278, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.7025565388397244e-05, |
|
"loss": 1.1197, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.690265486725664e-05, |
|
"loss": 0.9486, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.677974434611603e-05, |
|
"loss": 1.0949, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.6656833824975424e-05, |
|
"loss": 1.1837, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.655850540806293e-05, |
|
"loss": 1.0059, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.643559488692232e-05, |
|
"loss": 1.0395, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 4.631268436578171e-05, |
|
"loss": 1.1674, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 4.6189773844641105e-05, |
|
"loss": 0.9608, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.606686332350049e-05, |
|
"loss": 1.2654, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.594395280235988e-05, |
|
"loss": 0.8788, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.582104228121927e-05, |
|
"loss": 0.9846, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.5698131760078666e-05, |
|
"loss": 0.8597, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.5575221238938055e-05, |
|
"loss": 1.1485, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 4.545231071779745e-05, |
|
"loss": 1.0469, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 4.532940019665683e-05, |
|
"loss": 0.9581, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 4.520648967551623e-05, |
|
"loss": 0.9821, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.5083579154375616e-05, |
|
"loss": 1.0122, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.496066863323501e-05, |
|
"loss": 0.8543, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 4.48377581120944e-05, |
|
"loss": 0.8246, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.471484759095379e-05, |
|
"loss": 0.891, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.459193706981318e-05, |
|
"loss": 1.0517, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.950592041015625, |
|
"eval_runtime": 37.0565, |
|
"eval_samples_per_second": 2.699, |
|
"eval_steps_per_second": 0.351, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.446902654867257e-05, |
|
"loss": 0.7031, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 4.434611602753196e-05, |
|
"loss": 0.6235, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 4.422320550639135e-05, |
|
"loss": 0.8002, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 4.410029498525074e-05, |
|
"loss": 0.9738, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 4.397738446411013e-05, |
|
"loss": 0.8511, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.385447394296952e-05, |
|
"loss": 0.6335, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 4.373156342182891e-05, |
|
"loss": 0.644, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 4.36086529006883e-05, |
|
"loss": 0.8481, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 4.348574237954769e-05, |
|
"loss": 0.6972, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 4.3362831858407084e-05, |
|
"loss": 0.8044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 4.323992133726647e-05, |
|
"loss": 0.7073, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 4.311701081612586e-05, |
|
"loss": 0.8473, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.301868239921337e-05, |
|
"loss": 0.7983, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 4.2895771878072765e-05, |
|
"loss": 0.5702, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 4.2772861356932154e-05, |
|
"loss": 0.649, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 4.264995083579155e-05, |
|
"loss": 0.6669, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.252704031465093e-05, |
|
"loss": 0.5829, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.2404129793510327e-05, |
|
"loss": 0.65, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 4.2281219272369715e-05, |
|
"loss": 0.81, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 4.215830875122911e-05, |
|
"loss": 0.7507, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 4.20353982300885e-05, |
|
"loss": 0.6655, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 4.191248770894789e-05, |
|
"loss": 0.6181, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.178957718780728e-05, |
|
"loss": 0.8272, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.83, |
|
"eval_loss": 0.7394164800643921, |
|
"eval_runtime": 37.0651, |
|
"eval_samples_per_second": 2.698, |
|
"eval_steps_per_second": 0.351, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.6379, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 4.154375614552606e-05, |
|
"loss": 0.5593, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.142084562438545e-05, |
|
"loss": 0.524, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 4.129793510324484e-05, |
|
"loss": 0.4926, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 4.1175024582104233e-05, |
|
"loss": 0.6712, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.105211406096362e-05, |
|
"loss": 0.59, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 4.092920353982301e-05, |
|
"loss": 0.3684, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 4.08062930186824e-05, |
|
"loss": 0.5484, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 4.068338249754179e-05, |
|
"loss": 0.4718, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 4.0560471976401183e-05, |
|
"loss": 0.5205, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.043756145526057e-05, |
|
"loss": 0.4103, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.031465093411996e-05, |
|
"loss": 0.6238, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 4.019174041297935e-05, |
|
"loss": 0.391, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 4.0068829891838745e-05, |
|
"loss": 0.626, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 3.9945919370698133e-05, |
|
"loss": 0.5142, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 3.982300884955752e-05, |
|
"loss": 0.5664, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 3.970009832841691e-05, |
|
"loss": 0.3242, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 3.9577187807276306e-05, |
|
"loss": 0.4262, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 3.9454277286135695e-05, |
|
"loss": 0.6169, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 3.9331366764995083e-05, |
|
"loss": 0.436, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 3.920845624385447e-05, |
|
"loss": 0.2636, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 3.908554572271387e-05, |
|
"loss": 0.3728, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.81, |
|
"eval_loss": 0.6849124431610107, |
|
"eval_runtime": 37.0876, |
|
"eval_samples_per_second": 2.696, |
|
"eval_steps_per_second": 0.351, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 3.8962635201573256e-05, |
|
"loss": 0.4045, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 3.883972468043265e-05, |
|
"loss": 0.2084, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 3.8716814159292034e-05, |
|
"loss": 0.3604, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 3.859390363815143e-05, |
|
"loss": 0.3663, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 3.847099311701082e-05, |
|
"loss": 0.3161, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.834808259587021e-05, |
|
"loss": 0.4456, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 3.82251720747296e-05, |
|
"loss": 0.2935, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.810226155358899e-05, |
|
"loss": 0.2577, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 3.797935103244838e-05, |
|
"loss": 0.2841, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 3.7856440511307774e-05, |
|
"loss": 0.2892, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 3.773352999016716e-05, |
|
"loss": 0.2578, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 3.7610619469026545e-05, |
|
"loss": 0.2938, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 3.748770894788594e-05, |
|
"loss": 0.2936, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 3.736479842674533e-05, |
|
"loss": 0.426, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 3.7241887905604724e-05, |
|
"loss": 0.2328, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 3.711897738446411e-05, |
|
"loss": 0.2715, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.69960668633235e-05, |
|
"loss": 0.3068, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 3.687315634218289e-05, |
|
"loss": 0.4009, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.6750245821042286e-05, |
|
"loss": 0.3685, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 3.6627335299901674e-05, |
|
"loss": 0.3065, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.650442477876106e-05, |
|
"loss": 0.2677, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.638151425762045e-05, |
|
"loss": 0.3096, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.625860373647985e-05, |
|
"loss": 0.3878, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.647432267665863, |
|
"eval_runtime": 36.7252, |
|
"eval_samples_per_second": 2.723, |
|
"eval_steps_per_second": 0.354, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 3.6135693215339236e-05, |
|
"loss": 0.1663, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 3.6012782694198624e-05, |
|
"loss": 0.2787, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 3.588987217305801e-05, |
|
"loss": 0.1559, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.576696165191741e-05, |
|
"loss": 0.1331, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 3.56440511307768e-05, |
|
"loss": 0.1643, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 3.5521140609636186e-05, |
|
"loss": 0.1594, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 3.54228121927237e-05, |
|
"loss": 0.1871, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 3.529990167158309e-05, |
|
"loss": 0.1745, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 3.517699115044248e-05, |
|
"loss": 0.125, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 3.5054080629301874e-05, |
|
"loss": 0.1259, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 3.493117010816126e-05, |
|
"loss": 0.2476, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 3.480825958702065e-05, |
|
"loss": 0.0858, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 3.468534906588004e-05, |
|
"loss": 0.1988, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 3.4562438544739435e-05, |
|
"loss": 0.1398, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 3.4439528023598824e-05, |
|
"loss": 0.1934, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 3.431661750245821e-05, |
|
"loss": 0.0942, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 3.41937069813176e-05, |
|
"loss": 0.1991, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 3.407079646017699e-05, |
|
"loss": 0.1436, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 3.3947885939036385e-05, |
|
"loss": 0.0865, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 3.3824975417895774e-05, |
|
"loss": 0.1405, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 3.370206489675516e-05, |
|
"loss": 0.285, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 3.357915437561455e-05, |
|
"loss": 0.1119, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.83, |
|
"eval_loss": 0.5846229791641235, |
|
"eval_runtime": 37.7892, |
|
"eval_samples_per_second": 2.646, |
|
"eval_steps_per_second": 0.344, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 3.3456243854473946e-05, |
|
"loss": 0.1696, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1067, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 3.3210422812192724e-05, |
|
"loss": 0.0852, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 3.308751229105211e-05, |
|
"loss": 0.1015, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 3.296460176991151e-05, |
|
"loss": 0.1699, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 3.2841691248770896e-05, |
|
"loss": 0.0691, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 3.2718780727630285e-05, |
|
"loss": 0.0661, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 3.2595870206489674e-05, |
|
"loss": 0.0512, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 3.247295968534907e-05, |
|
"loss": 0.1003, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 3.235004916420846e-05, |
|
"loss": 0.0523, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.222713864306785e-05, |
|
"loss": 0.0893, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 3.2104228121927235e-05, |
|
"loss": 0.053, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"learning_rate": 3.198131760078663e-05, |
|
"loss": 0.0592, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 3.185840707964602e-05, |
|
"loss": 0.0779, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 3.1735496558505414e-05, |
|
"loss": 0.0611, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 3.16125860373648e-05, |
|
"loss": 0.0776, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 3.148967551622419e-05, |
|
"loss": 0.1722, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 3.136676499508358e-05, |
|
"loss": 0.0809, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 3.124385447394297e-05, |
|
"loss": 0.185, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 3.1120943952802364e-05, |
|
"loss": 0.0357, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 3.0998033431661746e-05, |
|
"loss": 0.082, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 3.087512291052114e-05, |
|
"loss": 0.1928, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 3.075221238938053e-05, |
|
"loss": 0.1312, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.84, |
|
"eval_loss": 0.5652107000350952, |
|
"eval_runtime": 36.9689, |
|
"eval_samples_per_second": 2.705, |
|
"eval_steps_per_second": 0.352, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 3.0629301868239926e-05, |
|
"loss": 0.0416, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 3.050639134709931e-05, |
|
"loss": 0.1309, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 3.0383480825958703e-05, |
|
"loss": 0.1201, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 3.0260570304818092e-05, |
|
"loss": 0.0265, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 3.0137659783677484e-05, |
|
"loss": 0.0291, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.0014749262536872e-05, |
|
"loss": 0.0306, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 9.29, |
|
"learning_rate": 2.9891838741396268e-05, |
|
"loss": 0.0262, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 2.9768928220255653e-05, |
|
"loss": 0.0262, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 2.964601769911505e-05, |
|
"loss": 0.0243, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 2.9523107177974434e-05, |
|
"loss": 0.0314, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 2.940019665683383e-05, |
|
"loss": 0.0218, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 2.9277286135693215e-05, |
|
"loss": 0.0294, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 2.915437561455261e-05, |
|
"loss": 0.0187, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 2.9031465093412e-05, |
|
"loss": 0.0614, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 2.890855457227139e-05, |
|
"loss": 0.0269, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 2.878564405113078e-05, |
|
"loss": 0.0591, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"learning_rate": 2.866273352999017e-05, |
|
"loss": 0.0282, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 2.853982300884956e-05, |
|
"loss": 0.0242, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 2.8416912487708945e-05, |
|
"loss": 0.0709, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 2.829400196656834e-05, |
|
"loss": 0.0158, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 2.8171091445427726e-05, |
|
"loss": 0.1044, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 2.804818092428712e-05, |
|
"loss": 0.0404, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.792527040314651e-05, |
|
"loss": 0.0149, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.84, |
|
"eval_loss": 0.707106351852417, |
|
"eval_runtime": 37.5782, |
|
"eval_samples_per_second": 2.661, |
|
"eval_steps_per_second": 0.346, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 2.7802359882005902e-05, |
|
"loss": 0.1165, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 2.767944936086529e-05, |
|
"loss": 0.0129, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 2.7556538839724683e-05, |
|
"loss": 0.0624, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 2.743362831858407e-05, |
|
"loss": 0.0272, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 2.7310717797443463e-05, |
|
"loss": 0.0176, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 2.7187807276302852e-05, |
|
"loss": 0.0138, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 2.7064896755162244e-05, |
|
"loss": 0.0138, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 2.6941986234021633e-05, |
|
"loss": 0.0177, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 2.6819075712881025e-05, |
|
"loss": 0.024, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 10.44, |
|
"learning_rate": 2.6696165191740413e-05, |
|
"loss": 0.0676, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 2.6573254670599805e-05, |
|
"loss": 0.0112, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 2.6450344149459194e-05, |
|
"loss": 0.011, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 10.58, |
|
"learning_rate": 2.6327433628318586e-05, |
|
"loss": 0.0118, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 2.6204523107177975e-05, |
|
"loss": 0.018, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.66, |
|
"learning_rate": 2.6081612586037367e-05, |
|
"loss": 0.017, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 10.71, |
|
"learning_rate": 2.5958702064896756e-05, |
|
"loss": 0.0117, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 2.583579154375615e-05, |
|
"loss": 0.01, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 2.5712881022615536e-05, |
|
"loss": 0.0101, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 2.558997050147493e-05, |
|
"loss": 0.017, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 2.5467059980334317e-05, |
|
"loss": 0.0094, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 2.5344149459193706e-05, |
|
"loss": 0.0085, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"learning_rate": 2.5221238938053098e-05, |
|
"loss": 0.0154, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.84, |
|
"eval_loss": 0.7669206261634827, |
|
"eval_runtime": 37.5505, |
|
"eval_samples_per_second": 2.663, |
|
"eval_steps_per_second": 0.346, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 2.5098328416912486e-05, |
|
"loss": 0.0085, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 2.4975417895771878e-05, |
|
"loss": 0.0091, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 11.11, |
|
"learning_rate": 2.485250737463127e-05, |
|
"loss": 0.0086, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 2.4729596853490662e-05, |
|
"loss": 0.0087, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 11.19, |
|
"learning_rate": 2.460668633235005e-05, |
|
"loss": 0.008, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 2.4483775811209443e-05, |
|
"loss": 0.0078, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 2.436086529006883e-05, |
|
"loss": 0.0076, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 2.4237954768928224e-05, |
|
"loss": 0.0074, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 2.411504424778761e-05, |
|
"loss": 0.0076, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 11.42, |
|
"learning_rate": 2.3992133726647e-05, |
|
"loss": 0.0071, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 11.46, |
|
"learning_rate": 2.3869223205506393e-05, |
|
"loss": 0.0078, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 2.374631268436578e-05, |
|
"loss": 0.0069, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 2.3623402163225174e-05, |
|
"loss": 0.0071, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 2.3500491642084562e-05, |
|
"loss": 0.0086, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 2.3377581120943954e-05, |
|
"loss": 0.0075, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 2.3254670599803343e-05, |
|
"loss": 0.0072, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 2.3131760078662735e-05, |
|
"loss": 0.0078, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 2.3008849557522124e-05, |
|
"loss": 0.0398, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 2.2885939036381516e-05, |
|
"loss": 0.0069, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 2.2763028515240904e-05, |
|
"loss": 0.0069, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 2.2640117994100296e-05, |
|
"loss": 0.0057, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 2.2517207472959685e-05, |
|
"loss": 0.0108, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 2.2394296951819077e-05, |
|
"loss": 0.1238, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.87, |
|
"eval_loss": 0.7153857350349426, |
|
"eval_runtime": 37.3772, |
|
"eval_samples_per_second": 2.675, |
|
"eval_steps_per_second": 0.348, |
|
"step": 1356 |
|
} |
|
], |
|
"max_steps": 2260, |
|
"num_train_epochs": 20, |
|
"total_flos": 3.6803929647744e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|