{ "best_metric": 0.87, "best_model_checkpoint": "distilhubert-finetuned-gtzan/checkpoint-1356", "epoch": 13.0, "global_step": 1469, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.1061946902654867e-06, "loss": 2.3091, "step": 5 }, { "epoch": 0.09, "learning_rate": 2.2123893805309734e-06, "loss": 2.3013, "step": 10 }, { "epoch": 0.13, "learning_rate": 3.3185840707964607e-06, "loss": 2.304, "step": 15 }, { "epoch": 0.18, "learning_rate": 4.424778761061947e-06, "loss": 2.307, "step": 20 }, { "epoch": 0.22, "learning_rate": 5.5309734513274346e-06, "loss": 2.3035, "step": 25 }, { "epoch": 0.27, "learning_rate": 6.6371681415929215e-06, "loss": 2.2975, "step": 30 }, { "epoch": 0.31, "learning_rate": 7.743362831858407e-06, "loss": 2.2995, "step": 35 }, { "epoch": 0.35, "learning_rate": 8.849557522123894e-06, "loss": 2.2945, "step": 40 }, { "epoch": 0.4, "learning_rate": 9.95575221238938e-06, "loss": 2.2868, "step": 45 }, { "epoch": 0.44, "learning_rate": 1.1061946902654869e-05, "loss": 2.2764, "step": 50 }, { "epoch": 0.49, "learning_rate": 1.2168141592920354e-05, "loss": 2.2802, "step": 55 }, { "epoch": 0.53, "learning_rate": 1.3274336283185843e-05, "loss": 2.2647, "step": 60 }, { "epoch": 0.58, "learning_rate": 1.4380530973451328e-05, "loss": 2.2792, "step": 65 }, { "epoch": 0.62, "learning_rate": 1.5486725663716813e-05, "loss": 2.2499, "step": 70 }, { "epoch": 0.66, "learning_rate": 1.6592920353982302e-05, "loss": 2.2809, "step": 75 }, { "epoch": 0.71, "learning_rate": 1.7699115044247787e-05, "loss": 2.2683, "step": 80 }, { "epoch": 0.75, "learning_rate": 1.8805309734513272e-05, "loss": 2.2492, "step": 85 }, { "epoch": 0.8, "learning_rate": 1.991150442477876e-05, "loss": 2.239, "step": 90 }, { "epoch": 0.84, "learning_rate": 2.101769911504425e-05, "loss": 2.2239, "step": 95 }, { "epoch": 0.88, "learning_rate": 2.2123893805309738e-05, "loss": 2.1822, "step": 100 }, { "epoch": 0.93, "learning_rate": 2.3230088495575223e-05, "loss": 2.1901, "step": 105 }, { "epoch": 0.97, "learning_rate": 2.433628318584071e-05, "loss": 2.2018, "step": 110 }, { "epoch": 1.0, "eval_accuracy": 0.37, "eval_loss": 2.124462842941284, "eval_runtime": 37.2852, "eval_samples_per_second": 2.682, "eval_steps_per_second": 0.349, "step": 113 }, { "epoch": 1.02, "learning_rate": 2.5442477876106197e-05, "loss": 2.1462, "step": 115 }, { "epoch": 1.06, "learning_rate": 2.6548672566371686e-05, "loss": 2.1083, "step": 120 }, { "epoch": 1.11, "learning_rate": 2.7654867256637168e-05, "loss": 2.0828, "step": 125 }, { "epoch": 1.15, "learning_rate": 2.8761061946902656e-05, "loss": 2.0594, "step": 130 }, { "epoch": 1.19, "learning_rate": 2.9867256637168145e-05, "loss": 2.0813, "step": 135 }, { "epoch": 1.24, "learning_rate": 3.097345132743363e-05, "loss": 2.0167, "step": 140 }, { "epoch": 1.28, "learning_rate": 3.185840707964602e-05, "loss": 1.9729, "step": 145 }, { "epoch": 1.33, "learning_rate": 3.296460176991151e-05, "loss": 1.9886, "step": 150 }, { "epoch": 1.37, "learning_rate": 3.407079646017699e-05, "loss": 1.9951, "step": 155 }, { "epoch": 1.42, "learning_rate": 3.517699115044248e-05, "loss": 1.9228, "step": 160 }, { "epoch": 1.46, "learning_rate": 3.628318584070797e-05, "loss": 1.9038, "step": 165 }, { "epoch": 1.5, "learning_rate": 3.7389380530973455e-05, "loss": 1.857, "step": 170 }, { "epoch": 1.55, "learning_rate": 3.849557522123894e-05, "loss": 1.922, "step": 175 }, { "epoch": 1.59, "learning_rate": 3.9601769911504426e-05, "loss": 1.8138, "step": 180 }, { "epoch": 1.64, "learning_rate": 4.0707964601769914e-05, "loss": 1.7358, "step": 185 }, { "epoch": 1.68, "learning_rate": 4.1814159292035396e-05, "loss": 1.6802, "step": 190 }, { "epoch": 1.73, "learning_rate": 4.2920353982300885e-05, "loss": 1.6438, "step": 195 }, { "epoch": 1.77, "learning_rate": 4.4026548672566373e-05, "loss": 1.67, "step": 200 }, { "epoch": 1.81, "learning_rate": 4.491150442477876e-05, "loss": 1.8038, "step": 205 }, { "epoch": 1.86, "learning_rate": 4.579646017699115e-05, "loss": 1.7205, "step": 210 }, { "epoch": 1.9, "learning_rate": 4.690265486725664e-05, "loss": 1.7188, "step": 215 }, { "epoch": 1.95, "learning_rate": 4.800884955752213e-05, "loss": 1.5285, "step": 220 }, { "epoch": 1.99, "learning_rate": 4.911504424778761e-05, "loss": 1.5528, "step": 225 }, { "epoch": 2.0, "eval_accuracy": 0.56, "eval_loss": 1.5200048685073853, "eval_runtime": 36.9619, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.352, "step": 226 }, { "epoch": 2.04, "learning_rate": 4.997541789577188e-05, "loss": 1.5776, "step": 230 }, { "epoch": 2.08, "learning_rate": 4.985250737463127e-05, "loss": 1.5497, "step": 235 }, { "epoch": 2.12, "learning_rate": 4.972959685349066e-05, "loss": 1.4853, "step": 240 }, { "epoch": 2.17, "learning_rate": 4.960668633235005e-05, "loss": 1.5263, "step": 245 }, { "epoch": 2.21, "learning_rate": 4.9483775811209444e-05, "loss": 1.4766, "step": 250 }, { "epoch": 2.26, "learning_rate": 4.936086529006883e-05, "loss": 1.4554, "step": 255 }, { "epoch": 2.3, "learning_rate": 4.923795476892822e-05, "loss": 1.3716, "step": 260 }, { "epoch": 2.35, "learning_rate": 4.911504424778761e-05, "loss": 1.3669, "step": 265 }, { "epoch": 2.39, "learning_rate": 4.8992133726647006e-05, "loss": 1.4348, "step": 270 }, { "epoch": 2.43, "learning_rate": 4.8869223205506394e-05, "loss": 1.275, "step": 275 }, { "epoch": 2.48, "learning_rate": 4.874631268436578e-05, "loss": 1.4206, "step": 280 }, { "epoch": 2.52, "learning_rate": 4.862340216322517e-05, "loss": 1.4962, "step": 285 }, { "epoch": 2.57, "learning_rate": 4.850049164208457e-05, "loss": 1.2934, "step": 290 }, { "epoch": 2.61, "learning_rate": 4.8377581120943956e-05, "loss": 1.263, "step": 295 }, { "epoch": 2.65, "learning_rate": 4.825467059980335e-05, "loss": 1.2592, "step": 300 }, { "epoch": 2.7, "learning_rate": 4.813176007866273e-05, "loss": 1.1302, "step": 305 }, { "epoch": 2.74, "learning_rate": 4.800884955752213e-05, "loss": 1.242, "step": 310 }, { "epoch": 2.79, "learning_rate": 4.788593903638152e-05, "loss": 1.2154, "step": 315 }, { "epoch": 2.83, "learning_rate": 4.776302851524091e-05, "loss": 1.1873, "step": 320 }, { "epoch": 2.88, "learning_rate": 4.7640117994100294e-05, "loss": 1.2688, "step": 325 }, { "epoch": 2.92, "learning_rate": 4.751720747295968e-05, "loss": 1.1488, "step": 330 }, { "epoch": 2.96, "learning_rate": 4.739429695181908e-05, "loss": 1.2506, "step": 335 }, { "epoch": 3.0, "eval_accuracy": 0.71, "eval_loss": 1.1110644340515137, "eval_runtime": 37.2461, "eval_samples_per_second": 2.685, "eval_steps_per_second": 0.349, "step": 339 }, { "epoch": 3.01, "learning_rate": 4.727138643067847e-05, "loss": 1.2124, "step": 340 }, { "epoch": 3.05, "learning_rate": 4.714847590953786e-05, "loss": 1.1278, "step": 345 }, { "epoch": 3.1, "learning_rate": 4.7025565388397244e-05, "loss": 1.1197, "step": 350 }, { "epoch": 3.14, "learning_rate": 4.690265486725664e-05, "loss": 0.9486, "step": 355 }, { "epoch": 3.19, "learning_rate": 4.677974434611603e-05, "loss": 1.0949, "step": 360 }, { "epoch": 3.23, "learning_rate": 4.6656833824975424e-05, "loss": 1.1837, "step": 365 }, { "epoch": 3.27, "learning_rate": 4.655850540806293e-05, "loss": 1.0059, "step": 370 }, { "epoch": 3.32, "learning_rate": 4.643559488692232e-05, "loss": 1.0395, "step": 375 }, { "epoch": 3.36, "learning_rate": 4.631268436578171e-05, "loss": 1.1674, "step": 380 }, { "epoch": 3.41, "learning_rate": 4.6189773844641105e-05, "loss": 0.9608, "step": 385 }, { "epoch": 3.45, "learning_rate": 4.606686332350049e-05, "loss": 1.2654, "step": 390 }, { "epoch": 3.5, "learning_rate": 4.594395280235988e-05, "loss": 0.8788, "step": 395 }, { "epoch": 3.54, "learning_rate": 4.582104228121927e-05, "loss": 0.9846, "step": 400 }, { "epoch": 3.58, "learning_rate": 4.5698131760078666e-05, "loss": 0.8597, "step": 405 }, { "epoch": 3.63, "learning_rate": 4.5575221238938055e-05, "loss": 1.1485, "step": 410 }, { "epoch": 3.67, "learning_rate": 4.545231071779745e-05, "loss": 1.0469, "step": 415 }, { "epoch": 3.72, "learning_rate": 4.532940019665683e-05, "loss": 0.9581, "step": 420 }, { "epoch": 3.76, "learning_rate": 4.520648967551623e-05, "loss": 0.9821, "step": 425 }, { "epoch": 3.81, "learning_rate": 4.5083579154375616e-05, "loss": 1.0122, "step": 430 }, { "epoch": 3.85, "learning_rate": 4.496066863323501e-05, "loss": 0.8543, "step": 435 }, { "epoch": 3.89, "learning_rate": 4.48377581120944e-05, "loss": 0.8246, "step": 440 }, { "epoch": 3.94, "learning_rate": 4.471484759095379e-05, "loss": 0.891, "step": 445 }, { "epoch": 3.98, "learning_rate": 4.459193706981318e-05, "loss": 1.0517, "step": 450 }, { "epoch": 4.0, "eval_accuracy": 0.74, "eval_loss": 0.950592041015625, "eval_runtime": 37.0565, "eval_samples_per_second": 2.699, "eval_steps_per_second": 0.351, "step": 452 }, { "epoch": 4.03, "learning_rate": 4.446902654867257e-05, "loss": 0.7031, "step": 455 }, { "epoch": 4.07, "learning_rate": 4.434611602753196e-05, "loss": 0.6235, "step": 460 }, { "epoch": 4.12, "learning_rate": 4.422320550639135e-05, "loss": 0.8002, "step": 465 }, { "epoch": 4.16, "learning_rate": 4.410029498525074e-05, "loss": 0.9738, "step": 470 }, { "epoch": 4.2, "learning_rate": 4.397738446411013e-05, "loss": 0.8511, "step": 475 }, { "epoch": 4.25, "learning_rate": 4.385447394296952e-05, "loss": 0.6335, "step": 480 }, { "epoch": 4.29, "learning_rate": 4.373156342182891e-05, "loss": 0.644, "step": 485 }, { "epoch": 4.34, "learning_rate": 4.36086529006883e-05, "loss": 0.8481, "step": 490 }, { "epoch": 4.38, "learning_rate": 4.348574237954769e-05, "loss": 0.6972, "step": 495 }, { "epoch": 4.42, "learning_rate": 4.3362831858407084e-05, "loss": 0.8044, "step": 500 }, { "epoch": 4.47, "learning_rate": 4.323992133726647e-05, "loss": 0.7073, "step": 505 }, { "epoch": 4.51, "learning_rate": 4.311701081612586e-05, "loss": 0.8473, "step": 510 }, { "epoch": 4.56, "learning_rate": 4.301868239921337e-05, "loss": 0.7983, "step": 515 }, { "epoch": 4.6, "learning_rate": 4.2895771878072765e-05, "loss": 0.5702, "step": 520 }, { "epoch": 4.65, "learning_rate": 4.2772861356932154e-05, "loss": 0.649, "step": 525 }, { "epoch": 4.69, "learning_rate": 4.264995083579155e-05, "loss": 0.6669, "step": 530 }, { "epoch": 4.73, "learning_rate": 4.252704031465093e-05, "loss": 0.5829, "step": 535 }, { "epoch": 4.78, "learning_rate": 4.2404129793510327e-05, "loss": 0.65, "step": 540 }, { "epoch": 4.82, "learning_rate": 4.2281219272369715e-05, "loss": 0.81, "step": 545 }, { "epoch": 4.87, "learning_rate": 4.215830875122911e-05, "loss": 0.7507, "step": 550 }, { "epoch": 4.91, "learning_rate": 4.20353982300885e-05, "loss": 0.6655, "step": 555 }, { "epoch": 4.96, "learning_rate": 4.191248770894789e-05, "loss": 0.6181, "step": 560 }, { "epoch": 5.0, "learning_rate": 4.178957718780728e-05, "loss": 0.8272, "step": 565 }, { "epoch": 5.0, "eval_accuracy": 0.83, "eval_loss": 0.7394164800643921, "eval_runtime": 37.0651, "eval_samples_per_second": 2.698, "eval_steps_per_second": 0.351, "step": 565 }, { "epoch": 5.04, "learning_rate": 4.166666666666667e-05, "loss": 0.6379, "step": 570 }, { "epoch": 5.09, "learning_rate": 4.154375614552606e-05, "loss": 0.5593, "step": 575 }, { "epoch": 5.13, "learning_rate": 4.142084562438545e-05, "loss": 0.524, "step": 580 }, { "epoch": 5.18, "learning_rate": 4.129793510324484e-05, "loss": 0.4926, "step": 585 }, { "epoch": 5.22, "learning_rate": 4.1175024582104233e-05, "loss": 0.6712, "step": 590 }, { "epoch": 5.27, "learning_rate": 4.105211406096362e-05, "loss": 0.59, "step": 595 }, { "epoch": 5.31, "learning_rate": 4.092920353982301e-05, "loss": 0.3684, "step": 600 }, { "epoch": 5.35, "learning_rate": 4.08062930186824e-05, "loss": 0.5484, "step": 605 }, { "epoch": 5.4, "learning_rate": 4.068338249754179e-05, "loss": 0.4718, "step": 610 }, { "epoch": 5.44, "learning_rate": 4.0560471976401183e-05, "loss": 0.5205, "step": 615 }, { "epoch": 5.49, "learning_rate": 4.043756145526057e-05, "loss": 0.4103, "step": 620 }, { "epoch": 5.53, "learning_rate": 4.031465093411996e-05, "loss": 0.6238, "step": 625 }, { "epoch": 5.58, "learning_rate": 4.019174041297935e-05, "loss": 0.391, "step": 630 }, { "epoch": 5.62, "learning_rate": 4.0068829891838745e-05, "loss": 0.626, "step": 635 }, { "epoch": 5.66, "learning_rate": 3.9945919370698133e-05, "loss": 0.5142, "step": 640 }, { "epoch": 5.71, "learning_rate": 3.982300884955752e-05, "loss": 0.5664, "step": 645 }, { "epoch": 5.75, "learning_rate": 3.970009832841691e-05, "loss": 0.3242, "step": 650 }, { "epoch": 5.8, "learning_rate": 3.9577187807276306e-05, "loss": 0.4262, "step": 655 }, { "epoch": 5.84, "learning_rate": 3.9454277286135695e-05, "loss": 0.6169, "step": 660 }, { "epoch": 5.88, "learning_rate": 3.9331366764995083e-05, "loss": 0.436, "step": 665 }, { "epoch": 5.93, "learning_rate": 3.920845624385447e-05, "loss": 0.2636, "step": 670 }, { "epoch": 5.97, "learning_rate": 3.908554572271387e-05, "loss": 0.3728, "step": 675 }, { "epoch": 6.0, "eval_accuracy": 0.81, "eval_loss": 0.6849124431610107, "eval_runtime": 37.0876, "eval_samples_per_second": 2.696, "eval_steps_per_second": 0.351, "step": 678 }, { "epoch": 6.02, "learning_rate": 3.8962635201573256e-05, "loss": 0.4045, "step": 680 }, { "epoch": 6.06, "learning_rate": 3.883972468043265e-05, "loss": 0.2084, "step": 685 }, { "epoch": 6.11, "learning_rate": 3.8716814159292034e-05, "loss": 0.3604, "step": 690 }, { "epoch": 6.15, "learning_rate": 3.859390363815143e-05, "loss": 0.3663, "step": 695 }, { "epoch": 6.19, "learning_rate": 3.847099311701082e-05, "loss": 0.3161, "step": 700 }, { "epoch": 6.24, "learning_rate": 3.834808259587021e-05, "loss": 0.4456, "step": 705 }, { "epoch": 6.28, "learning_rate": 3.82251720747296e-05, "loss": 0.2935, "step": 710 }, { "epoch": 6.33, "learning_rate": 3.810226155358899e-05, "loss": 0.2577, "step": 715 }, { "epoch": 6.37, "learning_rate": 3.797935103244838e-05, "loss": 0.2841, "step": 720 }, { "epoch": 6.42, "learning_rate": 3.7856440511307774e-05, "loss": 0.2892, "step": 725 }, { "epoch": 6.46, "learning_rate": 3.773352999016716e-05, "loss": 0.2578, "step": 730 }, { "epoch": 6.5, "learning_rate": 3.7610619469026545e-05, "loss": 0.2938, "step": 735 }, { "epoch": 6.55, "learning_rate": 3.748770894788594e-05, "loss": 0.2936, "step": 740 }, { "epoch": 6.59, "learning_rate": 3.736479842674533e-05, "loss": 0.426, "step": 745 }, { "epoch": 6.64, "learning_rate": 3.7241887905604724e-05, "loss": 0.2328, "step": 750 }, { "epoch": 6.68, "learning_rate": 3.711897738446411e-05, "loss": 0.2715, "step": 755 }, { "epoch": 6.73, "learning_rate": 3.69960668633235e-05, "loss": 0.3068, "step": 760 }, { "epoch": 6.77, "learning_rate": 3.687315634218289e-05, "loss": 0.4009, "step": 765 }, { "epoch": 6.81, "learning_rate": 3.6750245821042286e-05, "loss": 0.3685, "step": 770 }, { "epoch": 6.86, "learning_rate": 3.6627335299901674e-05, "loss": 0.3065, "step": 775 }, { "epoch": 6.9, "learning_rate": 3.650442477876106e-05, "loss": 0.2677, "step": 780 }, { "epoch": 6.95, "learning_rate": 3.638151425762045e-05, "loss": 0.3096, "step": 785 }, { "epoch": 6.99, "learning_rate": 3.625860373647985e-05, "loss": 0.3878, "step": 790 }, { "epoch": 7.0, "eval_accuracy": 0.8, "eval_loss": 0.647432267665863, "eval_runtime": 36.7252, "eval_samples_per_second": 2.723, "eval_steps_per_second": 0.354, "step": 791 }, { "epoch": 7.04, "learning_rate": 3.6135693215339236e-05, "loss": 0.1663, "step": 795 }, { "epoch": 7.08, "learning_rate": 3.6012782694198624e-05, "loss": 0.2787, "step": 800 }, { "epoch": 7.12, "learning_rate": 3.588987217305801e-05, "loss": 0.1559, "step": 805 }, { "epoch": 7.17, "learning_rate": 3.576696165191741e-05, "loss": 0.1331, "step": 810 }, { "epoch": 7.21, "learning_rate": 3.56440511307768e-05, "loss": 0.1643, "step": 815 }, { "epoch": 7.26, "learning_rate": 3.5521140609636186e-05, "loss": 0.1594, "step": 820 }, { "epoch": 7.3, "learning_rate": 3.54228121927237e-05, "loss": 0.1871, "step": 825 }, { "epoch": 7.35, "learning_rate": 3.529990167158309e-05, "loss": 0.1745, "step": 830 }, { "epoch": 7.39, "learning_rate": 3.517699115044248e-05, "loss": 0.125, "step": 835 }, { "epoch": 7.43, "learning_rate": 3.5054080629301874e-05, "loss": 0.1259, "step": 840 }, { "epoch": 7.48, "learning_rate": 3.493117010816126e-05, "loss": 0.2476, "step": 845 }, { "epoch": 7.52, "learning_rate": 3.480825958702065e-05, "loss": 0.0858, "step": 850 }, { "epoch": 7.57, "learning_rate": 3.468534906588004e-05, "loss": 0.1988, "step": 855 }, { "epoch": 7.61, "learning_rate": 3.4562438544739435e-05, "loss": 0.1398, "step": 860 }, { "epoch": 7.65, "learning_rate": 3.4439528023598824e-05, "loss": 0.1934, "step": 865 }, { "epoch": 7.7, "learning_rate": 3.431661750245821e-05, "loss": 0.0942, "step": 870 }, { "epoch": 7.74, "learning_rate": 3.41937069813176e-05, "loss": 0.1991, "step": 875 }, { "epoch": 7.79, "learning_rate": 3.407079646017699e-05, "loss": 0.1436, "step": 880 }, { "epoch": 7.83, "learning_rate": 3.3947885939036385e-05, "loss": 0.0865, "step": 885 }, { "epoch": 7.88, "learning_rate": 3.3824975417895774e-05, "loss": 0.1405, "step": 890 }, { "epoch": 7.92, "learning_rate": 3.370206489675516e-05, "loss": 0.285, "step": 895 }, { "epoch": 7.96, "learning_rate": 3.357915437561455e-05, "loss": 0.1119, "step": 900 }, { "epoch": 8.0, "eval_accuracy": 0.83, "eval_loss": 0.5846229791641235, "eval_runtime": 37.7892, "eval_samples_per_second": 2.646, "eval_steps_per_second": 0.344, "step": 904 }, { "epoch": 8.01, "learning_rate": 3.3456243854473946e-05, "loss": 0.1696, "step": 905 }, { "epoch": 8.05, "learning_rate": 3.3333333333333335e-05, "loss": 0.1067, "step": 910 }, { "epoch": 8.1, "learning_rate": 3.3210422812192724e-05, "loss": 0.0852, "step": 915 }, { "epoch": 8.14, "learning_rate": 3.308751229105211e-05, "loss": 0.1015, "step": 920 }, { "epoch": 8.19, "learning_rate": 3.296460176991151e-05, "loss": 0.1699, "step": 925 }, { "epoch": 8.23, "learning_rate": 3.2841691248770896e-05, "loss": 0.0691, "step": 930 }, { "epoch": 8.27, "learning_rate": 3.2718780727630285e-05, "loss": 0.0661, "step": 935 }, { "epoch": 8.32, "learning_rate": 3.2595870206489674e-05, "loss": 0.0512, "step": 940 }, { "epoch": 8.36, "learning_rate": 3.247295968534907e-05, "loss": 0.1003, "step": 945 }, { "epoch": 8.41, "learning_rate": 3.235004916420846e-05, "loss": 0.0523, "step": 950 }, { "epoch": 8.45, "learning_rate": 3.222713864306785e-05, "loss": 0.0893, "step": 955 }, { "epoch": 8.5, "learning_rate": 3.2104228121927235e-05, "loss": 0.053, "step": 960 }, { "epoch": 8.54, "learning_rate": 3.198131760078663e-05, "loss": 0.0592, "step": 965 }, { "epoch": 8.58, "learning_rate": 3.185840707964602e-05, "loss": 0.0779, "step": 970 }, { "epoch": 8.63, "learning_rate": 3.1735496558505414e-05, "loss": 0.0611, "step": 975 }, { "epoch": 8.67, "learning_rate": 3.16125860373648e-05, "loss": 0.0776, "step": 980 }, { "epoch": 8.72, "learning_rate": 3.148967551622419e-05, "loss": 0.1722, "step": 985 }, { "epoch": 8.76, "learning_rate": 3.136676499508358e-05, "loss": 0.0809, "step": 990 }, { "epoch": 8.81, "learning_rate": 3.124385447394297e-05, "loss": 0.185, "step": 995 }, { "epoch": 8.85, "learning_rate": 3.1120943952802364e-05, "loss": 0.0357, "step": 1000 }, { "epoch": 8.89, "learning_rate": 3.0998033431661746e-05, "loss": 0.082, "step": 1005 }, { "epoch": 8.94, "learning_rate": 3.087512291052114e-05, "loss": 0.1928, "step": 1010 }, { "epoch": 8.98, "learning_rate": 3.075221238938053e-05, "loss": 0.1312, "step": 1015 }, { "epoch": 9.0, "eval_accuracy": 0.84, "eval_loss": 0.5652107000350952, "eval_runtime": 36.9689, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.352, "step": 1017 }, { "epoch": 9.03, "learning_rate": 3.0629301868239926e-05, "loss": 0.0416, "step": 1020 }, { "epoch": 9.07, "learning_rate": 3.050639134709931e-05, "loss": 0.1309, "step": 1025 }, { "epoch": 9.12, "learning_rate": 3.0383480825958703e-05, "loss": 0.1201, "step": 1030 }, { "epoch": 9.16, "learning_rate": 3.0260570304818092e-05, "loss": 0.0265, "step": 1035 }, { "epoch": 9.2, "learning_rate": 3.0137659783677484e-05, "loss": 0.0291, "step": 1040 }, { "epoch": 9.25, "learning_rate": 3.0014749262536872e-05, "loss": 0.0306, "step": 1045 }, { "epoch": 9.29, "learning_rate": 2.9891838741396268e-05, "loss": 0.0262, "step": 1050 }, { "epoch": 9.34, "learning_rate": 2.9768928220255653e-05, "loss": 0.0262, "step": 1055 }, { "epoch": 9.38, "learning_rate": 2.964601769911505e-05, "loss": 0.0243, "step": 1060 }, { "epoch": 9.42, "learning_rate": 2.9523107177974434e-05, "loss": 0.0314, "step": 1065 }, { "epoch": 9.47, "learning_rate": 2.940019665683383e-05, "loss": 0.0218, "step": 1070 }, { "epoch": 9.51, "learning_rate": 2.9277286135693215e-05, "loss": 0.0294, "step": 1075 }, { "epoch": 9.56, "learning_rate": 2.915437561455261e-05, "loss": 0.0187, "step": 1080 }, { "epoch": 9.6, "learning_rate": 2.9031465093412e-05, "loss": 0.0614, "step": 1085 }, { "epoch": 9.65, "learning_rate": 2.890855457227139e-05, "loss": 0.0269, "step": 1090 }, { "epoch": 9.69, "learning_rate": 2.878564405113078e-05, "loss": 0.0591, "step": 1095 }, { "epoch": 9.73, "learning_rate": 2.866273352999017e-05, "loss": 0.0282, "step": 1100 }, { "epoch": 9.78, "learning_rate": 2.853982300884956e-05, "loss": 0.0242, "step": 1105 }, { "epoch": 9.82, "learning_rate": 2.8416912487708945e-05, "loss": 0.0709, "step": 1110 }, { "epoch": 9.87, "learning_rate": 2.829400196656834e-05, "loss": 0.0158, "step": 1115 }, { "epoch": 9.91, "learning_rate": 2.8171091445427726e-05, "loss": 0.1044, "step": 1120 }, { "epoch": 9.96, "learning_rate": 2.804818092428712e-05, "loss": 0.0404, "step": 1125 }, { "epoch": 10.0, "learning_rate": 2.792527040314651e-05, "loss": 0.0149, "step": 1130 }, { "epoch": 10.0, "eval_accuracy": 0.84, "eval_loss": 0.707106351852417, "eval_runtime": 37.5782, "eval_samples_per_second": 2.661, "eval_steps_per_second": 0.346, "step": 1130 }, { "epoch": 10.04, "learning_rate": 2.7802359882005902e-05, "loss": 0.1165, "step": 1135 }, { "epoch": 10.09, "learning_rate": 2.767944936086529e-05, "loss": 0.0129, "step": 1140 }, { "epoch": 10.13, "learning_rate": 2.7556538839724683e-05, "loss": 0.0624, "step": 1145 }, { "epoch": 10.18, "learning_rate": 2.743362831858407e-05, "loss": 0.0272, "step": 1150 }, { "epoch": 10.22, "learning_rate": 2.7310717797443463e-05, "loss": 0.0176, "step": 1155 }, { "epoch": 10.27, "learning_rate": 2.7187807276302852e-05, "loss": 0.0138, "step": 1160 }, { "epoch": 10.31, "learning_rate": 2.7064896755162244e-05, "loss": 0.0138, "step": 1165 }, { "epoch": 10.35, "learning_rate": 2.6941986234021633e-05, "loss": 0.0177, "step": 1170 }, { "epoch": 10.4, "learning_rate": 2.6819075712881025e-05, "loss": 0.024, "step": 1175 }, { "epoch": 10.44, "learning_rate": 2.6696165191740413e-05, "loss": 0.0676, "step": 1180 }, { "epoch": 10.49, "learning_rate": 2.6573254670599805e-05, "loss": 0.0112, "step": 1185 }, { "epoch": 10.53, "learning_rate": 2.6450344149459194e-05, "loss": 0.011, "step": 1190 }, { "epoch": 10.58, "learning_rate": 2.6327433628318586e-05, "loss": 0.0118, "step": 1195 }, { "epoch": 10.62, "learning_rate": 2.6204523107177975e-05, "loss": 0.018, "step": 1200 }, { "epoch": 10.66, "learning_rate": 2.6081612586037367e-05, "loss": 0.017, "step": 1205 }, { "epoch": 10.71, "learning_rate": 2.5958702064896756e-05, "loss": 0.0117, "step": 1210 }, { "epoch": 10.75, "learning_rate": 2.583579154375615e-05, "loss": 0.01, "step": 1215 }, { "epoch": 10.8, "learning_rate": 2.5712881022615536e-05, "loss": 0.0101, "step": 1220 }, { "epoch": 10.84, "learning_rate": 2.558997050147493e-05, "loss": 0.017, "step": 1225 }, { "epoch": 10.88, "learning_rate": 2.5467059980334317e-05, "loss": 0.0094, "step": 1230 }, { "epoch": 10.93, "learning_rate": 2.5344149459193706e-05, "loss": 0.0085, "step": 1235 }, { "epoch": 10.97, "learning_rate": 2.5221238938053098e-05, "loss": 0.0154, "step": 1240 }, { "epoch": 11.0, "eval_accuracy": 0.84, "eval_loss": 0.7669206261634827, "eval_runtime": 37.5505, "eval_samples_per_second": 2.663, "eval_steps_per_second": 0.346, "step": 1243 }, { "epoch": 11.02, "learning_rate": 2.5098328416912486e-05, "loss": 0.0085, "step": 1245 }, { "epoch": 11.06, "learning_rate": 2.4975417895771878e-05, "loss": 0.0091, "step": 1250 }, { "epoch": 11.11, "learning_rate": 2.485250737463127e-05, "loss": 0.0086, "step": 1255 }, { "epoch": 11.15, "learning_rate": 2.4729596853490662e-05, "loss": 0.0087, "step": 1260 }, { "epoch": 11.19, "learning_rate": 2.460668633235005e-05, "loss": 0.008, "step": 1265 }, { "epoch": 11.24, "learning_rate": 2.4483775811209443e-05, "loss": 0.0078, "step": 1270 }, { "epoch": 11.28, "learning_rate": 2.436086529006883e-05, "loss": 0.0076, "step": 1275 }, { "epoch": 11.33, "learning_rate": 2.4237954768928224e-05, "loss": 0.0074, "step": 1280 }, { "epoch": 11.37, "learning_rate": 2.411504424778761e-05, "loss": 0.0076, "step": 1285 }, { "epoch": 11.42, "learning_rate": 2.3992133726647e-05, "loss": 0.0071, "step": 1290 }, { "epoch": 11.46, "learning_rate": 2.3869223205506393e-05, "loss": 0.0078, "step": 1295 }, { "epoch": 11.5, "learning_rate": 2.374631268436578e-05, "loss": 0.0069, "step": 1300 }, { "epoch": 11.55, "learning_rate": 2.3623402163225174e-05, "loss": 0.0071, "step": 1305 }, { "epoch": 11.59, "learning_rate": 2.3500491642084562e-05, "loss": 0.0086, "step": 1310 }, { "epoch": 11.64, "learning_rate": 2.3377581120943954e-05, "loss": 0.0075, "step": 1315 }, { "epoch": 11.68, "learning_rate": 2.3254670599803343e-05, "loss": 0.0072, "step": 1320 }, { "epoch": 11.73, "learning_rate": 2.3131760078662735e-05, "loss": 0.0078, "step": 1325 }, { "epoch": 11.77, "learning_rate": 2.3008849557522124e-05, "loss": 0.0398, "step": 1330 }, { "epoch": 11.81, "learning_rate": 2.2885939036381516e-05, "loss": 0.0069, "step": 1335 }, { "epoch": 11.86, "learning_rate": 2.2763028515240904e-05, "loss": 0.0069, "step": 1340 }, { "epoch": 11.9, "learning_rate": 2.2640117994100296e-05, "loss": 0.0057, "step": 1345 }, { "epoch": 11.95, "learning_rate": 2.2517207472959685e-05, "loss": 0.0108, "step": 1350 }, { "epoch": 11.99, "learning_rate": 2.2394296951819077e-05, "loss": 0.1238, "step": 1355 }, { "epoch": 12.0, "eval_accuracy": 0.87, "eval_loss": 0.7153857350349426, "eval_runtime": 37.3772, "eval_samples_per_second": 2.675, "eval_steps_per_second": 0.348, "step": 1356 }, { "epoch": 12.04, "learning_rate": 2.227138643067847e-05, "loss": 0.0058, "step": 1360 }, { "epoch": 12.08, "learning_rate": 2.2148475909537858e-05, "loss": 0.0076, "step": 1365 }, { "epoch": 12.12, "learning_rate": 2.202556538839725e-05, "loss": 0.0085, "step": 1370 }, { "epoch": 12.17, "learning_rate": 2.190265486725664e-05, "loss": 0.0068, "step": 1375 }, { "epoch": 12.21, "learning_rate": 2.177974434611603e-05, "loss": 0.0058, "step": 1380 }, { "epoch": 12.26, "learning_rate": 2.165683382497542e-05, "loss": 0.0058, "step": 1385 }, { "epoch": 12.3, "learning_rate": 2.153392330383481e-05, "loss": 0.0059, "step": 1390 }, { "epoch": 12.35, "learning_rate": 2.14110127826942e-05, "loss": 0.006, "step": 1395 }, { "epoch": 12.39, "learning_rate": 2.1288102261553592e-05, "loss": 0.0053, "step": 1400 }, { "epoch": 12.43, "learning_rate": 2.116519174041298e-05, "loss": 0.0064, "step": 1405 }, { "epoch": 12.48, "learning_rate": 2.104228121927237e-05, "loss": 0.0054, "step": 1410 }, { "epoch": 12.52, "learning_rate": 2.091937069813176e-05, "loss": 0.0979, "step": 1415 }, { "epoch": 12.57, "learning_rate": 2.079646017699115e-05, "loss": 0.0052, "step": 1420 }, { "epoch": 12.61, "learning_rate": 2.0673549655850542e-05, "loss": 0.0049, "step": 1425 }, { "epoch": 12.65, "learning_rate": 2.055063913470993e-05, "loss": 0.0051, "step": 1430 }, { "epoch": 12.7, "learning_rate": 2.0427728613569323e-05, "loss": 0.0053, "step": 1435 }, { "epoch": 12.74, "learning_rate": 2.030481809242871e-05, "loss": 0.0095, "step": 1440 }, { "epoch": 12.79, "learning_rate": 2.0181907571288103e-05, "loss": 0.005, "step": 1445 }, { "epoch": 12.83, "learning_rate": 2.0058997050147492e-05, "loss": 0.0048, "step": 1450 }, { "epoch": 12.88, "learning_rate": 1.9936086529006884e-05, "loss": 0.0045, "step": 1455 }, { "epoch": 12.92, "learning_rate": 1.9813176007866273e-05, "loss": 0.0048, "step": 1460 }, { "epoch": 12.96, "learning_rate": 1.9690265486725665e-05, "loss": 0.005, "step": 1465 }, { "epoch": 13.0, "eval_accuracy": 0.87, "eval_loss": 0.7023229002952576, "eval_runtime": 37.9815, "eval_samples_per_second": 2.633, "eval_steps_per_second": 0.342, "step": 1469 } ], "max_steps": 2260, "num_train_epochs": 20, "total_flos": 3.9870923785056e+17, "trial_name": null, "trial_params": null }