{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 36816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.9930309764864052e-05, "loss": 0.9224, "step": 500 }, { "epoch": 0.04, "eval_accuracy": 0.7422312786551197, "eval_loss": 0.6256920099258423, "eval_runtime": 34.0513, "eval_samples_per_second": 288.242, "step": 500 }, { "epoch": 0.08, "learning_rate": 1.965648750240075e-05, "loss": 0.6107, "step": 1000 }, { "epoch": 0.08, "eval_accuracy": 0.7959246051961284, "eval_loss": 0.5218016505241394, "eval_runtime": 34.3716, "eval_samples_per_second": 285.555, "step": 1000 }, { "epoch": 0.12, "learning_rate": 1.93821164979285e-05, "loss": 0.5464, "step": 1500 }, { "epoch": 0.12, "eval_accuracy": 0.8036678553234845, "eval_loss": 0.4963739216327667, "eval_runtime": 34.3352, "eval_samples_per_second": 285.858, "step": 1500 }, { "epoch": 0.16, "learning_rate": 1.9107745493456255e-05, "loss": 0.5153, "step": 2000 }, { "epoch": 0.16, "eval_accuracy": 0.8173204279164544, "eval_loss": 0.4657774269580841, "eval_runtime": 34.4768, "eval_samples_per_second": 284.684, "step": 2000 }, { "epoch": 0.2, "learning_rate": 1.8833374488984006e-05, "loss": 0.4934, "step": 2500 }, { "epoch": 0.2, "eval_accuracy": 0.8264900662251655, "eval_loss": 0.4544191062450409, "eval_runtime": 34.2913, "eval_samples_per_second": 286.224, "step": 2500 }, { "epoch": 0.24, "learning_rate": 1.8559003484511758e-05, "loss": 0.4863, "step": 3000 }, { "epoch": 0.24, "eval_accuracy": 0.817014773306164, "eval_loss": 0.469377726316452, "eval_runtime": 34.3798, "eval_samples_per_second": 285.488, "step": 3000 }, { "epoch": 0.29, "learning_rate": 1.828463248003951e-05, "loss": 0.4639, "step": 3500 }, { "epoch": 0.29, "eval_accuracy": 0.8337238920020377, "eval_loss": 0.44527676701545715, "eval_runtime": 34.3542, "eval_samples_per_second": 285.7, "step": 3500 }, { "epoch": 0.33, "learning_rate": 1.8010261475567264e-05, "loss": 0.477, "step": 4000 }, { "epoch": 0.33, "eval_accuracy": 0.83841059602649, "eval_loss": 0.4258698523044586, "eval_runtime": 34.3114, "eval_samples_per_second": 286.056, "step": 4000 }, { "epoch": 0.37, "learning_rate": 1.7735890471095016e-05, "loss": 0.4592, "step": 4500 }, { "epoch": 0.37, "eval_accuracy": 0.8421803362200713, "eval_loss": 0.4147617518901825, "eval_runtime": 34.3201, "eval_samples_per_second": 285.984, "step": 4500 }, { "epoch": 0.41, "learning_rate": 1.746151946662277e-05, "loss": 0.4449, "step": 5000 }, { "epoch": 0.41, "eval_accuracy": 0.8431991849210392, "eval_loss": 0.42169830203056335, "eval_runtime": 34.3613, "eval_samples_per_second": 285.641, "step": 5000 }, { "epoch": 0.45, "learning_rate": 1.718714846215052e-05, "loss": 0.4448, "step": 5500 }, { "epoch": 0.45, "eval_accuracy": 0.844319918492104, "eval_loss": 0.4075462818145752, "eval_runtime": 34.3101, "eval_samples_per_second": 286.067, "step": 5500 }, { "epoch": 0.49, "learning_rate": 1.6912777457678273e-05, "loss": 0.4416, "step": 6000 }, { "epoch": 0.49, "eval_accuracy": 0.8473764645950076, "eval_loss": 0.40427786111831665, "eval_runtime": 34.3011, "eval_samples_per_second": 286.142, "step": 6000 }, { "epoch": 0.53, "learning_rate": 1.6638406453206025e-05, "loss": 0.4396, "step": 6500 }, { "epoch": 0.53, "eval_accuracy": 0.845746306673459, "eval_loss": 0.40583938360214233, "eval_runtime": 34.3317, "eval_samples_per_second": 285.887, "step": 6500 }, { "epoch": 0.57, "learning_rate": 1.636403544873378e-05, "loss": 0.4262, "step": 7000 }, { "epoch": 0.57, "eval_accuracy": 0.8414671421293938, "eval_loss": 0.4056021273136139, "eval_runtime": 34.3275, "eval_samples_per_second": 285.923, "step": 7000 }, { "epoch": 0.61, "learning_rate": 1.609076192827942e-05, "loss": 0.4181, "step": 7500 }, { "epoch": 0.61, "eval_accuracy": 0.8471726948548141, "eval_loss": 0.4094808101654053, "eval_runtime": 34.3454, "eval_samples_per_second": 285.773, "step": 7500 }, { "epoch": 0.65, "learning_rate": 1.5816390923807176e-05, "loss": 0.4277, "step": 8000 }, { "epoch": 0.65, "eval_accuracy": 0.8513499745287825, "eval_loss": 0.3897625207901001, "eval_runtime": 34.3305, "eval_samples_per_second": 285.898, "step": 8000 }, { "epoch": 0.69, "learning_rate": 1.5542019919334924e-05, "loss": 0.4204, "step": 8500 }, { "epoch": 0.69, "eval_accuracy": 0.8414671421293938, "eval_loss": 0.40381574630737305, "eval_runtime": 34.3185, "eval_samples_per_second": 285.997, "step": 8500 }, { "epoch": 0.73, "learning_rate": 1.526764891486268e-05, "loss": 0.408, "step": 9000 }, { "epoch": 0.73, "eval_accuracy": 0.8490066225165563, "eval_loss": 0.39596864581108093, "eval_runtime": 34.3274, "eval_samples_per_second": 285.923, "step": 9000 }, { "epoch": 0.77, "learning_rate": 1.4993277910390432e-05, "loss": 0.4147, "step": 9500 }, { "epoch": 0.77, "eval_accuracy": 0.8508405501782985, "eval_loss": 0.39388328790664673, "eval_runtime": 34.3118, "eval_samples_per_second": 286.053, "step": 9500 }, { "epoch": 0.81, "learning_rate": 1.4718906905918185e-05, "loss": 0.4087, "step": 10000 }, { "epoch": 0.81, "eval_accuracy": 0.8454406520631687, "eval_loss": 0.3990323841571808, "eval_runtime": 34.3039, "eval_samples_per_second": 286.119, "step": 10000 }, { "epoch": 0.86, "learning_rate": 1.4444535901445937e-05, "loss": 0.4182, "step": 10500 }, { "epoch": 0.86, "eval_accuracy": 0.8559347936831381, "eval_loss": 0.37663206458091736, "eval_runtime": 34.3301, "eval_samples_per_second": 285.901, "step": 10500 }, { "epoch": 0.9, "learning_rate": 1.4170164896973688e-05, "loss": 0.3974, "step": 11000 }, { "epoch": 0.9, "eval_accuracy": 0.8542027508914927, "eval_loss": 0.38825666904449463, "eval_runtime": 34.3094, "eval_samples_per_second": 286.073, "step": 11000 }, { "epoch": 0.94, "learning_rate": 1.3895793892501441e-05, "loss": 0.4031, "step": 11500 }, { "epoch": 0.94, "eval_accuracy": 0.8604177279673968, "eval_loss": 0.372531533241272, "eval_runtime": 34.3046, "eval_samples_per_second": 286.113, "step": 11500 }, { "epoch": 0.98, "learning_rate": 1.3621422888029194e-05, "loss": 0.4012, "step": 12000 }, { "epoch": 0.98, "eval_accuracy": 0.8586856851757514, "eval_loss": 0.3715327978134155, "eval_runtime": 34.3023, "eval_samples_per_second": 286.133, "step": 12000 }, { "epoch": 1.02, "learning_rate": 1.334760062556589e-05, "loss": 0.3364, "step": 12500 }, { "epoch": 1.02, "eval_accuracy": 0.8536933265410087, "eval_loss": 0.4157598912715912, "eval_runtime": 34.3192, "eval_samples_per_second": 285.992, "step": 12500 }, { "epoch": 1.06, "learning_rate": 1.3073229621093644e-05, "loss": 0.256, "step": 13000 }, { "epoch": 1.06, "eval_accuracy": 0.8565461029037188, "eval_loss": 0.4385443329811096, "eval_runtime": 34.3274, "eval_samples_per_second": 285.923, "step": 13000 }, { "epoch": 1.1, "learning_rate": 1.2798858616621397e-05, "loss": 0.2682, "step": 13500 }, { "epoch": 1.1, "eval_accuracy": 0.8606214977075904, "eval_loss": 0.40176108479499817, "eval_runtime": 34.3252, "eval_samples_per_second": 285.941, "step": 13500 }, { "epoch": 1.14, "learning_rate": 1.252448761214915e-05, "loss": 0.2566, "step": 14000 }, { "epoch": 1.14, "eval_accuracy": 0.8628629648497198, "eval_loss": 0.40525978803634644, "eval_runtime": 34.3427, "eval_samples_per_second": 285.796, "step": 14000 }, { "epoch": 1.18, "learning_rate": 1.22501166076769e-05, "loss": 0.2672, "step": 14500 }, { "epoch": 1.18, "eval_accuracy": 0.8600101884870097, "eval_loss": 0.41904282569885254, "eval_runtime": 34.3153, "eval_samples_per_second": 286.024, "step": 14500 }, { "epoch": 1.22, "learning_rate": 1.1975745603204653e-05, "loss": 0.2614, "step": 15000 }, { "epoch": 1.22, "eval_accuracy": 0.8596026490066225, "eval_loss": 0.4234951138496399, "eval_runtime": 34.3391, "eval_samples_per_second": 285.825, "step": 15000 }, { "epoch": 1.26, "learning_rate": 1.1701374598732406e-05, "loss": 0.2581, "step": 15500 }, { "epoch": 1.26, "eval_accuracy": 0.8562404482934284, "eval_loss": 0.4271596670150757, "eval_runtime": 34.3468, "eval_samples_per_second": 285.762, "step": 15500 }, { "epoch": 1.3, "learning_rate": 1.142700359426016e-05, "loss": 0.2623, "step": 16000 }, { "epoch": 1.3, "eval_accuracy": 0.8616403464085584, "eval_loss": 0.42012786865234375, "eval_runtime": 34.342, "eval_samples_per_second": 285.802, "step": 16000 }, { "epoch": 1.34, "learning_rate": 1.1153181331796856e-05, "loss": 0.2536, "step": 16500 }, { "epoch": 1.34, "eval_accuracy": 0.8611309220580744, "eval_loss": 0.42309054732322693, "eval_runtime": 34.3494, "eval_samples_per_second": 285.74, "step": 16500 }, { "epoch": 1.39, "learning_rate": 1.0878810327324609e-05, "loss": 0.2595, "step": 17000 }, { "epoch": 1.39, "eval_accuracy": 0.863779928680591, "eval_loss": 0.408088356256485, "eval_runtime": 34.2723, "eval_samples_per_second": 286.383, "step": 17000 }, { "epoch": 1.43, "learning_rate": 1.0604439322852362e-05, "loss": 0.2626, "step": 17500 }, { "epoch": 1.43, "eval_accuracy": 0.8558329088130413, "eval_loss": 0.4161333441734314, "eval_runtime": 34.2334, "eval_samples_per_second": 286.708, "step": 17500 }, { "epoch": 1.47, "learning_rate": 1.0330068318380115e-05, "loss": 0.2607, "step": 18000 }, { "epoch": 1.47, "eval_accuracy": 0.8580743759551707, "eval_loss": 0.42380374670028687, "eval_runtime": 34.2608, "eval_samples_per_second": 286.479, "step": 18000 }, { "epoch": 1.51, "learning_rate": 1.0055697313907867e-05, "loss": 0.268, "step": 18500 }, { "epoch": 1.51, "eval_accuracy": 0.8599083036169128, "eval_loss": 0.43461742997169495, "eval_runtime": 34.234, "eval_samples_per_second": 286.703, "step": 18500 }, { "epoch": 1.55, "learning_rate": 9.78132630943562e-06, "loss": 0.2702, "step": 19000 }, { "epoch": 1.55, "eval_accuracy": 0.8628629648497198, "eval_loss": 0.4047214388847351, "eval_runtime": 34.2365, "eval_samples_per_second": 286.682, "step": 19000 }, { "epoch": 1.59, "learning_rate": 9.506955304963372e-06, "loss": 0.2496, "step": 19500 }, { "epoch": 1.59, "eval_accuracy": 0.8595007641365258, "eval_loss": 0.4368164539337158, "eval_runtime": 34.2332, "eval_samples_per_second": 286.71, "step": 19500 }, { "epoch": 1.63, "learning_rate": 9.232584300491125e-06, "loss": 0.2681, "step": 20000 }, { "epoch": 1.63, "eval_accuracy": 0.867142129393785, "eval_loss": 0.400213360786438, "eval_runtime": 34.2373, "eval_samples_per_second": 286.675, "step": 20000 }, { "epoch": 1.67, "learning_rate": 8.958762038027821e-06, "loss": 0.2608, "step": 20500 }, { "epoch": 1.67, "eval_accuracy": 0.863779928680591, "eval_loss": 0.40518832206726074, "eval_runtime": 34.2681, "eval_samples_per_second": 286.418, "step": 20500 }, { "epoch": 1.71, "learning_rate": 8.684939775564519e-06, "loss": 0.261, "step": 21000 }, { "epoch": 1.71, "eval_accuracy": 0.8655119714722364, "eval_loss": 0.3955936133861542, "eval_runtime": 34.2769, "eval_samples_per_second": 286.344, "step": 21000 }, { "epoch": 1.75, "learning_rate": 8.41056877109227e-06, "loss": 0.267, "step": 21500 }, { "epoch": 1.75, "eval_accuracy": 0.8610290371879776, "eval_loss": 0.40947309136390686, "eval_runtime": 34.2634, "eval_samples_per_second": 286.457, "step": 21500 }, { "epoch": 1.79, "learning_rate": 8.136197766620024e-06, "loss": 0.2598, "step": 22000 }, { "epoch": 1.79, "eval_accuracy": 0.8642893530310749, "eval_loss": 0.38150227069854736, "eval_runtime": 34.2625, "eval_samples_per_second": 286.465, "step": 22000 }, { "epoch": 1.83, "learning_rate": 7.861826762147777e-06, "loss": 0.2577, "step": 22500 }, { "epoch": 1.83, "eval_accuracy": 0.8641874681609781, "eval_loss": 0.401090145111084, "eval_runtime": 34.2614, "eval_samples_per_second": 286.474, "step": 22500 }, { "epoch": 1.87, "learning_rate": 7.587455757675529e-06, "loss": 0.2583, "step": 23000 }, { "epoch": 1.87, "eval_accuracy": 0.8678553234844626, "eval_loss": 0.39116695523262024, "eval_runtime": 34.2797, "eval_samples_per_second": 286.321, "step": 23000 }, { "epoch": 1.91, "learning_rate": 7.313084753203282e-06, "loss": 0.2623, "step": 23500 }, { "epoch": 1.91, "eval_accuracy": 0.8622516556291391, "eval_loss": 0.3969860076904297, "eval_runtime": 34.2798, "eval_samples_per_second": 286.32, "step": 23500 }, { "epoch": 1.96, "learning_rate": 7.038713748731034e-06, "loss": 0.2475, "step": 24000 }, { "epoch": 1.96, "eval_accuracy": 0.862761079979623, "eval_loss": 0.4156796634197235, "eval_runtime": 34.2085, "eval_samples_per_second": 286.917, "step": 24000 }, { "epoch": 2.0, "learning_rate": 6.764342744258787e-06, "loss": 0.2576, "step": 24500 }, { "epoch": 2.0, "eval_accuracy": 0.8691798267957208, "eval_loss": 0.38194480538368225, "eval_runtime": 34.1983, "eval_samples_per_second": 287.003, "step": 24500 }, { "epoch": 2.04, "learning_rate": 6.490520481795485e-06, "loss": 0.1544, "step": 25000 }, { "epoch": 2.04, "eval_accuracy": 0.8614365766683647, "eval_loss": 0.5428063273429871, "eval_runtime": 34.103, "eval_samples_per_second": 287.804, "step": 25000 }, { "epoch": 2.08, "learning_rate": 6.2161494773232365e-06, "loss": 0.1431, "step": 25500 }, { "epoch": 2.08, "eval_accuracy": 0.8678553234844626, "eval_loss": 0.509951114654541, "eval_runtime": 34.213, "eval_samples_per_second": 286.879, "step": 25500 }, { "epoch": 2.12, "learning_rate": 5.94177847285099e-06, "loss": 0.1481, "step": 26000 }, { "epoch": 2.12, "eval_accuracy": 0.8615384615384616, "eval_loss": 0.523091733455658, "eval_runtime": 34.1262, "eval_samples_per_second": 287.609, "step": 26000 }, { "epoch": 2.16, "learning_rate": 5.667407468378742e-06, "loss": 0.1367, "step": 26500 }, { "epoch": 2.16, "eval_accuracy": 0.8649006622516556, "eval_loss": 0.5161163806915283, "eval_runtime": 34.1588, "eval_samples_per_second": 287.335, "step": 26500 }, { "epoch": 2.2, "learning_rate": 5.393036463906494e-06, "loss": 0.1452, "step": 27000 }, { "epoch": 2.2, "eval_accuracy": 0.8658176260825268, "eval_loss": 0.505763053894043, "eval_runtime": 34.1846, "eval_samples_per_second": 287.118, "step": 27000 }, { "epoch": 2.24, "learning_rate": 5.119214201443192e-06, "loss": 0.1401, "step": 27500 }, { "epoch": 2.24, "eval_accuracy": 0.8645950076413652, "eval_loss": 0.5186136960983276, "eval_runtime": 34.2274, "eval_samples_per_second": 286.758, "step": 27500 }, { "epoch": 2.28, "learning_rate": 4.844843196970945e-06, "loss": 0.1418, "step": 28000 }, { "epoch": 2.28, "eval_accuracy": 0.8596026490066225, "eval_loss": 0.5330820083618164, "eval_runtime": 34.2035, "eval_samples_per_second": 286.959, "step": 28000 }, { "epoch": 2.32, "learning_rate": 4.570472192498697e-06, "loss": 0.1381, "step": 28500 }, { "epoch": 2.32, "eval_accuracy": 0.8654100866021396, "eval_loss": 0.5446536540985107, "eval_runtime": 34.125, "eval_samples_per_second": 287.619, "step": 28500 }, { "epoch": 2.36, "learning_rate": 4.296101188026449e-06, "loss": 0.1416, "step": 29000 }, { "epoch": 2.36, "eval_accuracy": 0.8622516556291391, "eval_loss": 0.540454626083374, "eval_runtime": 34.1047, "eval_samples_per_second": 287.79, "step": 29000 }, { "epoch": 2.4, "learning_rate": 4.0217301835542025e-06, "loss": 0.1444, "step": 29500 }, { "epoch": 2.4, "eval_accuracy": 0.8650025471217524, "eval_loss": 0.5085907578468323, "eval_runtime": 34.0859, "eval_samples_per_second": 287.949, "step": 29500 }, { "epoch": 2.44, "learning_rate": 3.747359179081955e-06, "loss": 0.1409, "step": 30000 }, { "epoch": 2.44, "eval_accuracy": 0.8621497707590423, "eval_loss": 0.5341202616691589, "eval_runtime": 34.1606, "eval_samples_per_second": 287.319, "step": 30000 }, { "epoch": 2.49, "learning_rate": 3.473536916618652e-06, "loss": 0.1348, "step": 30500 }, { "epoch": 2.49, "eval_accuracy": 0.8643912379011717, "eval_loss": 0.5396838784217834, "eval_runtime": 34.1392, "eval_samples_per_second": 287.499, "step": 30500 }, { "epoch": 2.53, "learning_rate": 3.199165912146405e-06, "loss": 0.1389, "step": 31000 }, { "epoch": 2.53, "eval_accuracy": 0.8635761589403973, "eval_loss": 0.548418402671814, "eval_runtime": 34.1565, "eval_samples_per_second": 287.354, "step": 31000 }, { "epoch": 2.57, "learning_rate": 2.924794907674157e-06, "loss": 0.1483, "step": 31500 }, { "epoch": 2.57, "eval_accuracy": 0.86571574121243, "eval_loss": 0.5054455399513245, "eval_runtime": 34.1767, "eval_samples_per_second": 287.184, "step": 31500 }, { "epoch": 2.61, "learning_rate": 2.65042390320191e-06, "loss": 0.1384, "step": 32000 }, { "epoch": 2.61, "eval_accuracy": 0.8688741721854305, "eval_loss": 0.5029205083847046, "eval_runtime": 34.1709, "eval_samples_per_second": 287.233, "step": 32000 }, { "epoch": 2.65, "learning_rate": 2.3760528987296626e-06, "loss": 0.1374, "step": 32500 }, { "epoch": 2.65, "eval_accuracy": 0.8649006622516556, "eval_loss": 0.5312877893447876, "eval_runtime": 34.1909, "eval_samples_per_second": 287.065, "step": 32500 }, { "epoch": 2.69, "learning_rate": 2.101681894257415e-06, "loss": 0.135, "step": 33000 }, { "epoch": 2.69, "eval_accuracy": 0.86571574121243, "eval_loss": 0.5231665372848511, "eval_runtime": 34.184, "eval_samples_per_second": 287.123, "step": 33000 }, { "epoch": 2.73, "learning_rate": 1.8273108897851676e-06, "loss": 0.1331, "step": 33500 }, { "epoch": 2.73, "eval_accuracy": 0.8667345899133979, "eval_loss": 0.5193073749542236, "eval_runtime": 34.2139, "eval_samples_per_second": 286.872, "step": 33500 }, { "epoch": 2.77, "learning_rate": 1.5529398853129202e-06, "loss": 0.1287, "step": 34000 }, { "epoch": 2.77, "eval_accuracy": 0.8639836984207845, "eval_loss": 0.5389389395713806, "eval_runtime": 34.2048, "eval_samples_per_second": 286.948, "step": 34000 }, { "epoch": 2.81, "learning_rate": 1.278568880840673e-06, "loss": 0.1399, "step": 34500 }, { "epoch": 2.81, "eval_accuracy": 0.8647987773815589, "eval_loss": 0.5319520235061646, "eval_runtime": 34.136, "eval_samples_per_second": 287.527, "step": 34500 }, { "epoch": 2.85, "learning_rate": 1.0041978763684254e-06, "loss": 0.1371, "step": 35000 }, { "epoch": 2.85, "eval_accuracy": 0.867142129393785, "eval_loss": 0.5155227184295654, "eval_runtime": 34.0818, "eval_samples_per_second": 287.984, "step": 35000 }, { "epoch": 2.89, "learning_rate": 7.298268718961781e-07, "loss": 0.1403, "step": 35500 }, { "epoch": 2.89, "eval_accuracy": 0.8650025471217524, "eval_loss": 0.5259838104248047, "eval_runtime": 34.0228, "eval_samples_per_second": 288.483, "step": 35500 }, { "epoch": 2.93, "learning_rate": 4.5655335144181967e-07, "loss": 0.1402, "step": 36000 }, { "epoch": 2.93, "eval_accuracy": 0.8661232806928171, "eval_loss": 0.5214368104934692, "eval_runtime": 34.0674, "eval_samples_per_second": 288.105, "step": 36000 }, { "epoch": 2.97, "learning_rate": 1.8218234696957227e-07, "loss": 0.1343, "step": 36500 }, { "epoch": 2.97, "eval_accuracy": 0.8656138563423331, "eval_loss": 0.522437572479248, "eval_runtime": 34.0148, "eval_samples_per_second": 288.551, "step": 36500 }, { "epoch": 3.0, "step": 36816, "total_flos": 3.032342720870999e+17, "train_runtime": 14794.1737, "train_samples_per_second": 2.489 } ], "max_steps": 36816, "num_train_epochs": 3, "total_flos": 3.032342720870999e+17, "trial_name": null, "trial_params": null }