|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.99630204866504, |
|
"eval_steps": 500, |
|
"global_step": 253500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.990295857988166e-05, |
|
"loss": 6.3355, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.98043392504931e-05, |
|
"loss": 4.6153, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9705719921104535e-05, |
|
"loss": 4.0788, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.9607100591715975e-05, |
|
"loss": 3.7655, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.9508481262327415e-05, |
|
"loss": 3.5154, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9409861932938855e-05, |
|
"loss": 3.3402, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.93112426035503e-05, |
|
"loss": 3.1954, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.921262327416174e-05, |
|
"loss": 3.0717, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.9114003944773176e-05, |
|
"loss": 2.9541, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.9015384615384616e-05, |
|
"loss": 2.8672, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.8916765285996056e-05, |
|
"loss": 2.7782, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.88181459566075e-05, |
|
"loss": 2.7074, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.871952662721894e-05, |
|
"loss": 2.6362, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.862090729783038e-05, |
|
"loss": 2.5867, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.852228796844182e-05, |
|
"loss": 2.5227, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.842366863905326e-05, |
|
"loss": 2.4813, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.832504930966469e-05, |
|
"loss": 2.4355, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.822642998027614e-05, |
|
"loss": 2.3717, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 4.812781065088758e-05, |
|
"loss": 2.345, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.802919132149902e-05, |
|
"loss": 2.3076, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.793057199211046e-05, |
|
"loss": 2.2789, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 4.78319526627219e-05, |
|
"loss": 2.2335, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.773333333333333e-05, |
|
"loss": 2.2165, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.763471400394477e-05, |
|
"loss": 2.185, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 4.753609467455621e-05, |
|
"loss": 2.1461, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.743747534516765e-05, |
|
"loss": 2.1114, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 4.7338856015779094e-05, |
|
"loss": 2.0961, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.7240236686390534e-05, |
|
"loss": 2.0726, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.714181459566075e-05, |
|
"loss": 2.0446, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 4.70431952662722e-05, |
|
"loss": 2.0268, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 4.6944773175542406e-05, |
|
"loss": 2.0076, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 4.684615384615385e-05, |
|
"loss": 1.9788, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.674753451676529e-05, |
|
"loss": 1.9637, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.6648915187376726e-05, |
|
"loss": 1.9425, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.655029585798817e-05, |
|
"loss": 1.92, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.645167652859961e-05, |
|
"loss": 1.9069, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.635305719921105e-05, |
|
"loss": 1.8903, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.625443786982249e-05, |
|
"loss": 1.8661, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.61560157790927e-05, |
|
"loss": 1.8637, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 4.6057396449704143e-05, |
|
"loss": 1.8444, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.5958777120315584e-05, |
|
"loss": 1.8321, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.5860157790927024e-05, |
|
"loss": 1.8032, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 4.5761538461538464e-05, |
|
"loss": 1.7901, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 4.5662919132149904e-05, |
|
"loss": 1.7781, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 4.556449704142012e-05, |
|
"loss": 1.7769, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.546587771203156e-05, |
|
"loss": 1.7731, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 4.5367258382643e-05, |
|
"loss": 1.7503, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 4.526863905325444e-05, |
|
"loss": 1.7306, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 4.517021696252466e-05, |
|
"loss": 1.729, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 4.50715976331361e-05, |
|
"loss": 1.7238, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 4.497297830374754e-05, |
|
"loss": 1.7157, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 4.487435897435898e-05, |
|
"loss": 1.6753, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 4.477573964497042e-05, |
|
"loss": 1.6714, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 4.4677317554240634e-05, |
|
"loss": 1.6588, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 4.4578698224852074e-05, |
|
"loss": 1.6559, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 4.4480078895463514e-05, |
|
"loss": 1.6571, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 4.4381459566074954e-05, |
|
"loss": 1.6449, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.428284023668639e-05, |
|
"loss": 1.6439, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 4.4184220907297835e-05, |
|
"loss": 1.6303, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 4.4085798816568044e-05, |
|
"loss": 1.6159, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 4.398717948717949e-05, |
|
"loss": 1.6218, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 4.3888757396449707e-05, |
|
"loss": 1.5967, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 4.379013806706115e-05, |
|
"loss": 1.5843, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 4.369151873767259e-05, |
|
"loss": 1.581, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 4.359289940828403e-05, |
|
"loss": 1.5737, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 4.349428007889547e-05, |
|
"loss": 1.5591, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 4.339566074950691e-05, |
|
"loss": 1.5654, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 4.3297238658777124e-05, |
|
"loss": 1.5536, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 4.3198619329388564e-05, |
|
"loss": 1.5458, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 4.3100000000000004e-05, |
|
"loss": 1.5353, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.300138067061144e-05, |
|
"loss": 1.5321, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 4.290276134122288e-05, |
|
"loss": 1.5217, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 4.280414201183432e-05, |
|
"loss": 1.5073, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 4.270552268244576e-05, |
|
"loss": 1.5073, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 4.2606903353057205e-05, |
|
"loss": 1.4939, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 4.2508284023668646e-05, |
|
"loss": 1.4914, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 4.2409664694280086e-05, |
|
"loss": 1.489, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 4.23112426035503e-05, |
|
"loss": 1.4797, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.221262327416174e-05, |
|
"loss": 1.4836, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 4.2114003944773175e-05, |
|
"loss": 1.4689, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 4.20155818540434e-05, |
|
"loss": 1.4639, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 4.191696252465484e-05, |
|
"loss": 1.4526, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 4.181834319526627e-05, |
|
"loss": 1.4486, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 4.171972386587771e-05, |
|
"loss": 1.4381, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 4.162110453648915e-05, |
|
"loss": 1.4321, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 4.152248520710059e-05, |
|
"loss": 1.4366, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 4.142386587771203e-05, |
|
"loss": 1.4324, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 4.132524654832347e-05, |
|
"loss": 1.4236, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 4.122662721893491e-05, |
|
"loss": 1.4231, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 4.112800788954635e-05, |
|
"loss": 1.416, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 4.102958579881657e-05, |
|
"loss": 1.4205, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 4.093096646942801e-05, |
|
"loss": 1.3942, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 9.17, |
|
"learning_rate": 4.083234714003945e-05, |
|
"loss": 1.402, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 4.073372781065089e-05, |
|
"loss": 1.3927, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 4.063510848126233e-05, |
|
"loss": 1.3817, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 4.053648915187377e-05, |
|
"loss": 1.3762, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.043786982248521e-05, |
|
"loss": 1.3781, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 4.0339447731755426e-05, |
|
"loss": 1.3754, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 4.0240828402366867e-05, |
|
"loss": 1.3764, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 4.014240631163708e-05, |
|
"loss": 1.3621, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 4.004378698224852e-05, |
|
"loss": 1.3692, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 10.06, |
|
"learning_rate": 3.994516765285996e-05, |
|
"loss": 1.3485, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 3.98465483234714e-05, |
|
"loss": 1.3479, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 10.26, |
|
"learning_rate": 3.974792899408284e-05, |
|
"loss": 1.343, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 10.35, |
|
"learning_rate": 3.9649309664694284e-05, |
|
"loss": 1.3521, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 3.9550690335305724e-05, |
|
"loss": 1.336, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 3.9452071005917164e-05, |
|
"loss": 1.3361, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 3.9353451676528604e-05, |
|
"loss": 1.3395, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 3.9254832347140045e-05, |
|
"loss": 1.3308, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 3.915621301775148e-05, |
|
"loss": 1.3235, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 3.90577909270217e-05, |
|
"loss": 1.3222, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 3.8959171597633134e-05, |
|
"loss": 1.322, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 3.8860552268244574e-05, |
|
"loss": 1.3015, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 3.8761932938856015e-05, |
|
"loss": 1.3081, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 3.8663313609467455e-05, |
|
"loss": 1.3034, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 3.8564694280078895e-05, |
|
"loss": 1.2988, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 3.846607495069034e-05, |
|
"loss": 1.287, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 3.836765285996056e-05, |
|
"loss": 1.2945, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 3.8269033530572e-05, |
|
"loss": 1.2901, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 3.817041420118344e-05, |
|
"loss": 1.2921, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 3.8071992110453654e-05, |
|
"loss": 1.2868, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 3.7973372781065094e-05, |
|
"loss": 1.2843, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 3.787475345167653e-05, |
|
"loss": 1.2763, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 3.777613412228797e-05, |
|
"loss": 1.2681, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 3.767751479289941e-05, |
|
"loss": 1.2677, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 3.757889546351085e-05, |
|
"loss": 1.2675, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 3.7480473372781064e-05, |
|
"loss": 1.2605, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 3.7381854043392505e-05, |
|
"loss": 1.262, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 3.7283234714003945e-05, |
|
"loss": 1.2483, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 12.82, |
|
"learning_rate": 3.7184615384615385e-05, |
|
"loss": 1.2615, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 3.7085996055226825e-05, |
|
"loss": 1.2499, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 13.02, |
|
"learning_rate": 3.6987376725838266e-05, |
|
"loss": 1.2475, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 13.12, |
|
"learning_rate": 3.6888757396449706e-05, |
|
"loss": 1.2385, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 3.6790138067061146e-05, |
|
"loss": 1.2381, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 3.6691518737672586e-05, |
|
"loss": 1.2394, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 3.65930966469428e-05, |
|
"loss": 1.2345, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 3.649447731755424e-05, |
|
"loss": 1.2348, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 3.639585798816568e-05, |
|
"loss": 1.2237, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 3.62974358974359e-05, |
|
"loss": 1.2346, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 3.619881656804734e-05, |
|
"loss": 1.2205, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 3.610019723865878e-05, |
|
"loss": 1.2187, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 3.600157790927022e-05, |
|
"loss": 1.222, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 3.590295857988166e-05, |
|
"loss": 1.2185, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 3.58043392504931e-05, |
|
"loss": 1.2069, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 14.3, |
|
"learning_rate": 3.570571992110453e-05, |
|
"loss": 1.2022, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 3.5607297830374756e-05, |
|
"loss": 1.2077, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 14.5, |
|
"learning_rate": 3.5508678500986196e-05, |
|
"loss": 1.2086, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 14.59, |
|
"learning_rate": 3.5410059171597636e-05, |
|
"loss": 1.2018, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 14.69, |
|
"learning_rate": 3.5311439842209076e-05, |
|
"loss": 1.1921, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 3.521282051282052e-05, |
|
"loss": 1.1921, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 3.511420118343196e-05, |
|
"loss": 1.195, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 3.50155818540434e-05, |
|
"loss": 1.1917, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 3.491715976331361e-05, |
|
"loss": 1.1868, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 3.481854043392505e-05, |
|
"loss": 1.188, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 15.28, |
|
"learning_rate": 3.4719921104536493e-05, |
|
"loss": 1.1823, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 3.462130177514793e-05, |
|
"loss": 1.1842, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.452268244575937e-05, |
|
"loss": 1.1753, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 3.442406311637081e-05, |
|
"loss": 1.1706, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 3.432544378698225e-05, |
|
"loss": 1.176, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 3.4226824457593695e-05, |
|
"loss": 1.1707, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 3.4128205128205135e-05, |
|
"loss": 1.1627, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 15.98, |
|
"learning_rate": 3.402978303747535e-05, |
|
"loss": 1.166, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 16.07, |
|
"learning_rate": 3.393116370808679e-05, |
|
"loss": 1.1525, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 3.3832544378698224e-05, |
|
"loss": 1.1591, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 3.3733925049309665e-05, |
|
"loss": 1.159, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 16.37, |
|
"learning_rate": 3.3635305719921105e-05, |
|
"loss": 1.1587, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 16.47, |
|
"learning_rate": 3.353688362919132e-05, |
|
"loss": 1.156, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 16.57, |
|
"learning_rate": 3.343826429980276e-05, |
|
"loss": 1.1462, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 3.33396449704142e-05, |
|
"loss": 1.1573, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"learning_rate": 3.324102564102564e-05, |
|
"loss": 1.1403, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 3.314260355029586e-05, |
|
"loss": 1.1419, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 3.30439842209073e-05, |
|
"loss": 1.1434, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 3.294536489151874e-05, |
|
"loss": 1.1452, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 3.2846942800788954e-05, |
|
"loss": 1.128, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 17.26, |
|
"learning_rate": 3.2748323471400394e-05, |
|
"loss": 1.1396, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 3.2649704142011834e-05, |
|
"loss": 1.13, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 17.45, |
|
"learning_rate": 3.255108481262328e-05, |
|
"loss": 1.1303, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 3.2452465483234715e-05, |
|
"loss": 1.1292, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 17.65, |
|
"learning_rate": 3.2353846153846155e-05, |
|
"loss": 1.1313, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 3.2255226824457595e-05, |
|
"loss": 1.1294, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 3.2156607495069035e-05, |
|
"loss": 1.1237, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 3.2057988165680475e-05, |
|
"loss": 1.1252, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 18.05, |
|
"learning_rate": 3.1959368836291916e-05, |
|
"loss": 1.1231, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 3.1860749506903356e-05, |
|
"loss": 1.1169, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 3.176232741617357e-05, |
|
"loss": 1.1104, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 3.166370808678501e-05, |
|
"loss": 1.1108, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 3.156508875739645e-05, |
|
"loss": 1.1034, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 3.1466469428007886e-05, |
|
"loss": 1.1067, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 3.136785009861933e-05, |
|
"loss": 1.1096, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 3.126923076923077e-05, |
|
"loss": 1.103, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 18.83, |
|
"learning_rate": 3.117080867850099e-05, |
|
"loss": 1.1128, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 3.107218934911243e-05, |
|
"loss": 1.0998, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 3.097357001972387e-05, |
|
"loss": 1.0949, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"learning_rate": 3.087495069033531e-05, |
|
"loss": 1.0935, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 19.23, |
|
"learning_rate": 3.077633136094675e-05, |
|
"loss": 1.1058, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 19.33, |
|
"learning_rate": 3.067771203155819e-05, |
|
"loss": 1.0914, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 3.0579092702169623e-05, |
|
"loss": 1.0907, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 19.53, |
|
"learning_rate": 3.0480473372781067e-05, |
|
"loss": 1.0849, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 19.62, |
|
"learning_rate": 3.0382248520710062e-05, |
|
"loss": 1.0866, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 19.72, |
|
"learning_rate": 3.02836291913215e-05, |
|
"loss": 1.0957, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 3.018500986193294e-05, |
|
"loss": 1.0817, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 3.008639053254438e-05, |
|
"loss": 1.0924, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 2.998777120315582e-05, |
|
"loss": 1.0742, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 20.12, |
|
"learning_rate": 2.9889151873767256e-05, |
|
"loss": 1.0749, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 20.22, |
|
"learning_rate": 2.9790729783037475e-05, |
|
"loss": 1.0764, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 20.31, |
|
"learning_rate": 2.969211045364892e-05, |
|
"loss": 1.0746, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 20.41, |
|
"learning_rate": 2.959349112426036e-05, |
|
"loss": 1.073, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 20.51, |
|
"learning_rate": 2.9494871794871796e-05, |
|
"loss": 1.0738, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 20.61, |
|
"learning_rate": 2.9396252465483236e-05, |
|
"loss": 1.0642, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 20.71, |
|
"learning_rate": 2.9297633136094677e-05, |
|
"loss": 1.0707, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 20.81, |
|
"learning_rate": 2.9199013806706117e-05, |
|
"loss": 1.0765, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 2.9100394477317554e-05, |
|
"loss": 1.0729, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 2.9001775147928994e-05, |
|
"loss": 1.0632, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 21.1, |
|
"learning_rate": 2.890355029585799e-05, |
|
"loss": 1.0581, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 2.880493096646943e-05, |
|
"loss": 1.058, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"learning_rate": 2.870631163708087e-05, |
|
"loss": 1.0627, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 2.8607692307692306e-05, |
|
"loss": 1.0581, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 2.8509270216962525e-05, |
|
"loss": 1.0568, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 21.6, |
|
"learning_rate": 2.8410650887573966e-05, |
|
"loss": 1.05, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"learning_rate": 2.8312031558185402e-05, |
|
"loss": 1.0602, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 21.79, |
|
"learning_rate": 2.8213412228796843e-05, |
|
"loss": 1.0533, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 21.89, |
|
"learning_rate": 2.8114792899408286e-05, |
|
"loss": 1.0573, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"learning_rate": 2.8016370808678506e-05, |
|
"loss": 1.0523, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 2.7917751479289946e-05, |
|
"loss": 1.0414, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 22.19, |
|
"learning_rate": 2.7819132149901383e-05, |
|
"loss": 1.0416, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 22.29, |
|
"learning_rate": 2.7720512820512823e-05, |
|
"loss": 1.042, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 22.38, |
|
"learning_rate": 2.7621893491124263e-05, |
|
"loss": 1.0446, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"learning_rate": 2.752347140039448e-05, |
|
"loss": 1.0371, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 22.58, |
|
"learning_rate": 2.742485207100592e-05, |
|
"loss": 1.0334, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"learning_rate": 2.732623274161736e-05, |
|
"loss": 1.0409, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"learning_rate": 2.7227810650887575e-05, |
|
"loss": 1.043, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 2.7129191321499015e-05, |
|
"loss": 1.0459, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 22.98, |
|
"learning_rate": 2.7030571992110452e-05, |
|
"loss": 1.0381, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 2.6931952662721893e-05, |
|
"loss": 1.0289, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 23.17, |
|
"learning_rate": 2.6833333333333333e-05, |
|
"loss": 1.0328, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 2.6734714003944773e-05, |
|
"loss": 1.0265, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 23.37, |
|
"learning_rate": 2.6636094674556217e-05, |
|
"loss": 1.0294, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 2.6537475345167657e-05, |
|
"loss": 1.0243, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 23.57, |
|
"learning_rate": 2.6438856015779094e-05, |
|
"loss": 1.0317, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 23.67, |
|
"learning_rate": 2.6340236686390534e-05, |
|
"loss": 1.0321, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 23.77, |
|
"learning_rate": 2.6241617357001974e-05, |
|
"loss": 1.0258, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 23.86, |
|
"learning_rate": 2.614319526627219e-05, |
|
"loss": 1.0245, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 23.96, |
|
"learning_rate": 2.604457593688363e-05, |
|
"loss": 1.0255, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 24.06, |
|
"learning_rate": 2.594615384615385e-05, |
|
"loss": 1.0241, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 2.5847534516765286e-05, |
|
"loss": 1.0119, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 2.5748915187376727e-05, |
|
"loss": 1.0134, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 2.5650295857988167e-05, |
|
"loss": 1.0086, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 24.46, |
|
"learning_rate": 2.5551676528599604e-05, |
|
"loss": 1.0143, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 2.5453057199211044e-05, |
|
"loss": 1.018, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 24.65, |
|
"learning_rate": 2.5354437869822484e-05, |
|
"loss": 1.0141, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 24.75, |
|
"learning_rate": 2.5255818540433928e-05, |
|
"loss": 1.0027, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 24.85, |
|
"learning_rate": 2.515759368836292e-05, |
|
"loss": 1.0171, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 24.95, |
|
"learning_rate": 2.5058974358974356e-05, |
|
"loss": 1.0162, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 25.05, |
|
"learning_rate": 2.49603550295858e-05, |
|
"loss": 1.0012, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 2.486173570019724e-05, |
|
"loss": 0.9968, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 25.24, |
|
"learning_rate": 2.4763116370808677e-05, |
|
"loss": 1.0019, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 25.34, |
|
"learning_rate": 2.466449704142012e-05, |
|
"loss": 0.9994, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 25.44, |
|
"learning_rate": 2.456587771203156e-05, |
|
"loss": 1.0005, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 25.54, |
|
"learning_rate": 2.4467258382642997e-05, |
|
"loss": 1.0004, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 2.4368639053254438e-05, |
|
"loss": 1.0023, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 25.74, |
|
"learning_rate": 2.4270019723865878e-05, |
|
"loss": 0.9932, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 25.84, |
|
"learning_rate": 2.4171400394477318e-05, |
|
"loss": 0.9891, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"learning_rate": 2.4072978303747537e-05, |
|
"loss": 1.0018, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 26.03, |
|
"learning_rate": 2.3974358974358978e-05, |
|
"loss": 0.9974, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 2.3875739644970414e-05, |
|
"loss": 0.9823, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 26.23, |
|
"learning_rate": 2.3777317554240634e-05, |
|
"loss": 0.989, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 2.367889546351085e-05, |
|
"loss": 0.9932, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"learning_rate": 2.358027613412229e-05, |
|
"loss": 0.9848, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 26.53, |
|
"learning_rate": 2.348165680473373e-05, |
|
"loss": 0.9874, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 26.63, |
|
"learning_rate": 2.3383037475345167e-05, |
|
"loss": 0.9876, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 26.72, |
|
"learning_rate": 2.3284418145956607e-05, |
|
"loss": 0.9832, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 26.82, |
|
"learning_rate": 2.318579881656805e-05, |
|
"loss": 0.9902, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 26.92, |
|
"learning_rate": 2.3087179487179488e-05, |
|
"loss": 0.9808, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 27.02, |
|
"learning_rate": 2.2988560157790928e-05, |
|
"loss": 0.9789, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 2.2890138067061147e-05, |
|
"loss": 0.9742, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 27.22, |
|
"learning_rate": 2.2791518737672584e-05, |
|
"loss": 0.9733, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"learning_rate": 2.2692899408284024e-05, |
|
"loss": 0.9864, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 27.41, |
|
"learning_rate": 2.2594280078895464e-05, |
|
"loss": 0.9769, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 27.51, |
|
"learning_rate": 2.2495660749506905e-05, |
|
"loss": 0.9751, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 27.61, |
|
"learning_rate": 2.2397041420118345e-05, |
|
"loss": 0.9776, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 27.71, |
|
"learning_rate": 2.2298422090729785e-05, |
|
"loss": 0.9781, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 27.81, |
|
"learning_rate": 2.2199802761341222e-05, |
|
"loss": 0.9687, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 27.91, |
|
"learning_rate": 2.210138067061144e-05, |
|
"loss": 0.9738, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 28.01, |
|
"learning_rate": 2.200276134122288e-05, |
|
"loss": 0.9686, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 28.1, |
|
"learning_rate": 2.1904339250493097e-05, |
|
"loss": 0.9642, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 28.2, |
|
"learning_rate": 2.1805719921104537e-05, |
|
"loss": 0.9571, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 2.1707100591715978e-05, |
|
"loss": 0.9686, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 2.1608481262327418e-05, |
|
"loss": 0.9633, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 2.1509861932938858e-05, |
|
"loss": 0.9627, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"learning_rate": 2.1411242603550295e-05, |
|
"loss": 0.9615, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 28.7, |
|
"learning_rate": 2.1312623274161735e-05, |
|
"loss": 0.9633, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 28.79, |
|
"learning_rate": 2.1214003944773175e-05, |
|
"loss": 0.9609, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 28.89, |
|
"learning_rate": 2.111538461538462e-05, |
|
"loss": 0.962, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 2.1016765285996056e-05, |
|
"loss": 0.9561, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"learning_rate": 2.0918145956607496e-05, |
|
"loss": 0.9611, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 29.19, |
|
"learning_rate": 2.0819526627218936e-05, |
|
"loss": 0.9502, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 29.29, |
|
"learning_rate": 2.0720907297830373e-05, |
|
"loss": 0.9651, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 29.39, |
|
"learning_rate": 2.062268244575937e-05, |
|
"loss": 0.9502, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 29.48, |
|
"learning_rate": 2.052406311637081e-05, |
|
"loss": 0.9558, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 2.042544378698225e-05, |
|
"loss": 0.9614, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 2.0326824457593692e-05, |
|
"loss": 0.9507, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 29.78, |
|
"learning_rate": 2.0228402366863905e-05, |
|
"loss": 0.9465, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 29.88, |
|
"learning_rate": 2.0129783037475348e-05, |
|
"loss": 0.9483, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 29.98, |
|
"learning_rate": 2.0031163708086785e-05, |
|
"loss": 0.9488, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 30.08, |
|
"learning_rate": 1.9932544378698225e-05, |
|
"loss": 0.9501, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 30.18, |
|
"learning_rate": 1.9833925049309666e-05, |
|
"loss": 0.9477, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 30.27, |
|
"learning_rate": 1.9735305719921106e-05, |
|
"loss": 0.9401, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 30.37, |
|
"learning_rate": 1.9636686390532546e-05, |
|
"loss": 0.9528, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 30.47, |
|
"learning_rate": 1.9538067061143986e-05, |
|
"loss": 0.9412, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 30.57, |
|
"learning_rate": 1.9439644970414202e-05, |
|
"loss": 0.9421, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 30.67, |
|
"learning_rate": 1.934122287968442e-05, |
|
"loss": 0.9455, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 1.9242603550295858e-05, |
|
"loss": 0.9401, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 30.87, |
|
"learning_rate": 1.91439842209073e-05, |
|
"loss": 0.9396, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 30.96, |
|
"learning_rate": 1.904536489151874e-05, |
|
"loss": 0.937, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 31.06, |
|
"learning_rate": 1.894674556213018e-05, |
|
"loss": 0.9382, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 31.16, |
|
"learning_rate": 1.8848126232741616e-05, |
|
"loss": 0.9368, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 31.26, |
|
"learning_rate": 1.874950690335306e-05, |
|
"loss": 0.9322, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 1.86508875739645e-05, |
|
"loss": 0.938, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 31.46, |
|
"learning_rate": 1.8552268244575936e-05, |
|
"loss": 0.9279, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 31.56, |
|
"learning_rate": 1.8453648915187377e-05, |
|
"loss": 0.9364, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 31.65, |
|
"learning_rate": 1.8355226824457593e-05, |
|
"loss": 0.9404, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 31.75, |
|
"learning_rate": 1.8256607495069033e-05, |
|
"loss": 0.9264, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 31.85, |
|
"learning_rate": 1.8158185404339252e-05, |
|
"loss": 0.9406, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 31.95, |
|
"learning_rate": 1.805956607495069e-05, |
|
"loss": 0.9316, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 32.05, |
|
"learning_rate": 1.7961143984220908e-05, |
|
"loss": 0.9257, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 32.15, |
|
"learning_rate": 1.7862524654832348e-05, |
|
"loss": 0.9292, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"learning_rate": 1.776390532544379e-05, |
|
"loss": 0.9198, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 32.34, |
|
"learning_rate": 1.766528599605523e-05, |
|
"loss": 0.9245, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 32.44, |
|
"learning_rate": 1.756666666666667e-05, |
|
"loss": 0.9244, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 32.54, |
|
"learning_rate": 1.7468047337278106e-05, |
|
"loss": 0.9294, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 32.64, |
|
"learning_rate": 1.7369428007889546e-05, |
|
"loss": 0.9284, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 32.74, |
|
"learning_rate": 1.727080867850099e-05, |
|
"loss": 0.9224, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"learning_rate": 1.7172189349112427e-05, |
|
"loss": 0.9269, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"learning_rate": 1.7073570019723867e-05, |
|
"loss": 0.9204, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 33.04, |
|
"learning_rate": 1.6974950690335307e-05, |
|
"loss": 0.9203, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"learning_rate": 1.6876331360946744e-05, |
|
"loss": 0.9213, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 33.23, |
|
"learning_rate": 1.6777712031558184e-05, |
|
"loss": 0.9177, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 1.6679289940828403e-05, |
|
"loss": 0.9117, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 33.43, |
|
"learning_rate": 1.6580670611439844e-05, |
|
"loss": 0.9109, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 33.53, |
|
"learning_rate": 1.6482051282051284e-05, |
|
"loss": 0.919, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 33.63, |
|
"learning_rate": 1.6383431952662724e-05, |
|
"loss": 0.917, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 33.73, |
|
"learning_rate": 1.628500986193294e-05, |
|
"loss": 0.9071, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 33.82, |
|
"learning_rate": 1.618639053254438e-05, |
|
"loss": 0.9128, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 33.92, |
|
"learning_rate": 1.6087771203155817e-05, |
|
"loss": 0.9237, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 34.02, |
|
"learning_rate": 1.5989151873767257e-05, |
|
"loss": 0.9179, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 34.12, |
|
"learning_rate": 1.58905325443787e-05, |
|
"loss": 0.9095, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 34.22, |
|
"learning_rate": 1.579191321499014e-05, |
|
"loss": 0.9104, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 34.32, |
|
"learning_rate": 1.5693491124260357e-05, |
|
"loss": 0.9134, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 34.42, |
|
"learning_rate": 1.5594871794871797e-05, |
|
"loss": 0.9078, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 34.51, |
|
"learning_rate": 1.5496449704142013e-05, |
|
"loss": 0.9037, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 34.61, |
|
"learning_rate": 1.5397830374753453e-05, |
|
"loss": 0.9107, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 34.71, |
|
"learning_rate": 1.5299211045364893e-05, |
|
"loss": 0.9082, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 34.81, |
|
"learning_rate": 1.520059171597633e-05, |
|
"loss": 0.914, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 1.5101972386587774e-05, |
|
"loss": 0.9087, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 35.01, |
|
"learning_rate": 1.5003353057199212e-05, |
|
"loss": 0.9059, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 35.11, |
|
"learning_rate": 1.490493096646943e-05, |
|
"loss": 0.8985, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 1.4806311637080869e-05, |
|
"loss": 0.9007, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 35.3, |
|
"learning_rate": 1.4707692307692309e-05, |
|
"loss": 0.9071, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 35.4, |
|
"learning_rate": 1.4609072978303747e-05, |
|
"loss": 0.9013, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 35.5, |
|
"learning_rate": 1.4510453648915188e-05, |
|
"loss": 0.9006, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 35.6, |
|
"learning_rate": 1.4412031558185405e-05, |
|
"loss": 0.9026, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 35.7, |
|
"learning_rate": 1.4313412228796844e-05, |
|
"loss": 0.9051, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 35.8, |
|
"learning_rate": 1.4214792899408286e-05, |
|
"loss": 0.9061, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 35.89, |
|
"learning_rate": 1.4116173570019726e-05, |
|
"loss": 0.9025, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 35.99, |
|
"learning_rate": 1.4017554240631164e-05, |
|
"loss": 0.8981, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 36.09, |
|
"learning_rate": 1.3918934911242603e-05, |
|
"loss": 0.8951, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 36.19, |
|
"learning_rate": 1.3820315581854043e-05, |
|
"loss": 0.8995, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 36.29, |
|
"learning_rate": 1.3721696252465485e-05, |
|
"loss": 0.8995, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 36.39, |
|
"learning_rate": 1.36232741617357e-05, |
|
"loss": 0.8954, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"learning_rate": 1.3524654832347141e-05, |
|
"loss": 0.8927, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 36.59, |
|
"learning_rate": 1.3426035502958581e-05, |
|
"loss": 0.8843, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 36.68, |
|
"learning_rate": 1.332741617357002e-05, |
|
"loss": 0.891, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 36.78, |
|
"learning_rate": 1.322879684418146e-05, |
|
"loss": 0.9012, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 36.88, |
|
"learning_rate": 1.3130177514792899e-05, |
|
"loss": 0.8937, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 36.98, |
|
"learning_rate": 1.303155818540434e-05, |
|
"loss": 0.8982, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 37.08, |
|
"learning_rate": 1.2932938856015781e-05, |
|
"loss": 0.8958, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 37.18, |
|
"learning_rate": 1.283431952662722e-05, |
|
"loss": 0.893, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 37.28, |
|
"learning_rate": 1.273570019723866e-05, |
|
"loss": 0.8875, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 1.2637278106508877e-05, |
|
"loss": 0.8863, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 37.47, |
|
"learning_rate": 1.2538658777120316e-05, |
|
"loss": 0.881, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 37.57, |
|
"learning_rate": 1.2440039447731756e-05, |
|
"loss": 0.8837, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 37.67, |
|
"learning_rate": 1.2341420118343196e-05, |
|
"loss": 0.8899, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 37.77, |
|
"learning_rate": 1.2242800788954635e-05, |
|
"loss": 0.8834, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"learning_rate": 1.2144181459566075e-05, |
|
"loss": 0.889, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"learning_rate": 1.2045562130177515e-05, |
|
"loss": 0.8911, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 38.06, |
|
"learning_rate": 1.1947140039447733e-05, |
|
"loss": 0.887, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 38.16, |
|
"learning_rate": 1.1848520710059171e-05, |
|
"loss": 0.8768, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 38.26, |
|
"learning_rate": 1.1749901380670612e-05, |
|
"loss": 0.8869, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 38.36, |
|
"learning_rate": 1.1651282051282052e-05, |
|
"loss": 0.8802, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 1.155266272189349e-05, |
|
"loss": 0.8752, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 38.56, |
|
"learning_rate": 1.1454240631163708e-05, |
|
"loss": 0.8792, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 38.66, |
|
"learning_rate": 1.135562130177515e-05, |
|
"loss": 0.8815, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 38.75, |
|
"learning_rate": 1.1257199211045366e-05, |
|
"loss": 0.8719, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 38.85, |
|
"learning_rate": 1.1158579881656806e-05, |
|
"loss": 0.8821, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 1.1059960552268244e-05, |
|
"loss": 0.88, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 39.05, |
|
"learning_rate": 1.0961341222879686e-05, |
|
"loss": 0.878, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 39.15, |
|
"learning_rate": 1.0862919132149902e-05, |
|
"loss": 0.8813, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 39.25, |
|
"learning_rate": 1.0764299802761342e-05, |
|
"loss": 0.8756, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 39.35, |
|
"learning_rate": 1.0665680473372781e-05, |
|
"loss": 0.8748, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 39.44, |
|
"learning_rate": 1.0567061143984223e-05, |
|
"loss": 0.8762, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 39.54, |
|
"learning_rate": 1.0468441814595661e-05, |
|
"loss": 0.876, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 39.64, |
|
"learning_rate": 1.03698224852071e-05, |
|
"loss": 0.8688, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 39.74, |
|
"learning_rate": 1.0271203155818542e-05, |
|
"loss": 0.8772, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 39.84, |
|
"learning_rate": 1.017258382642998e-05, |
|
"loss": 0.8721, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 39.94, |
|
"learning_rate": 1.007396449704142e-05, |
|
"loss": 0.8711, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 40.04, |
|
"learning_rate": 9.975542406311638e-06, |
|
"loss": 0.8737, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 40.14, |
|
"learning_rate": 9.876923076923078e-06, |
|
"loss": 0.8682, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 40.23, |
|
"learning_rate": 9.778303747534517e-06, |
|
"loss": 0.8691, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 40.33, |
|
"learning_rate": 9.679684418145957e-06, |
|
"loss": 0.8727, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 40.43, |
|
"learning_rate": 9.581065088757397e-06, |
|
"loss": 0.8725, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 9.482642998027613e-06, |
|
"loss": 0.8707, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 40.63, |
|
"learning_rate": 9.384023668639053e-06, |
|
"loss": 0.8761, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 40.73, |
|
"learning_rate": 9.285404339250494e-06, |
|
"loss": 0.8674, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 40.83, |
|
"learning_rate": 9.186785009861934e-06, |
|
"loss": 0.8629, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 40.92, |
|
"learning_rate": 9.088165680473374e-06, |
|
"loss": 0.8733, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 41.02, |
|
"learning_rate": 8.989546351084813e-06, |
|
"loss": 0.8634, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 8.89112426035503e-06, |
|
"loss": 0.8653, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 41.22, |
|
"learning_rate": 8.792504930966469e-06, |
|
"loss": 0.8645, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 41.32, |
|
"learning_rate": 8.69388560157791e-06, |
|
"loss": 0.8623, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 41.42, |
|
"learning_rate": 8.59526627218935e-06, |
|
"loss": 0.8655, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 41.52, |
|
"learning_rate": 8.496844181459567e-06, |
|
"loss": 0.8619, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 41.61, |
|
"learning_rate": 8.398224852071005e-06, |
|
"loss": 0.8728, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 41.71, |
|
"learning_rate": 8.299605522682447e-06, |
|
"loss": 0.8671, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 41.81, |
|
"learning_rate": 8.200986193293886e-06, |
|
"loss": 0.8606, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 41.91, |
|
"learning_rate": 8.102366863905324e-06, |
|
"loss": 0.8578, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 42.01, |
|
"learning_rate": 8.003747534516766e-06, |
|
"loss": 0.8644, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"learning_rate": 7.905128205128205e-06, |
|
"loss": 0.8576, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 42.21, |
|
"learning_rate": 7.806508875739647e-06, |
|
"loss": 0.855, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 42.3, |
|
"learning_rate": 7.707889546351085e-06, |
|
"loss": 0.85, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 7.609270216962525e-06, |
|
"loss": 0.8654, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 7.510650887573965e-06, |
|
"loss": 0.8571, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 42.6, |
|
"learning_rate": 7.412031558185404e-06, |
|
"loss": 0.8673, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 42.7, |
|
"learning_rate": 7.313609467455622e-06, |
|
"loss": 0.8577, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 42.8, |
|
"learning_rate": 7.214990138067061e-06, |
|
"loss": 0.8535, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 42.9, |
|
"learning_rate": 7.116370808678502e-06, |
|
"loss": 0.8613, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"learning_rate": 7.017751479289941e-06, |
|
"loss": 0.8586, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 43.09, |
|
"learning_rate": 6.919329388560158e-06, |
|
"loss": 0.8546, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 43.19, |
|
"learning_rate": 6.820710059171598e-06, |
|
"loss": 0.8521, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 6.722090729783037e-06, |
|
"loss": 0.8598, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 43.39, |
|
"learning_rate": 6.623668639053255e-06, |
|
"loss": 0.8527, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 43.49, |
|
"learning_rate": 6.525049309664695e-06, |
|
"loss": 0.8512, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 43.59, |
|
"learning_rate": 6.426429980276134e-06, |
|
"loss": 0.8522, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 43.69, |
|
"learning_rate": 6.327810650887574e-06, |
|
"loss": 0.855, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 43.78, |
|
"learning_rate": 6.229191321499015e-06, |
|
"loss": 0.854, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 43.88, |
|
"learning_rate": 6.130769230769231e-06, |
|
"loss": 0.8585, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 43.98, |
|
"learning_rate": 6.032149901380671e-06, |
|
"loss": 0.8577, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 44.08, |
|
"learning_rate": 5.933530571992111e-06, |
|
"loss": 0.8501, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 44.18, |
|
"learning_rate": 5.8349112426035505e-06, |
|
"loss": 0.8473, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 44.28, |
|
"learning_rate": 5.73629191321499e-06, |
|
"loss": 0.8528, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 44.38, |
|
"learning_rate": 5.6378698224852074e-06, |
|
"loss": 0.8459, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 44.47, |
|
"learning_rate": 5.539250493096648e-06, |
|
"loss": 0.8507, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 44.57, |
|
"learning_rate": 5.440631163708087e-06, |
|
"loss": 0.844, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 44.67, |
|
"learning_rate": 5.342011834319527e-06, |
|
"loss": 0.8505, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 44.77, |
|
"learning_rate": 5.243392504930967e-06, |
|
"loss": 0.8497, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 44.87, |
|
"learning_rate": 5.144773175542406e-06, |
|
"loss": 0.8534, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 44.97, |
|
"learning_rate": 5.046153846153846e-06, |
|
"loss": 0.8451, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 45.07, |
|
"learning_rate": 4.9475345167652866e-06, |
|
"loss": 0.8426, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 4.848915187376726e-06, |
|
"loss": 0.8503, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 45.26, |
|
"learning_rate": 4.750493096646943e-06, |
|
"loss": 0.8476, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 45.36, |
|
"learning_rate": 4.65207100591716e-06, |
|
"loss": 0.8432, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 45.46, |
|
"learning_rate": 4.5534516765286e-06, |
|
"loss": 0.8452, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 45.56, |
|
"learning_rate": 4.45483234714004e-06, |
|
"loss": 0.8435, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 45.66, |
|
"learning_rate": 4.356213017751479e-06, |
|
"loss": 0.8441, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 45.76, |
|
"learning_rate": 4.257593688362919e-06, |
|
"loss": 0.8435, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 45.85, |
|
"learning_rate": 4.15897435897436e-06, |
|
"loss": 0.8458, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"learning_rate": 4.060355029585799e-06, |
|
"loss": 0.8538, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 46.05, |
|
"learning_rate": 3.961932938856016e-06, |
|
"loss": 0.8452, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 3.863510848126233e-06, |
|
"loss": 0.8372, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 46.25, |
|
"learning_rate": 3.7648915187376726e-06, |
|
"loss": 0.8456, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 46.35, |
|
"learning_rate": 3.6662721893491124e-06, |
|
"loss": 0.8453, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 46.45, |
|
"learning_rate": 3.5676528599605527e-06, |
|
"loss": 0.8438, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 46.54, |
|
"learning_rate": 3.4690335305719925e-06, |
|
"loss": 0.8473, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 46.64, |
|
"learning_rate": 3.370414201183432e-06, |
|
"loss": 0.8404, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 46.74, |
|
"learning_rate": 3.271794871794872e-06, |
|
"loss": 0.843, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 46.84, |
|
"learning_rate": 3.173175542406312e-06, |
|
"loss": 0.8439, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 46.94, |
|
"learning_rate": 3.0745562130177517e-06, |
|
"loss": 0.8492, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 47.04, |
|
"learning_rate": 2.975936883629191e-06, |
|
"loss": 0.8425, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 47.14, |
|
"learning_rate": 2.8775147928994087e-06, |
|
"loss": 0.8401, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 47.24, |
|
"learning_rate": 2.778895463510848e-06, |
|
"loss": 0.8437, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 47.33, |
|
"learning_rate": 2.6802761341222883e-06, |
|
"loss": 0.8375, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 47.43, |
|
"learning_rate": 2.581656804733728e-06, |
|
"loss": 0.8448, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 47.53, |
|
"learning_rate": 2.4830374753451675e-06, |
|
"loss": 0.8421, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 47.63, |
|
"learning_rate": 2.3844181459566077e-06, |
|
"loss": 0.8446, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 47.73, |
|
"learning_rate": 2.2859960552268244e-06, |
|
"loss": 0.8403, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 47.83, |
|
"learning_rate": 2.1873767258382646e-06, |
|
"loss": 0.8343, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 47.93, |
|
"learning_rate": 2.088757396449704e-06, |
|
"loss": 0.8386, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 48.02, |
|
"learning_rate": 1.9901380670611443e-06, |
|
"loss": 0.8284, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 48.12, |
|
"learning_rate": 1.8915187376725839e-06, |
|
"loss": 0.8372, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 48.22, |
|
"learning_rate": 1.7930966469428008e-06, |
|
"loss": 0.8392, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 48.32, |
|
"learning_rate": 1.6944773175542408e-06, |
|
"loss": 0.8418, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 48.42, |
|
"learning_rate": 1.5958579881656804e-06, |
|
"loss": 0.8393, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 48.52, |
|
"learning_rate": 1.4972386587771204e-06, |
|
"loss": 0.838, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 48.62, |
|
"learning_rate": 1.3986193293885602e-06, |
|
"loss": 0.8403, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 48.71, |
|
"learning_rate": 1.3001972386587771e-06, |
|
"loss": 0.8346, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 48.81, |
|
"learning_rate": 1.201577909270217e-06, |
|
"loss": 0.8348, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 48.91, |
|
"learning_rate": 1.102958579881657e-06, |
|
"loss": 0.8375, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 49.01, |
|
"learning_rate": 1.0043392504930968e-06, |
|
"loss": 0.838, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 49.11, |
|
"learning_rate": 9.057199211045366e-07, |
|
"loss": 0.831, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 49.21, |
|
"learning_rate": 8.072978303747534e-07, |
|
"loss": 0.8363, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 49.31, |
|
"learning_rate": 7.086785009861934e-07, |
|
"loss": 0.8378, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 49.4, |
|
"learning_rate": 6.102564102564103e-07, |
|
"loss": 0.8329, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 49.5, |
|
"learning_rate": 5.116370808678501e-07, |
|
"loss": 0.8419, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 4.1301775147929e-07, |
|
"loss": 0.8304, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 49.7, |
|
"learning_rate": 3.1439842209072983e-07, |
|
"loss": 0.8399, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 49.8, |
|
"learning_rate": 2.1577909270216962e-07, |
|
"loss": 0.834, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 49.9, |
|
"learning_rate": 1.1715976331360947e-07, |
|
"loss": 0.838, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.854043392504931e-08, |
|
"loss": 0.8346, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 253500, |
|
"total_flos": 2.1443533265845617e+18, |
|
"train_loss": 1.184410398825621, |
|
"train_runtime": 354987.1489, |
|
"train_samples_per_second": 22.853, |
|
"train_steps_per_second": 0.714 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 253500, |
|
"num_train_epochs": 50, |
|
"save_steps": 10000, |
|
"total_flos": 2.1443533265845617e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|