|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 812310, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996922357228152e-05, |
|
"loss": 1.5563, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9938447144563037e-05, |
|
"loss": 1.4798, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9907670716844554e-05, |
|
"loss": 1.4636, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.987689428912608e-05, |
|
"loss": 1.4411, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9846117861407595e-05, |
|
"loss": 1.4334, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.981534143368911e-05, |
|
"loss": 1.4234, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.978456500597063e-05, |
|
"loss": 1.4075, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9753788578252146e-05, |
|
"loss": 1.4069, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.972301215053366e-05, |
|
"loss": 1.4132, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.969223572281519e-05, |
|
"loss": 1.4005, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9661459295096704e-05, |
|
"loss": 1.3938, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.963068286737822e-05, |
|
"loss": 1.3949, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.959990643965974e-05, |
|
"loss": 1.3849, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9569130011941255e-05, |
|
"loss": 1.3828, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.953835358422277e-05, |
|
"loss": 1.3794, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.950757715650429e-05, |
|
"loss": 1.3727, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.947680072878581e-05, |
|
"loss": 1.3797, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944602430106733e-05, |
|
"loss": 1.364, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.941524787334885e-05, |
|
"loss": 1.3602, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9384471445630365e-05, |
|
"loss": 1.3704, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.935369501791188e-05, |
|
"loss": 1.3619, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.93229185901934e-05, |
|
"loss": 1.369, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9292142162474916e-05, |
|
"loss": 1.3687, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.926136573475644e-05, |
|
"loss": 1.3481, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.923058930703796e-05, |
|
"loss": 1.3553, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9199812879319474e-05, |
|
"loss": 1.3532, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.916903645160099e-05, |
|
"loss": 1.3519, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.913826002388251e-05, |
|
"loss": 1.3524, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9107483596164025e-05, |
|
"loss": 1.3427, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.907670716844555e-05, |
|
"loss": 1.3436, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9045930740727066e-05, |
|
"loss": 1.3342, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.901515431300858e-05, |
|
"loss": 1.332, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.89843778852901e-05, |
|
"loss": 1.3384, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.895360145757162e-05, |
|
"loss": 1.3252, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8922825029853134e-05, |
|
"loss": 1.3405, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.889204860213465e-05, |
|
"loss": 1.3363, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8861272174416175e-05, |
|
"loss": 1.3297, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.883049574669769e-05, |
|
"loss": 1.3268, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.879971931897921e-05, |
|
"loss": 1.3136, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.876894289126073e-05, |
|
"loss": 1.3182, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8738166463542244e-05, |
|
"loss": 1.3226, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.870739003582376e-05, |
|
"loss": 1.3186, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8676613608105285e-05, |
|
"loss": 1.3234, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.86458371803868e-05, |
|
"loss": 1.3214, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.861506075266832e-05, |
|
"loss": 1.3184, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8584284324949836e-05, |
|
"loss": 1.3074, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.855350789723135e-05, |
|
"loss": 1.3137, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.852273146951287e-05, |
|
"loss": 1.3114, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.849195504179439e-05, |
|
"loss": 1.3174, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.846117861407591e-05, |
|
"loss": 1.3094, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.843040218635743e-05, |
|
"loss": 1.3133, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8399625758638945e-05, |
|
"loss": 1.3095, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.836884933092046e-05, |
|
"loss": 1.3052, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.833807290320198e-05, |
|
"loss": 1.3048, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8307296475483497e-05, |
|
"loss": 1.3093, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8276520047765014e-05, |
|
"loss": 1.307, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.824574362004654e-05, |
|
"loss": 1.312, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8214967192328055e-05, |
|
"loss": 1.3006, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.818419076460957e-05, |
|
"loss": 1.3069, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.815341433689109e-05, |
|
"loss": 1.2969, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8122637909172606e-05, |
|
"loss": 1.3042, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.809186148145412e-05, |
|
"loss": 1.2999, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.806108505373565e-05, |
|
"loss": 1.3, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8030308626017164e-05, |
|
"loss": 1.2973, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.799953219829868e-05, |
|
"loss": 1.2949, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.79687557705802e-05, |
|
"loss": 1.293, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7937979342861715e-05, |
|
"loss": 1.2834, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.790720291514323e-05, |
|
"loss": 1.3032, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.787642648742475e-05, |
|
"loss": 1.2926, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.784565005970627e-05, |
|
"loss": 1.291, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.781487363198779e-05, |
|
"loss": 1.2958, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.778409720426931e-05, |
|
"loss": 1.2883, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7753320776550824e-05, |
|
"loss": 1.2998, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.772254434883234e-05, |
|
"loss": 1.2781, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.769176792111386e-05, |
|
"loss": 1.2933, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.766099149339538e-05, |
|
"loss": 1.2969, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.76302150656769e-05, |
|
"loss": 1.279, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.759943863795842e-05, |
|
"loss": 1.2929, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7568662210239934e-05, |
|
"loss": 1.2873, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.753788578252145e-05, |
|
"loss": 1.2923, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.750710935480297e-05, |
|
"loss": 1.2974, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7476332927084485e-05, |
|
"loss": 1.2789, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.744555649936601e-05, |
|
"loss": 1.2783, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7414780071647526e-05, |
|
"loss": 1.2781, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.738400364392904e-05, |
|
"loss": 1.2925, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.735322721621056e-05, |
|
"loss": 1.2818, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.732245078849208e-05, |
|
"loss": 1.2788, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7291674360773594e-05, |
|
"loss": 1.2732, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.726089793305512e-05, |
|
"loss": 1.278, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7230121505336635e-05, |
|
"loss": 1.2772, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.719934507761815e-05, |
|
"loss": 1.2752, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.716856864989967e-05, |
|
"loss": 1.2623, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.713779222218119e-05, |
|
"loss": 1.2704, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7107015794462704e-05, |
|
"loss": 1.2778, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.707623936674422e-05, |
|
"loss": 1.2775, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7045462939025745e-05, |
|
"loss": 1.2728, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.701468651130726e-05, |
|
"loss": 1.2722, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.698391008358878e-05, |
|
"loss": 1.2701, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6953133655870296e-05, |
|
"loss": 1.2808, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.692235722815181e-05, |
|
"loss": 1.2653, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.689158080043333e-05, |
|
"loss": 1.2769, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.686080437271485e-05, |
|
"loss": 1.2752, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.683002794499637e-05, |
|
"loss": 1.2725, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.679925151727789e-05, |
|
"loss": 1.26, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6768475089559405e-05, |
|
"loss": 1.2682, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.673769866184092e-05, |
|
"loss": 1.2759, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.670692223412244e-05, |
|
"loss": 1.2719, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6676145806403957e-05, |
|
"loss": 1.268, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.664536937868548e-05, |
|
"loss": 1.2631, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6614592950967e-05, |
|
"loss": 1.2692, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6583816523248515e-05, |
|
"loss": 1.269, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.655304009553003e-05, |
|
"loss": 1.2673, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.652226366781155e-05, |
|
"loss": 1.2633, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6491487240093066e-05, |
|
"loss": 1.2589, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.646071081237458e-05, |
|
"loss": 1.2639, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.642993438465611e-05, |
|
"loss": 1.2571, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6399157956937624e-05, |
|
"loss": 1.2647, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.636838152921914e-05, |
|
"loss": 1.2582, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.633760510150066e-05, |
|
"loss": 1.259, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6306828673782175e-05, |
|
"loss": 1.2657, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.627605224606369e-05, |
|
"loss": 1.2602, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6245275818345216e-05, |
|
"loss": 1.2594, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.621449939062673e-05, |
|
"loss": 1.2608, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.618372296290825e-05, |
|
"loss": 1.259, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.615294653518977e-05, |
|
"loss": 1.2659, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6122170107471284e-05, |
|
"loss": 1.2597, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.60913936797528e-05, |
|
"loss": 1.2524, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.606061725203432e-05, |
|
"loss": 1.2521, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.602984082431584e-05, |
|
"loss": 1.2588, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.599906439659736e-05, |
|
"loss": 1.2554, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.596828796887888e-05, |
|
"loss": 1.25, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5937511541160394e-05, |
|
"loss": 1.2467, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.590673511344191e-05, |
|
"loss": 1.2472, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.587595868572343e-05, |
|
"loss": 1.2652, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5845182258004945e-05, |
|
"loss": 1.2506, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.581440583028647e-05, |
|
"loss": 1.247, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5783629402567986e-05, |
|
"loss": 1.2519, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.57528529748495e-05, |
|
"loss": 1.2489, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.572207654713102e-05, |
|
"loss": 1.2508, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.569130011941254e-05, |
|
"loss": 1.2556, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.566052369169406e-05, |
|
"loss": 1.2555, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.562974726397558e-05, |
|
"loss": 1.2407, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.55989708362571e-05, |
|
"loss": 1.2478, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.556819440853862e-05, |
|
"loss": 1.2537, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5537417980820136e-05, |
|
"loss": 1.2487, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.550664155310165e-05, |
|
"loss": 1.2418, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.547586512538317e-05, |
|
"loss": 1.2568, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.544508869766469e-05, |
|
"loss": 1.2428, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5414312269946205e-05, |
|
"loss": 1.2588, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.538353584222773e-05, |
|
"loss": 1.2455, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5352759414509246e-05, |
|
"loss": 1.2459, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.532198298679076e-05, |
|
"loss": 1.2502, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.529120655907228e-05, |
|
"loss": 1.2396, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.52604301313538e-05, |
|
"loss": 1.2437, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5229653703635314e-05, |
|
"loss": 1.2482, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.519887727591683e-05, |
|
"loss": 1.2368, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5168100848198355e-05, |
|
"loss": 1.2435, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.513732442047987e-05, |
|
"loss": 1.2463, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.510654799276139e-05, |
|
"loss": 1.239, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5075771565042906e-05, |
|
"loss": 1.2439, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.504499513732442e-05, |
|
"loss": 1.2313, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.501421870960594e-05, |
|
"loss": 1.2419, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4983442281887464e-05, |
|
"loss": 1.2356, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.495266585416898e-05, |
|
"loss": 1.2308, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.49218894264505e-05, |
|
"loss": 1.2382, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4891112998732015e-05, |
|
"loss": 1.2555, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.486033657101353e-05, |
|
"loss": 1.245, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.482956014329505e-05, |
|
"loss": 1.2384, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.479878371557657e-05, |
|
"loss": 1.2379, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.476800728785809e-05, |
|
"loss": 1.241, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.473723086013961e-05, |
|
"loss": 1.2262, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4706454432421125e-05, |
|
"loss": 1.2283, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.467567800470264e-05, |
|
"loss": 1.2331, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.464490157698416e-05, |
|
"loss": 1.2439, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4614125149265676e-05, |
|
"loss": 1.2369, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.45833487215472e-05, |
|
"loss": 1.2335, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.455257229382872e-05, |
|
"loss": 1.236, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4521795866110234e-05, |
|
"loss": 1.2333, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.449101943839175e-05, |
|
"loss": 1.2288, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.446024301067327e-05, |
|
"loss": 1.2315, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4429466582954785e-05, |
|
"loss": 1.2349, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.43986901552363e-05, |
|
"loss": 1.2227, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4367913727517826e-05, |
|
"loss": 1.2261, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4337137299799343e-05, |
|
"loss": 1.2303, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.430636087208086e-05, |
|
"loss": 1.2235, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.427558444436238e-05, |
|
"loss": 1.2336, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4244808016643895e-05, |
|
"loss": 1.2345, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.421403158892541e-05, |
|
"loss": 1.2257, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4183255161206936e-05, |
|
"loss": 1.2212, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.415247873348845e-05, |
|
"loss": 1.2221, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.412170230576997e-05, |
|
"loss": 1.2238, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.409092587805149e-05, |
|
"loss": 1.2256, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4060149450333004e-05, |
|
"loss": 1.2271, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.402937302261452e-05, |
|
"loss": 1.2193, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.399859659489604e-05, |
|
"loss": 1.223, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.396782016717756e-05, |
|
"loss": 1.2272, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.393704373945908e-05, |
|
"loss": 1.2184, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3906267311740596e-05, |
|
"loss": 1.2223, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.387549088402211e-05, |
|
"loss": 1.2296, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.384471445630363e-05, |
|
"loss": 1.2229, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.381393802858515e-05, |
|
"loss": 1.2206, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3783161600866665e-05, |
|
"loss": 1.2293, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.375238517314819e-05, |
|
"loss": 1.2245, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3721608745429706e-05, |
|
"loss": 1.2254, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.369083231771122e-05, |
|
"loss": 1.2136, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.366005588999274e-05, |
|
"loss": 1.2289, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.362927946227426e-05, |
|
"loss": 1.2234, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3598503034555774e-05, |
|
"loss": 1.2157, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.35677266068373e-05, |
|
"loss": 1.2285, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3536950179118815e-05, |
|
"loss": 1.2198, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.350617375140033e-05, |
|
"loss": 1.2256, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.347539732368185e-05, |
|
"loss": 1.2267, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3444620895963366e-05, |
|
"loss": 1.2103, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.341384446824488e-05, |
|
"loss": 1.2176, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.33830680405264e-05, |
|
"loss": 1.2279, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3352291612807924e-05, |
|
"loss": 1.2208, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.332151518508944e-05, |
|
"loss": 1.221, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.329073875737096e-05, |
|
"loss": 1.2268, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3259962329652475e-05, |
|
"loss": 1.2279, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.322918590193399e-05, |
|
"loss": 1.2205, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.319840947421551e-05, |
|
"loss": 1.2212, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3167633046497034e-05, |
|
"loss": 1.2086, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.313685661877855e-05, |
|
"loss": 1.2144, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.310608019106007e-05, |
|
"loss": 1.2346, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3075303763341585e-05, |
|
"loss": 1.2149, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.30445273356231e-05, |
|
"loss": 1.2156, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.301375090790462e-05, |
|
"loss": 1.2136, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2982974480186136e-05, |
|
"loss": 1.2101, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.295219805246766e-05, |
|
"loss": 1.2077, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.292142162474918e-05, |
|
"loss": 1.2137, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2890645197030694e-05, |
|
"loss": 1.2186, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.285986876931221e-05, |
|
"loss": 1.2302, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.282909234159373e-05, |
|
"loss": 1.2185, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2798315913875245e-05, |
|
"loss": 1.21, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.276753948615676e-05, |
|
"loss": 1.2065, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2736763058438286e-05, |
|
"loss": 1.2174, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2705986630719803e-05, |
|
"loss": 1.2078, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.267521020300132e-05, |
|
"loss": 1.2036, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.264443377528284e-05, |
|
"loss": 1.2098, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2613657347564355e-05, |
|
"loss": 1.209, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.258288091984587e-05, |
|
"loss": 1.2026, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2552104492127396e-05, |
|
"loss": 1.2192, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.252132806440891e-05, |
|
"loss": 1.219, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.249055163669043e-05, |
|
"loss": 1.2026, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.245977520897195e-05, |
|
"loss": 1.2077, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2428998781253464e-05, |
|
"loss": 1.2012, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.239822235353498e-05, |
|
"loss": 1.213, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.23674459258165e-05, |
|
"loss": 1.2107, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.233666949809802e-05, |
|
"loss": 1.2144, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.230589307037954e-05, |
|
"loss": 1.2044, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2275116642661056e-05, |
|
"loss": 1.2031, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.224434021494257e-05, |
|
"loss": 1.2116, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.221356378722409e-05, |
|
"loss": 1.2063, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.218278735950561e-05, |
|
"loss": 1.211, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.215201093178713e-05, |
|
"loss": 1.2049, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.212123450406865e-05, |
|
"loss": 1.2059, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2090458076350166e-05, |
|
"loss": 1.2088, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.205968164863168e-05, |
|
"loss": 1.2126, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.20289052209132e-05, |
|
"loss": 1.1968, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.199812879319472e-05, |
|
"loss": 1.1977, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1967352365476234e-05, |
|
"loss": 1.2032, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.193657593775776e-05, |
|
"loss": 1.2078, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1905799510039275e-05, |
|
"loss": 1.1938, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.187502308232079e-05, |
|
"loss": 1.2054, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.184424665460231e-05, |
|
"loss": 1.208, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1813470226883826e-05, |
|
"loss": 1.1988, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.178269379916534e-05, |
|
"loss": 1.2053, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.175191737144686e-05, |
|
"loss": 1.1997, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1721140943728384e-05, |
|
"loss": 1.2018, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.16903645160099e-05, |
|
"loss": 1.2037, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.165958808829142e-05, |
|
"loss": 1.2033, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1628811660572935e-05, |
|
"loss": 1.1989, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.159803523285445e-05, |
|
"loss": 1.2092, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.156725880513597e-05, |
|
"loss": 1.2002, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1536482377417493e-05, |
|
"loss": 1.2061, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.150570594969901e-05, |
|
"loss": 1.2063, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.147492952198053e-05, |
|
"loss": 1.2015, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1444153094262045e-05, |
|
"loss": 1.1987, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.141337666654356e-05, |
|
"loss": 1.1982, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.138260023882508e-05, |
|
"loss": 1.2023, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1351823811106596e-05, |
|
"loss": 1.1953, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.132104738338812e-05, |
|
"loss": 1.1943, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.129027095566964e-05, |
|
"loss": 1.2085, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1259494527951154e-05, |
|
"loss": 1.1943, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.122871810023267e-05, |
|
"loss": 1.1994, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.119794167251419e-05, |
|
"loss": 1.2037, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1167165244795705e-05, |
|
"loss": 1.1968, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.113638881707723e-05, |
|
"loss": 1.2031, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1105612389358746e-05, |
|
"loss": 1.2052, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.107483596164026e-05, |
|
"loss": 1.2006, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.104405953392178e-05, |
|
"loss": 1.1961, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.10132831062033e-05, |
|
"loss": 1.1909, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0982506678484815e-05, |
|
"loss": 1.1995, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.095173025076633e-05, |
|
"loss": 1.1978, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0920953823047856e-05, |
|
"loss": 1.1952, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.089017739532937e-05, |
|
"loss": 1.1901, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.085940096761089e-05, |
|
"loss": 1.1974, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.082862453989241e-05, |
|
"loss": 1.1914, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0797848112173924e-05, |
|
"loss": 1.193, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.076707168445544e-05, |
|
"loss": 1.1915, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0736295256736965e-05, |
|
"loss": 1.1945, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.070551882901848e-05, |
|
"loss": 1.1986, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.06747424013e-05, |
|
"loss": 1.1928, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0643965973581516e-05, |
|
"loss": 1.1946, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.061318954586303e-05, |
|
"loss": 1.1959, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.058241311814455e-05, |
|
"loss": 1.1911, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.055163669042607e-05, |
|
"loss": 1.1901, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.052086026270759e-05, |
|
"loss": 1.195, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.049008383498911e-05, |
|
"loss": 1.1938, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0459307407270625e-05, |
|
"loss": 1.1994, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.042853097955214e-05, |
|
"loss": 1.1911, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.039775455183366e-05, |
|
"loss": 1.1942, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.036697812411518e-05, |
|
"loss": 1.1889, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0336201696396694e-05, |
|
"loss": 1.185, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.030542526867822e-05, |
|
"loss": 1.1938, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0274648840959735e-05, |
|
"loss": 1.1941, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.024387241324125e-05, |
|
"loss": 1.1771, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.021309598552277e-05, |
|
"loss": 1.1895, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0182319557804286e-05, |
|
"loss": 1.1915, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.01515431300858e-05, |
|
"loss": 1.1935, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.012076670236733e-05, |
|
"loss": 1.194, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0089990274648844e-05, |
|
"loss": 1.189, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.005921384693036e-05, |
|
"loss": 1.1821, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.002843741921188e-05, |
|
"loss": 1.1875, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9997660991493395e-05, |
|
"loss": 1.1758, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.996688456377491e-05, |
|
"loss": 1.1868, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.993610813605643e-05, |
|
"loss": 1.1873, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9905331708337953e-05, |
|
"loss": 1.1875, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.987455528061947e-05, |
|
"loss": 1.1864, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.984377885290099e-05, |
|
"loss": 1.1872, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9813002425182505e-05, |
|
"loss": 1.1796, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.978222599746402e-05, |
|
"loss": 1.1841, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.975144956974554e-05, |
|
"loss": 1.1899, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.972067314202706e-05, |
|
"loss": 1.1836, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.968989671430858e-05, |
|
"loss": 1.1829, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.96591202865901e-05, |
|
"loss": 1.1899, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9628343858871614e-05, |
|
"loss": 1.1746, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.959756743115313e-05, |
|
"loss": 1.1882, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.956679100343465e-05, |
|
"loss": 1.1935, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9536014575716165e-05, |
|
"loss": 1.1812, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.950523814799769e-05, |
|
"loss": 1.187, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9474461720279206e-05, |
|
"loss": 1.1849, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.944368529256072e-05, |
|
"loss": 1.1839, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.941290886484224e-05, |
|
"loss": 1.1804, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.938213243712376e-05, |
|
"loss": 1.1821, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9351356009405275e-05, |
|
"loss": 1.1805, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.932057958168679e-05, |
|
"loss": 1.1808, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9289803153968316e-05, |
|
"loss": 1.1842, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.925902672624983e-05, |
|
"loss": 1.1811, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.922825029853135e-05, |
|
"loss": 1.1841, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.919747387081287e-05, |
|
"loss": 1.1749, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9166697443094384e-05, |
|
"loss": 1.1738, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.91359210153759e-05, |
|
"loss": 1.1821, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9105144587657425e-05, |
|
"loss": 1.1717, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.907436815993894e-05, |
|
"loss": 1.183, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.904359173222046e-05, |
|
"loss": 1.1745, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9012815304501976e-05, |
|
"loss": 1.1769, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.898203887678349e-05, |
|
"loss": 1.181, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.895126244906501e-05, |
|
"loss": 1.1838, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.892048602134653e-05, |
|
"loss": 1.1808, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.888970959362805e-05, |
|
"loss": 1.1802, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.885893316590957e-05, |
|
"loss": 1.1713, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8828156738191085e-05, |
|
"loss": 1.1832, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.87973803104726e-05, |
|
"loss": 1.1727, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.876660388275412e-05, |
|
"loss": 1.1835, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.873582745503564e-05, |
|
"loss": 1.1777, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.870505102731716e-05, |
|
"loss": 1.1784, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.867427459959868e-05, |
|
"loss": 1.173, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8643498171880195e-05, |
|
"loss": 1.1789, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.861272174416171e-05, |
|
"loss": 1.1719, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.858194531644323e-05, |
|
"loss": 1.1741, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8551168888724746e-05, |
|
"loss": 1.1817, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.852039246100626e-05, |
|
"loss": 1.1754, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.848961603328779e-05, |
|
"loss": 1.1789, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8458839605569304e-05, |
|
"loss": 1.1693, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.842806317785082e-05, |
|
"loss": 1.1775, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.839728675013234e-05, |
|
"loss": 1.1698, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8366510322413855e-05, |
|
"loss": 1.1773, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.833573389469537e-05, |
|
"loss": 1.1791, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.830495746697689e-05, |
|
"loss": 1.1708, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8274181039258413e-05, |
|
"loss": 1.1702, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.824340461153993e-05, |
|
"loss": 1.1818, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.821262818382145e-05, |
|
"loss": 1.1785, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8181851756102965e-05, |
|
"loss": 1.1732, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.815107532838448e-05, |
|
"loss": 1.1719, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8120298900666e-05, |
|
"loss": 1.176, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.808952247294752e-05, |
|
"loss": 1.1743, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.805874604522904e-05, |
|
"loss": 1.1712, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.802796961751056e-05, |
|
"loss": 1.1693, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7997193189792074e-05, |
|
"loss": 1.1695, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.796641676207359e-05, |
|
"loss": 1.1749, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.793564033435511e-05, |
|
"loss": 1.1658, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7904863906636625e-05, |
|
"loss": 1.1661, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.787408747891815e-05, |
|
"loss": 1.179, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.7843311051199666e-05, |
|
"loss": 1.1703, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.781253462348118e-05, |
|
"loss": 1.1705, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.77817581957627e-05, |
|
"loss": 1.1607, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.775098176804422e-05, |
|
"loss": 1.1698, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7720205340325735e-05, |
|
"loss": 1.1671, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.768942891260726e-05, |
|
"loss": 1.1753, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7658652484888776e-05, |
|
"loss": 1.1637, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.762787605717029e-05, |
|
"loss": 1.169, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.759709962945181e-05, |
|
"loss": 1.1707, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.756632320173333e-05, |
|
"loss": 1.1625, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7535546774014844e-05, |
|
"loss": 1.1568, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.750477034629636e-05, |
|
"loss": 1.1629, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7473993918577885e-05, |
|
"loss": 1.1645, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.74432174908594e-05, |
|
"loss": 1.1689, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.741244106314092e-05, |
|
"loss": 1.1638, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7381664635422436e-05, |
|
"loss": 1.1676, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.735088820770395e-05, |
|
"loss": 1.1614, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.732011177998547e-05, |
|
"loss": 1.1637, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7289335352266994e-05, |
|
"loss": 1.161, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.725855892454851e-05, |
|
"loss": 1.1651, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.722778249683003e-05, |
|
"loss": 1.1691, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7197006069111545e-05, |
|
"loss": 1.1666, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.716622964139306e-05, |
|
"loss": 1.1594, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.713545321367458e-05, |
|
"loss": 1.1636, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.71046767859561e-05, |
|
"loss": 1.1646, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.707390035823762e-05, |
|
"loss": 1.1693, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7043123930519144e-05, |
|
"loss": 1.1628, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.701234750280066e-05, |
|
"loss": 1.1708, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.698157107508218e-05, |
|
"loss": 1.1617, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6950794647363696e-05, |
|
"loss": 1.1649, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.692001821964521e-05, |
|
"loss": 1.1599, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.688924179192673e-05, |
|
"loss": 1.1562, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.685846536420825e-05, |
|
"loss": 1.1553, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.682768893648977e-05, |
|
"loss": 1.1671, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.679691250877129e-05, |
|
"loss": 1.1593, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6766136081052805e-05, |
|
"loss": 1.1561, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.673535965333432e-05, |
|
"loss": 1.1652, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.670458322561584e-05, |
|
"loss": 1.1615, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6673806797897356e-05, |
|
"loss": 1.1635, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.664303037017888e-05, |
|
"loss": 1.1586, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.66122539424604e-05, |
|
"loss": 1.1531, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6581477514741914e-05, |
|
"loss": 1.1629, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.655070108702343e-05, |
|
"loss": 1.1649, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.651992465930495e-05, |
|
"loss": 1.1541, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6489148231586466e-05, |
|
"loss": 1.1532, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.645837180386798e-05, |
|
"loss": 1.1637, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.6427595376149507e-05, |
|
"loss": 1.1622, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6396818948431024e-05, |
|
"loss": 1.1599, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.636604252071254e-05, |
|
"loss": 1.1606, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.633526609299406e-05, |
|
"loss": 1.16, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6304489665275575e-05, |
|
"loss": 1.1663, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.627371323755709e-05, |
|
"loss": 1.1549, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.624293680983861e-05, |
|
"loss": 1.1636, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.621216038212013e-05, |
|
"loss": 1.1565, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.618138395440165e-05, |
|
"loss": 1.1622, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.615060752668317e-05, |
|
"loss": 1.1639, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6119831098964684e-05, |
|
"loss": 1.1588, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.60890546712462e-05, |
|
"loss": 1.155, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.605827824352772e-05, |
|
"loss": 1.152, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.602750181580924e-05, |
|
"loss": 1.1504, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.599672538809076e-05, |
|
"loss": 1.1579, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5965948960372276e-05, |
|
"loss": 1.1603, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5935172532653794e-05, |
|
"loss": 1.1619, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.590439610493531e-05, |
|
"loss": 1.1486, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.587361967721683e-05, |
|
"loss": 1.1568, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5842843249498345e-05, |
|
"loss": 1.1534, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.581206682177987e-05, |
|
"loss": 1.1532, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5781290394061386e-05, |
|
"loss": 1.1587, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.57505139663429e-05, |
|
"loss": 1.1572, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.571973753862442e-05, |
|
"loss": 1.1539, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.568896111090594e-05, |
|
"loss": 1.1548, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5658184683187454e-05, |
|
"loss": 1.1496, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.562740825546898e-05, |
|
"loss": 1.1589, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5596631827750495e-05, |
|
"loss": 1.1562, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.556585540003201e-05, |
|
"loss": 1.1447, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.553507897231353e-05, |
|
"loss": 1.1534, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5504302544595046e-05, |
|
"loss": 1.1518, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5473526116876563e-05, |
|
"loss": 1.1538, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.544274968915808e-05, |
|
"loss": 1.1462, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5411973261439604e-05, |
|
"loss": 1.1496, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.538119683372112e-05, |
|
"loss": 1.1523, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.535042040600264e-05, |
|
"loss": 1.1456, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5319643978284156e-05, |
|
"loss": 1.1486, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.528886755056567e-05, |
|
"loss": 1.1433, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.525809112284719e-05, |
|
"loss": 1.1533, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.522731469512871e-05, |
|
"loss": 1.1488, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.519653826741023e-05, |
|
"loss": 1.1501, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.516576183969175e-05, |
|
"loss": 1.1464, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5134985411973265e-05, |
|
"loss": 1.1515, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.510420898425478e-05, |
|
"loss": 1.1446, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.50734325565363e-05, |
|
"loss": 1.1515, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.5042656128817816e-05, |
|
"loss": 1.1478, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.501187970109934e-05, |
|
"loss": 1.1478, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.498110327338086e-05, |
|
"loss": 1.1541, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4950326845662374e-05, |
|
"loss": 1.1524, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.491955041794389e-05, |
|
"loss": 1.1595, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.488877399022541e-05, |
|
"loss": 1.1546, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4857997562506926e-05, |
|
"loss": 1.1554, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.482722113478844e-05, |
|
"loss": 1.1479, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4796444707069967e-05, |
|
"loss": 1.1498, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4765668279351484e-05, |
|
"loss": 1.1502, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4734891851633e-05, |
|
"loss": 1.1477, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.470411542391452e-05, |
|
"loss": 1.1501, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4673338996196035e-05, |
|
"loss": 1.1536, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.464256256847755e-05, |
|
"loss": 1.149, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4611786140759076e-05, |
|
"loss": 1.1513, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.458100971304059e-05, |
|
"loss": 1.1459, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.455023328532211e-05, |
|
"loss": 1.1386, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.451945685760363e-05, |
|
"loss": 1.1449, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4488680429885144e-05, |
|
"loss": 1.1577, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.445790400216666e-05, |
|
"loss": 1.1557, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.442712757444818e-05, |
|
"loss": 1.1495, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.43963511467297e-05, |
|
"loss": 1.1445, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.436557471901122e-05, |
|
"loss": 1.1511, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4334798291292736e-05, |
|
"loss": 1.1509, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4304021863574254e-05, |
|
"loss": 1.1537, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.427324543585577e-05, |
|
"loss": 1.1451, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.424246900813729e-05, |
|
"loss": 1.1454, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.421169258041881e-05, |
|
"loss": 1.15, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.418091615270033e-05, |
|
"loss": 1.1492, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4150139724981846e-05, |
|
"loss": 1.1415, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411936329726336e-05, |
|
"loss": 1.1405, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.408858686954488e-05, |
|
"loss": 1.1472, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.40578104418264e-05, |
|
"loss": 1.1429, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4027034014107914e-05, |
|
"loss": 1.1368, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.399625758638944e-05, |
|
"loss": 1.1467, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3965481158670955e-05, |
|
"loss": 1.1562, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.393470473095247e-05, |
|
"loss": 1.1372, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.390392830323399e-05, |
|
"loss": 1.1459, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3873151875515506e-05, |
|
"loss": 1.1473, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3842375447797023e-05, |
|
"loss": 1.1462, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.381159902007854e-05, |
|
"loss": 1.1363, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3780822592360064e-05, |
|
"loss": 1.1392, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.375004616464158e-05, |
|
"loss": 1.1459, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.37192697369231e-05, |
|
"loss": 1.144, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3688493309204616e-05, |
|
"loss": 1.1426, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.365771688148613e-05, |
|
"loss": 1.1369, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.362694045376765e-05, |
|
"loss": 1.1415, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3596164026049174e-05, |
|
"loss": 1.1548, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.356538759833069e-05, |
|
"loss": 1.1399, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.353461117061221e-05, |
|
"loss": 1.1362, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.3503834742893725e-05, |
|
"loss": 1.1469, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.347305831517524e-05, |
|
"loss": 1.139, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.344228188745676e-05, |
|
"loss": 1.1356, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3411505459738276e-05, |
|
"loss": 1.1382, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.33807290320198e-05, |
|
"loss": 1.1351, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.334995260430132e-05, |
|
"loss": 1.1331, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3319176176582834e-05, |
|
"loss": 1.1403, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.328839974886435e-05, |
|
"loss": 1.1388, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.325762332114587e-05, |
|
"loss": 1.1388, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3226846893427386e-05, |
|
"loss": 1.1347, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.319607046570891e-05, |
|
"loss": 1.1375, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3165294037990427e-05, |
|
"loss": 1.1324, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3134517610271944e-05, |
|
"loss": 1.1322, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.310374118255346e-05, |
|
"loss": 1.1442, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.307296475483498e-05, |
|
"loss": 1.1326, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.3042188327116495e-05, |
|
"loss": 1.1346, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.301141189939801e-05, |
|
"loss": 1.1319, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.2980635471679536e-05, |
|
"loss": 1.132, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.294985904396105e-05, |
|
"loss": 1.1366, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.291908261624257e-05, |
|
"loss": 1.136, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.288830618852409e-05, |
|
"loss": 1.1282, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2857529760805604e-05, |
|
"loss": 1.1345, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.282675333308712e-05, |
|
"loss": 1.1328, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.279597690536864e-05, |
|
"loss": 1.1454, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.276520047765016e-05, |
|
"loss": 1.1363, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.273442404993168e-05, |
|
"loss": 1.1317, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2703647622213196e-05, |
|
"loss": 1.1365, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2672871194494713e-05, |
|
"loss": 1.1274, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.264209476677623e-05, |
|
"loss": 1.1183, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.261131833905775e-05, |
|
"loss": 1.1323, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.258054191133927e-05, |
|
"loss": 1.1345, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.254976548362079e-05, |
|
"loss": 1.1327, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.2518989055902306e-05, |
|
"loss": 1.1311, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.248821262818382e-05, |
|
"loss": 1.1279, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.245743620046534e-05, |
|
"loss": 1.1349, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.242665977274686e-05, |
|
"loss": 1.1286, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2395883345028374e-05, |
|
"loss": 1.1328, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.23651069173099e-05, |
|
"loss": 1.1296, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2334330489591415e-05, |
|
"loss": 1.1322, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.230355406187293e-05, |
|
"loss": 1.1292, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.227277763415445e-05, |
|
"loss": 1.1322, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2242001206435966e-05, |
|
"loss": 1.1352, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2211224778717483e-05, |
|
"loss": 1.1268, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.218044835099901e-05, |
|
"loss": 1.1239, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2149671923280524e-05, |
|
"loss": 1.129, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.211889549556204e-05, |
|
"loss": 1.1321, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.208811906784356e-05, |
|
"loss": 1.1374, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.2057342640125076e-05, |
|
"loss": 1.1297, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.202656621240659e-05, |
|
"loss": 1.1311, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.199578978468811e-05, |
|
"loss": 1.1225, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.1965013356969634e-05, |
|
"loss": 1.1225, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.193423692925115e-05, |
|
"loss": 1.1295, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.190346050153267e-05, |
|
"loss": 1.1262, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1872684073814185e-05, |
|
"loss": 1.1258, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.18419076460957e-05, |
|
"loss": 1.1154, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.181113121837722e-05, |
|
"loss": 1.1327, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1780354790658736e-05, |
|
"loss": 1.1235, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.174957836294026e-05, |
|
"loss": 1.1287, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.171880193522178e-05, |
|
"loss": 1.128, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1688025507503294e-05, |
|
"loss": 1.1221, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.165724907978481e-05, |
|
"loss": 1.1259, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.162647265206633e-05, |
|
"loss": 1.1263, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1595696224347846e-05, |
|
"loss": 1.1298, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.156491979662937e-05, |
|
"loss": 1.1263, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1534143368910886e-05, |
|
"loss": 1.1299, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1503366941192404e-05, |
|
"loss": 1.1265, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.147259051347392e-05, |
|
"loss": 1.1249, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.144181408575544e-05, |
|
"loss": 1.1206, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1411037658036955e-05, |
|
"loss": 1.1186, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.138026123031847e-05, |
|
"loss": 1.1181, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1349484802599996e-05, |
|
"loss": 1.1291, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.131870837488151e-05, |
|
"loss": 1.1262, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.128793194716303e-05, |
|
"loss": 1.1227, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.125715551944455e-05, |
|
"loss": 1.1233, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1226379091726064e-05, |
|
"loss": 1.1156, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.119560266400758e-05, |
|
"loss": 1.1147, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1164826236289105e-05, |
|
"loss": 1.1226, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.113404980857062e-05, |
|
"loss": 1.1175, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.110327338085214e-05, |
|
"loss": 1.124, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.1072496953133656e-05, |
|
"loss": 1.1166, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.1041720525415173e-05, |
|
"loss": 1.1222, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.101094409769669e-05, |
|
"loss": 1.1214, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.098016766997821e-05, |
|
"loss": 1.1231, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.094939124225973e-05, |
|
"loss": 1.1208, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.091861481454125e-05, |
|
"loss": 1.1167, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.0887838386822766e-05, |
|
"loss": 1.1238, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.085706195910428e-05, |
|
"loss": 1.1203, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.08262855313858e-05, |
|
"loss": 1.1276, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.079550910366732e-05, |
|
"loss": 1.1169, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.076473267594884e-05, |
|
"loss": 1.1197, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.073395624823036e-05, |
|
"loss": 1.1207, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0703179820511875e-05, |
|
"loss": 1.1227, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.067240339279339e-05, |
|
"loss": 1.1221, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.064162696507491e-05, |
|
"loss": 1.1189, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0610850537356426e-05, |
|
"loss": 1.1168, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.058007410963794e-05, |
|
"loss": 1.1153, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.054929768191947e-05, |
|
"loss": 1.1149, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0518521254200984e-05, |
|
"loss": 1.1202, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.04877448264825e-05, |
|
"loss": 1.1233, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.045696839876402e-05, |
|
"loss": 1.1165, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0426191971045536e-05, |
|
"loss": 1.1274, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0395415543327056e-05, |
|
"loss": 1.1161, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0364639115608573e-05, |
|
"loss": 1.1149, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.033386268789009e-05, |
|
"loss": 1.1194, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.030308626017161e-05, |
|
"loss": 1.1261, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0272309832453128e-05, |
|
"loss": 1.1206, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0241533404734645e-05, |
|
"loss": 1.1144, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0210756977016162e-05, |
|
"loss": 1.1186, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0179980549297682e-05, |
|
"loss": 1.129, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.01492041215792e-05, |
|
"loss": 1.1249, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0118427693860717e-05, |
|
"loss": 1.121, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0087651266142237e-05, |
|
"loss": 1.1155, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.0056874838423754e-05, |
|
"loss": 1.1212, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.002609841070527e-05, |
|
"loss": 1.1076, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9995321982986792e-05, |
|
"loss": 1.1176, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.996454555526831e-05, |
|
"loss": 1.1167, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9933769127549826e-05, |
|
"loss": 1.1186, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9902992699831346e-05, |
|
"loss": 1.1161, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9872216272112864e-05, |
|
"loss": 1.1168, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.984143984439438e-05, |
|
"loss": 1.1132, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9810663416675898e-05, |
|
"loss": 1.1185, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9779886988957418e-05, |
|
"loss": 1.1088, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9749110561238935e-05, |
|
"loss": 1.1146, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9718334133520452e-05, |
|
"loss": 1.1189, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9687557705801973e-05, |
|
"loss": 1.1159, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.965678127808349e-05, |
|
"loss": 1.1174, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9626004850365007e-05, |
|
"loss": 1.1194, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9595228422646528e-05, |
|
"loss": 1.1091, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9564451994928045e-05, |
|
"loss": 1.112, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.953367556720956e-05, |
|
"loss": 1.112, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.950289913949108e-05, |
|
"loss": 1.1104, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.94721227117726e-05, |
|
"loss": 1.1053, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9441346284054116e-05, |
|
"loss": 1.1106, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9410569856335633e-05, |
|
"loss": 1.1185, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9379793428617154e-05, |
|
"loss": 1.1113, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.934901700089867e-05, |
|
"loss": 1.1096, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9318240573180188e-05, |
|
"loss": 1.1085, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.928746414546171e-05, |
|
"loss": 1.1137, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9256687717743226e-05, |
|
"loss": 1.1075, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9225911290024743e-05, |
|
"loss": 1.1093, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.919513486230626e-05, |
|
"loss": 1.1159, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.916435843458778e-05, |
|
"loss": 1.1063, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9133582006869297e-05, |
|
"loss": 1.1135, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9102805579150815e-05, |
|
"loss": 1.1186, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9072029151432335e-05, |
|
"loss": 1.1128, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9041252723713852e-05, |
|
"loss": 1.1148, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.901047629599537e-05, |
|
"loss": 1.1056, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.897969986827689e-05, |
|
"loss": 1.1153, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8948923440558407e-05, |
|
"loss": 1.1147, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8918147012839924e-05, |
|
"loss": 1.1155, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8887370585121444e-05, |
|
"loss": 1.1132, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.885659415740296e-05, |
|
"loss": 1.1072, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.882581772968448e-05, |
|
"loss": 1.1175, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8795041301965996e-05, |
|
"loss": 1.1048, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8764264874247516e-05, |
|
"loss": 1.1126, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8733488446529033e-05, |
|
"loss": 1.1107, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.870271201881055e-05, |
|
"loss": 1.1108, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.867193559109207e-05, |
|
"loss": 1.1062, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8641159163373588e-05, |
|
"loss": 1.1083, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8610382735655105e-05, |
|
"loss": 1.0995, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8579606307936625e-05, |
|
"loss": 1.1143, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8548829880218142e-05, |
|
"loss": 1.1146, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.851805345249966e-05, |
|
"loss": 1.1144, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8487277024781177e-05, |
|
"loss": 1.1108, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8456500597062697e-05, |
|
"loss": 1.1105, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.842572416934422e-05, |
|
"loss": 1.1059, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8394947741625738e-05, |
|
"loss": 1.1037, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8364171313907255e-05, |
|
"loss": 1.1111, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8333394886188776e-05, |
|
"loss": 1.1103, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8302618458470293e-05, |
|
"loss": 1.1081, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.827184203075181e-05, |
|
"loss": 1.1095, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.824106560303333e-05, |
|
"loss": 1.1069, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8210289175314847e-05, |
|
"loss": 1.1134, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8179512747596364e-05, |
|
"loss": 1.108, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.814873631987788e-05, |
|
"loss": 1.1124, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8117959892159402e-05, |
|
"loss": 1.1144, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.808718346444092e-05, |
|
"loss": 1.1154, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8056407036722436e-05, |
|
"loss": 1.1116, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8025630609003957e-05, |
|
"loss": 1.1119, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.7994854181285474e-05, |
|
"loss": 1.1138, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.796407775356699e-05, |
|
"loss": 1.1047, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.793330132584851e-05, |
|
"loss": 1.1068, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.790252489813003e-05, |
|
"loss": 1.1063, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7871748470411546e-05, |
|
"loss": 1.0996, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7840972042693063e-05, |
|
"loss": 1.1076, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7810195614974583e-05, |
|
"loss": 1.0979, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.77794191872561e-05, |
|
"loss": 1.1051, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7748642759537617e-05, |
|
"loss": 1.1009, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7717866331819138e-05, |
|
"loss": 1.104, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7687089904100655e-05, |
|
"loss": 1.108, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7656313476382172e-05, |
|
"loss": 1.0968, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7625537048663692e-05, |
|
"loss": 1.1071, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.759476062094521e-05, |
|
"loss": 1.0957, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7563984193226727e-05, |
|
"loss": 1.1063, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7533207765508247e-05, |
|
"loss": 1.1008, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7502431337789764e-05, |
|
"loss": 1.1065, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.747165491007128e-05, |
|
"loss": 1.1019, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.74408784823528e-05, |
|
"loss": 1.1028, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.741010205463432e-05, |
|
"loss": 1.0995, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7379325626915836e-05, |
|
"loss": 1.1066, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7348549199197353e-05, |
|
"loss": 1.0997, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7317772771478873e-05, |
|
"loss": 1.1093, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.728699634376039e-05, |
|
"loss": 1.1062, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7256219916041908e-05, |
|
"loss": 1.103, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7225443488323428e-05, |
|
"loss": 1.115, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7194667060604945e-05, |
|
"loss": 1.0995, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7163890632886462e-05, |
|
"loss": 1.0975, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.713311420516798e-05, |
|
"loss": 1.0929, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.71023377774495e-05, |
|
"loss": 1.1008, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7071561349731017e-05, |
|
"loss": 1.0988, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7040784922012534e-05, |
|
"loss": 1.0953, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.7010008494294055e-05, |
|
"loss": 1.1023, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.697923206657557e-05, |
|
"loss": 1.1068, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.694845563885709e-05, |
|
"loss": 1.1052, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.691767921113861e-05, |
|
"loss": 1.0968, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6886902783420126e-05, |
|
"loss": 1.1074, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6856126355701643e-05, |
|
"loss": 1.0953, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.682534992798316e-05, |
|
"loss": 1.1027, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.679457350026468e-05, |
|
"loss": 1.0974, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6763797072546198e-05, |
|
"loss": 1.0923, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6733020644827715e-05, |
|
"loss": 1.0968, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6702244217109236e-05, |
|
"loss": 1.1017, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6671467789390753e-05, |
|
"loss": 1.1037, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.664069136167227e-05, |
|
"loss": 1.1003, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.660991493395379e-05, |
|
"loss": 1.102, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6579138506235307e-05, |
|
"loss": 1.0969, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6548362078516824e-05, |
|
"loss": 1.1022, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6517585650798345e-05, |
|
"loss": 1.0969, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6486809223079862e-05, |
|
"loss": 1.0876, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.645603279536138e-05, |
|
"loss": 1.1026, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6425256367642896e-05, |
|
"loss": 1.0973, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6394479939924417e-05, |
|
"loss": 1.1026, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6363703512205934e-05, |
|
"loss": 1.0965, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.633292708448745e-05, |
|
"loss": 1.0984, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.630215065676897e-05, |
|
"loss": 1.101, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.627137422905049e-05, |
|
"loss": 1.0917, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6240597801332005e-05, |
|
"loss": 1.0949, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6209821373613526e-05, |
|
"loss": 1.101, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6179044945895043e-05, |
|
"loss": 1.0962, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.614826851817656e-05, |
|
"loss": 1.1, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6117492090458077e-05, |
|
"loss": 1.0923, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6086715662739598e-05, |
|
"loss": 1.0999, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6055939235021115e-05, |
|
"loss": 1.0905, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6025162807302632e-05, |
|
"loss": 1.1041, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5994386379584152e-05, |
|
"loss": 1.1014, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.596360995186567e-05, |
|
"loss": 1.0937, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5932833524147187e-05, |
|
"loss": 1.1003, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5902057096428707e-05, |
|
"loss": 1.1029, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5871280668710224e-05, |
|
"loss": 1.0963, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.584050424099174e-05, |
|
"loss": 1.0957, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5809727813273262e-05, |
|
"loss": 1.1007, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.577895138555478e-05, |
|
"loss": 1.1006, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5748174957836296e-05, |
|
"loss": 1.0938, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5717398530117813e-05, |
|
"loss": 1.0928, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5686622102399333e-05, |
|
"loss": 1.0849, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.565584567468085e-05, |
|
"loss": 1.0985, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5625069246962368e-05, |
|
"loss": 1.0899, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5594292819243888e-05, |
|
"loss": 1.0957, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5563516391525405e-05, |
|
"loss": 1.0867, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5532739963806922e-05, |
|
"loss": 1.1027, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5501963536088443e-05, |
|
"loss": 1.0899, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.547118710836996e-05, |
|
"loss": 1.0836, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5440410680651477e-05, |
|
"loss": 1.0939, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5409634252932994e-05, |
|
"loss": 1.0925, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5378857825214515e-05, |
|
"loss": 1.1019, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.534808139749603e-05, |
|
"loss": 1.1007, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.531730496977755e-05, |
|
"loss": 1.0905, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.528652854205907e-05, |
|
"loss": 1.0936, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5255752114340586e-05, |
|
"loss": 1.0953, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5224975686622103e-05, |
|
"loss": 1.0967, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5194199258903624e-05, |
|
"loss": 1.0934, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.516342283118514e-05, |
|
"loss": 1.1003, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5132646403466658e-05, |
|
"loss": 1.0893, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5101869975748175e-05, |
|
"loss": 1.0878, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5071093548029696e-05, |
|
"loss": 1.0921, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5040317120311213e-05, |
|
"loss": 1.0976, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.500954069259273e-05, |
|
"loss": 1.0978, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.497876426487425e-05, |
|
"loss": 1.0948, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4947987837155767e-05, |
|
"loss": 1.0894, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4917211409437284e-05, |
|
"loss": 1.0933, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4886434981718805e-05, |
|
"loss": 1.0835, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4855658554000322e-05, |
|
"loss": 1.0971, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.482488212628184e-05, |
|
"loss": 1.0921, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.479410569856336e-05, |
|
"loss": 1.085, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4763329270844877e-05, |
|
"loss": 1.0838, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4732552843126394e-05, |
|
"loss": 1.0863, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.470177641540791e-05, |
|
"loss": 1.0882, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.467099998768943e-05, |
|
"loss": 1.0895, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.464022355997095e-05, |
|
"loss": 1.0949, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4609447132252465e-05, |
|
"loss": 1.0864, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4578670704533986e-05, |
|
"loss": 1.0848, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4547894276815503e-05, |
|
"loss": 1.0895, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.451711784909702e-05, |
|
"loss": 1.0822, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.448634142137854e-05, |
|
"loss": 1.0859, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4455564993660058e-05, |
|
"loss": 1.0809, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4424788565941575e-05, |
|
"loss": 1.0877, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4394012138223092e-05, |
|
"loss": 1.0822, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4363235710504612e-05, |
|
"loss": 1.0981, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.433245928278613e-05, |
|
"loss": 1.0834, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4301682855067647e-05, |
|
"loss": 1.0893, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4270906427349167e-05, |
|
"loss": 1.0872, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4240129999630684e-05, |
|
"loss": 1.0805, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.42093535719122e-05, |
|
"loss": 1.0848, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.417857714419372e-05, |
|
"loss": 1.0869, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.414780071647524e-05, |
|
"loss": 1.0846, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4117024288756756e-05, |
|
"loss": 1.0872, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4086247861038276e-05, |
|
"loss": 1.0835, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4055471433319793e-05, |
|
"loss": 1.0825, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.402469500560131e-05, |
|
"loss": 1.0898, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3993918577882828e-05, |
|
"loss": 1.0779, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3963142150164348e-05, |
|
"loss": 1.0823, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3932365722445865e-05, |
|
"loss": 1.084, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3901589294727382e-05, |
|
"loss": 1.0851, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3870812867008903e-05, |
|
"loss": 1.086, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.384003643929042e-05, |
|
"loss": 1.0877, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3809260011571937e-05, |
|
"loss": 1.0923, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3778483583853457e-05, |
|
"loss": 1.0843, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3747707156134974e-05, |
|
"loss": 1.0748, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.371693072841649e-05, |
|
"loss": 1.0833, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.368615430069801e-05, |
|
"loss": 1.0839, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.365537787297953e-05, |
|
"loss": 1.0942, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3624601445261046e-05, |
|
"loss": 1.0913, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3593825017542563e-05, |
|
"loss": 1.093, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3563048589824084e-05, |
|
"loss": 1.0757, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.35322721621056e-05, |
|
"loss": 1.0821, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3501495734387118e-05, |
|
"loss": 1.0827, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.347071930666864e-05, |
|
"loss": 1.0845, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3439942878950156e-05, |
|
"loss": 1.0876, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3409166451231673e-05, |
|
"loss": 1.0776, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.337839002351319e-05, |
|
"loss": 1.0837, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.334761359579471e-05, |
|
"loss": 1.075, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3316837168076227e-05, |
|
"loss": 1.0931, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3286060740357744e-05, |
|
"loss": 1.0905, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3255284312639265e-05, |
|
"loss": 1.0864, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3224507884920782e-05, |
|
"loss": 1.0845, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.31937314572023e-05, |
|
"loss": 1.0933, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.316295502948382e-05, |
|
"loss": 1.076, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3132178601765337e-05, |
|
"loss": 1.0816, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3101402174046854e-05, |
|
"loss": 1.0791, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3070625746328374e-05, |
|
"loss": 1.0818, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.303984931860989e-05, |
|
"loss": 1.0844, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.300907289089141e-05, |
|
"loss": 1.0735, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2978296463172925e-05, |
|
"loss": 1.0766, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2947520035454446e-05, |
|
"loss": 1.0827, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2916743607735963e-05, |
|
"loss": 1.0855, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.288596718001748e-05, |
|
"loss": 1.074, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2855190752299e-05, |
|
"loss": 1.0849, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2824414324580518e-05, |
|
"loss": 1.0771, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2793637896862035e-05, |
|
"loss": 1.0858, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2762861469143555e-05, |
|
"loss": 1.0815, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2732085041425072e-05, |
|
"loss": 1.0783, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.270130861370659e-05, |
|
"loss": 1.0707, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2670532185988107e-05, |
|
"loss": 1.0763, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2639755758269627e-05, |
|
"loss": 1.0802, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2608979330551144e-05, |
|
"loss": 1.0757, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.257820290283266e-05, |
|
"loss": 1.0804, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.254742647511418e-05, |
|
"loss": 1.0786, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.25166500473957e-05, |
|
"loss": 1.0785, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2485873619677216e-05, |
|
"loss": 1.0696, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2455097191958736e-05, |
|
"loss": 1.072, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2424320764240253e-05, |
|
"loss": 1.0787, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.239354433652177e-05, |
|
"loss": 1.0763, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.236276790880329e-05, |
|
"loss": 1.0783, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2331991481084808e-05, |
|
"loss": 1.0721, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2301215053366325e-05, |
|
"loss": 1.0833, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2270438625647842e-05, |
|
"loss": 1.0705, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2239662197929363e-05, |
|
"loss": 1.0791, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.220888577021088e-05, |
|
"loss": 1.0793, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2178109342492397e-05, |
|
"loss": 1.0814, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2147332914773917e-05, |
|
"loss": 1.0752, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2116556487055434e-05, |
|
"loss": 1.0876, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.208578005933695e-05, |
|
"loss": 1.072, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2055003631618472e-05, |
|
"loss": 1.0739, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.202422720389999e-05, |
|
"loss": 1.0814, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1993450776181506e-05, |
|
"loss": 1.0705, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1962674348463027e-05, |
|
"loss": 1.0753, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1931897920744547e-05, |
|
"loss": 1.0834, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1901121493026064e-05, |
|
"loss": 1.0849, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.187034506530758e-05, |
|
"loss": 1.0623, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.18395686375891e-05, |
|
"loss": 1.0664, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.180879220987062e-05, |
|
"loss": 1.0791, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1778015782152136e-05, |
|
"loss": 1.0707, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1747239354433653e-05, |
|
"loss": 1.0701, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1716462926715174e-05, |
|
"loss": 1.0753, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.168568649899669e-05, |
|
"loss": 1.0739, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1654910071278208e-05, |
|
"loss": 1.0732, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1624133643559728e-05, |
|
"loss": 1.068, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1593357215841245e-05, |
|
"loss": 1.0816, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1562580788122762e-05, |
|
"loss": 1.0682, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1531804360404283e-05, |
|
"loss": 1.0639, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.15010279326858e-05, |
|
"loss": 1.0723, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1470251504967317e-05, |
|
"loss": 1.074, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1439475077248834e-05, |
|
"loss": 1.0734, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1408698649530355e-05, |
|
"loss": 1.0698, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1377922221811872e-05, |
|
"loss": 1.0669, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.134714579409339e-05, |
|
"loss": 1.0744, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.131636936637491e-05, |
|
"loss": 1.0741, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1285592938656426e-05, |
|
"loss": 1.068, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1254816510937943e-05, |
|
"loss": 1.0707, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1224040083219464e-05, |
|
"loss": 1.0806, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.119326365550098e-05, |
|
"loss": 1.0701, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1162487227782498e-05, |
|
"loss": 1.08, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1131710800064015e-05, |
|
"loss": 1.0767, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1100934372345536e-05, |
|
"loss": 1.0618, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1070157944627053e-05, |
|
"loss": 1.0704, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.103938151690857e-05, |
|
"loss": 1.0647, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.100860508919009e-05, |
|
"loss": 1.0657, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0977828661471607e-05, |
|
"loss": 1.064, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0947052233753125e-05, |
|
"loss": 1.0709, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0916275806034645e-05, |
|
"loss": 1.0733, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0885499378316162e-05, |
|
"loss": 1.0783, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.085472295059768e-05, |
|
"loss": 1.0638, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.08239465228792e-05, |
|
"loss": 1.0615, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0793170095160717e-05, |
|
"loss": 1.0698, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0762393667442234e-05, |
|
"loss": 1.0687, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.073161723972375e-05, |
|
"loss": 1.0697, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.070084081200527e-05, |
|
"loss": 1.0668, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.067006438428679e-05, |
|
"loss": 1.0686, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0639287956568306e-05, |
|
"loss": 1.0624, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0608511528849826e-05, |
|
"loss": 1.0668, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0577735101131343e-05, |
|
"loss": 1.0646, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.054695867341286e-05, |
|
"loss": 1.0695, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.051618224569438e-05, |
|
"loss": 1.0713, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0485405817975898e-05, |
|
"loss": 1.0712, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0454629390257415e-05, |
|
"loss": 1.065, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0423852962538932e-05, |
|
"loss": 1.0627, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0393076534820452e-05, |
|
"loss": 1.0649, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.036230010710197e-05, |
|
"loss": 1.0738, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0331523679383487e-05, |
|
"loss": 1.0716, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0300747251665007e-05, |
|
"loss": 1.0641, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0269970823946524e-05, |
|
"loss": 1.0614, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.023919439622804e-05, |
|
"loss": 1.0647, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0208417968509562e-05, |
|
"loss": 1.0593, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.017764154079108e-05, |
|
"loss": 1.0743, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0146865113072596e-05, |
|
"loss": 1.0641, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0116088685354113e-05, |
|
"loss": 1.0732, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0085312257635634e-05, |
|
"loss": 1.0735, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.005453582991715e-05, |
|
"loss": 1.0573, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0023759402198668e-05, |
|
"loss": 1.0641, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9992982974480188e-05, |
|
"loss": 1.0585, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9962206546761705e-05, |
|
"loss": 1.068, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9931430119043222e-05, |
|
"loss": 1.0679, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9900653691324743e-05, |
|
"loss": 1.0714, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.986987726360626e-05, |
|
"loss": 1.0592, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9839100835887777e-05, |
|
"loss": 1.0718, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9808324408169297e-05, |
|
"loss": 1.0697, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9777547980450815e-05, |
|
"loss": 1.0602, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.974677155273233e-05, |
|
"loss": 1.0675, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.971599512501385e-05, |
|
"loss": 1.0659, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.968521869729537e-05, |
|
"loss": 1.0687, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9654442269576886e-05, |
|
"loss": 1.0665, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9623665841858403e-05, |
|
"loss": 1.0683, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9592889414139924e-05, |
|
"loss": 1.0626, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.956211298642144e-05, |
|
"loss": 1.0596, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9531336558702958e-05, |
|
"loss": 1.065, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.950056013098448e-05, |
|
"loss": 1.0709, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9469783703265996e-05, |
|
"loss": 1.0693, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9439007275547513e-05, |
|
"loss": 1.0558, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.940823084782903e-05, |
|
"loss": 1.0681, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.937745442011055e-05, |
|
"loss": 1.0686, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9346677992392067e-05, |
|
"loss": 1.0641, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9315901564673584e-05, |
|
"loss": 1.0601, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9285125136955105e-05, |
|
"loss": 1.0638, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9254348709236622e-05, |
|
"loss": 1.0572, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.922357228151814e-05, |
|
"loss": 1.063, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.919279585379966e-05, |
|
"loss": 1.0585, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9162019426081177e-05, |
|
"loss": 1.0664, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9131242998362694e-05, |
|
"loss": 1.0591, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9100466570644214e-05, |
|
"loss": 1.0509, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.906969014292573e-05, |
|
"loss": 1.0532, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.903891371520725e-05, |
|
"loss": 1.0721, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9008137287488766e-05, |
|
"loss": 1.0554, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8977360859770286e-05, |
|
"loss": 1.0689, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8946584432051803e-05, |
|
"loss": 1.0692, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.891580800433332e-05, |
|
"loss": 1.0658, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.888503157661484e-05, |
|
"loss": 1.0602, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8854255148896358e-05, |
|
"loss": 1.0624, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8823478721177875e-05, |
|
"loss": 1.0545, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8792702293459395e-05, |
|
"loss": 1.0551, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8761925865740912e-05, |
|
"loss": 1.0572, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.873114943802243e-05, |
|
"loss": 1.0695, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8700373010303947e-05, |
|
"loss": 1.0517, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8669596582585467e-05, |
|
"loss": 1.0665, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8638820154866984e-05, |
|
"loss": 1.0576, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.86080437271485e-05, |
|
"loss": 1.0605, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8577267299430022e-05, |
|
"loss": 1.0606, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.854649087171154e-05, |
|
"loss": 1.0629, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8515714443993056e-05, |
|
"loss": 1.06, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8484938016274576e-05, |
|
"loss": 1.0572, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8454161588556093e-05, |
|
"loss": 1.059, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.842338516083761e-05, |
|
"loss": 1.0534, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8392608733119128e-05, |
|
"loss": 1.0593, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8361832305400648e-05, |
|
"loss": 1.065, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8331055877682165e-05, |
|
"loss": 1.0593, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8300279449963682e-05, |
|
"loss": 1.0554, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8269503022245203e-05, |
|
"loss": 1.0605, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.823872659452672e-05, |
|
"loss": 1.0586, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8207950166808237e-05, |
|
"loss": 1.0597, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8177173739089757e-05, |
|
"loss": 1.0561, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8146397311371275e-05, |
|
"loss": 1.0484, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.811562088365279e-05, |
|
"loss": 1.0564, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8084844455934312e-05, |
|
"loss": 1.0528, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.805406802821583e-05, |
|
"loss": 1.0546, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.8023291600497346e-05, |
|
"loss": 1.0573, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7992515172778863e-05, |
|
"loss": 1.0481, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7961738745060384e-05, |
|
"loss": 1.0636, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.79309623173419e-05, |
|
"loss": 1.0585, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7900185889623418e-05, |
|
"loss": 1.0595, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.786940946190494e-05, |
|
"loss": 1.0621, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7838633034186456e-05, |
|
"loss": 1.0544, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7807856606467973e-05, |
|
"loss": 1.0543, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7777080178749493e-05, |
|
"loss": 1.0646, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.774630375103101e-05, |
|
"loss": 1.0572, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7715527323312527e-05, |
|
"loss": 1.0555, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7684750895594044e-05, |
|
"loss": 1.06, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.765397446787557e-05, |
|
"loss": 1.0527, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7623198040157085e-05, |
|
"loss": 1.0552, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7592421612438603e-05, |
|
"loss": 1.0586, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7561645184720123e-05, |
|
"loss": 1.0532, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.753086875700164e-05, |
|
"loss": 1.0546, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7500092329283157e-05, |
|
"loss": 1.0597, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7469315901564674e-05, |
|
"loss": 1.0543, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7438539473846195e-05, |
|
"loss": 1.0525, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7407763046127712e-05, |
|
"loss": 1.0597, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.737698661840923e-05, |
|
"loss": 1.0478, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.734621019069075e-05, |
|
"loss": 1.0532, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7315433762972266e-05, |
|
"loss": 1.0545, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7284657335253784e-05, |
|
"loss": 1.0438, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7253880907535304e-05, |
|
"loss": 1.0544, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.722310447981682e-05, |
|
"loss": 1.0539, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7192328052098338e-05, |
|
"loss": 1.0544, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7161551624379855e-05, |
|
"loss": 1.0535, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7130775196661376e-05, |
|
"loss": 1.0501, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7099998768942893e-05, |
|
"loss": 1.0558, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.706922234122441e-05, |
|
"loss": 1.0466, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.703844591350593e-05, |
|
"loss": 1.0561, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7007669485787448e-05, |
|
"loss": 1.0504, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6976893058068965e-05, |
|
"loss": 1.0419, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6946116630350485e-05, |
|
"loss": 1.0501, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6915340202632002e-05, |
|
"loss": 1.0584, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.688456377491352e-05, |
|
"loss": 1.0499, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6853787347195036e-05, |
|
"loss": 1.0525, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6823010919476557e-05, |
|
"loss": 1.0508, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6792234491758074e-05, |
|
"loss": 1.0516, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.676145806403959e-05, |
|
"loss": 1.0469, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.673068163632111e-05, |
|
"loss": 1.0507, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.669990520860263e-05, |
|
"loss": 1.053, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6669128780884146e-05, |
|
"loss": 1.0577, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6638352353165666e-05, |
|
"loss": 1.0475, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6607575925447183e-05, |
|
"loss": 1.0467, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.65767994977287e-05, |
|
"loss": 1.0514, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.654602307001022e-05, |
|
"loss": 1.0497, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6515246642291738e-05, |
|
"loss": 1.0483, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6484470214573255e-05, |
|
"loss": 1.0512, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6453693786854772e-05, |
|
"loss": 1.0497, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6422917359136293e-05, |
|
"loss": 1.0509, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.639214093141781e-05, |
|
"loss": 1.0495, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6361364503699327e-05, |
|
"loss": 1.0474, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6330588075980847e-05, |
|
"loss": 1.0404, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6299811648262364e-05, |
|
"loss": 1.0409, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.626903522054388e-05, |
|
"loss": 1.0383, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6238258792825402e-05, |
|
"loss": 1.0425, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.620748236510692e-05, |
|
"loss": 1.0421, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6176705937388436e-05, |
|
"loss": 1.0518, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6145929509669953e-05, |
|
"loss": 1.0409, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6115153081951474e-05, |
|
"loss": 1.0464, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.608437665423299e-05, |
|
"loss": 1.038, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6053600226514508e-05, |
|
"loss": 1.0489, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6022823798796028e-05, |
|
"loss": 1.0533, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5992047371077545e-05, |
|
"loss": 1.0447, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5961270943359062e-05, |
|
"loss": 1.0416, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5930494515640583e-05, |
|
"loss": 1.0463, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.58997180879221e-05, |
|
"loss": 1.0365, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5868941660203617e-05, |
|
"loss": 1.0371, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5838165232485138e-05, |
|
"loss": 1.0506, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5807388804766655e-05, |
|
"loss": 1.0464, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5776612377048172e-05, |
|
"loss": 1.0452, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.574583594932969e-05, |
|
"loss": 1.0499, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.571505952161121e-05, |
|
"loss": 1.0477, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5684283093892726e-05, |
|
"loss": 1.0486, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5653506666174244e-05, |
|
"loss": 1.0367, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5622730238455764e-05, |
|
"loss": 1.0467, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.559195381073728e-05, |
|
"loss": 1.0455, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5561177383018798e-05, |
|
"loss": 1.0463, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.553040095530032e-05, |
|
"loss": 1.0446, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5499624527581836e-05, |
|
"loss": 1.0366, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5468848099863353e-05, |
|
"loss": 1.0396, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.543807167214487e-05, |
|
"loss": 1.041, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.540729524442639e-05, |
|
"loss": 1.0454, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5376518816707908e-05, |
|
"loss": 1.0422, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5345742388989425e-05, |
|
"loss": 1.042, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5314965961270945e-05, |
|
"loss": 1.0425, |
|
"step": 563500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5284189533552462e-05, |
|
"loss": 1.0461, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5253413105833981e-05, |
|
"loss": 1.0457, |
|
"step": 564500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5222636678115498e-05, |
|
"loss": 1.0344, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5191860250397017e-05, |
|
"loss": 1.0468, |
|
"step": 565500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5161083822678534e-05, |
|
"loss": 1.0469, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5130307394960053e-05, |
|
"loss": 1.0401, |
|
"step": 566500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5099530967241571e-05, |
|
"loss": 1.0404, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5068754539523089e-05, |
|
"loss": 1.0428, |
|
"step": 567500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5037978111804607e-05, |
|
"loss": 1.0439, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5007201684086124e-05, |
|
"loss": 1.0397, |
|
"step": 568500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4976425256367643e-05, |
|
"loss": 1.0346, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4945648828649162e-05, |
|
"loss": 1.0366, |
|
"step": 569500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4914872400930679e-05, |
|
"loss": 1.0415, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4884095973212198e-05, |
|
"loss": 1.0388, |
|
"step": 570500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4853319545493715e-05, |
|
"loss": 1.0404, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4822543117775234e-05, |
|
"loss": 1.0359, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4791766690056753e-05, |
|
"loss": 1.0466, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.476099026233827e-05, |
|
"loss": 1.0335, |
|
"step": 572500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4730213834619788e-05, |
|
"loss": 1.0328, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4699437406901306e-05, |
|
"loss": 1.0387, |
|
"step": 573500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4668660979182824e-05, |
|
"loss": 1.0415, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4637884551464343e-05, |
|
"loss": 1.0438, |
|
"step": 574500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.460710812374586e-05, |
|
"loss": 1.0379, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4576331696027379e-05, |
|
"loss": 1.0424, |
|
"step": 575500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4545555268308898e-05, |
|
"loss": 1.0308, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4514778840590415e-05, |
|
"loss": 1.0334, |
|
"step": 576500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4484002412871934e-05, |
|
"loss": 1.0349, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.445322598515345e-05, |
|
"loss": 1.0468, |
|
"step": 577500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.442244955743497e-05, |
|
"loss": 1.0394, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4391673129716488e-05, |
|
"loss": 1.0469, |
|
"step": 578500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4360896701998005e-05, |
|
"loss": 1.0423, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4330120274279524e-05, |
|
"loss": 1.0297, |
|
"step": 579500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4299343846561041e-05, |
|
"loss": 1.0411, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.426856741884256e-05, |
|
"loss": 1.0392, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4237790991124079e-05, |
|
"loss": 1.0259, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4207014563405596e-05, |
|
"loss": 1.035, |
|
"step": 581500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4176238135687115e-05, |
|
"loss": 1.0395, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4145461707968632e-05, |
|
"loss": 1.0358, |
|
"step": 582500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.411468528025015e-05, |
|
"loss": 1.0413, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.408390885253167e-05, |
|
"loss": 1.0329, |
|
"step": 583500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4053132424813186e-05, |
|
"loss": 1.039, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4022355997094705e-05, |
|
"loss": 1.0357, |
|
"step": 584500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3991579569376222e-05, |
|
"loss": 1.0297, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3960803141657741e-05, |
|
"loss": 1.0311, |
|
"step": 585500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.393002671393926e-05, |
|
"loss": 1.0412, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3899250286220777e-05, |
|
"loss": 1.0364, |
|
"step": 586500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3868473858502296e-05, |
|
"loss": 1.0375, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3837697430783813e-05, |
|
"loss": 1.0295, |
|
"step": 587500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3806921003065332e-05, |
|
"loss": 1.0396, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.377614457534685e-05, |
|
"loss": 1.0308, |
|
"step": 588500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3745368147628367e-05, |
|
"loss": 1.0396, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3714591719909886e-05, |
|
"loss": 1.0346, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3683815292191405e-05, |
|
"loss": 1.0344, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3653038864472922e-05, |
|
"loss": 1.0344, |
|
"step": 590500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3622262436754441e-05, |
|
"loss": 1.0258, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3591486009035958e-05, |
|
"loss": 1.0383, |
|
"step": 591500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3560709581317477e-05, |
|
"loss": 1.0299, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3529933153598996e-05, |
|
"loss": 1.0326, |
|
"step": 592500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3499156725880513e-05, |
|
"loss": 1.0393, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3468380298162031e-05, |
|
"loss": 1.0327, |
|
"step": 593500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3437603870443549e-05, |
|
"loss": 1.0373, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3406827442725067e-05, |
|
"loss": 1.038, |
|
"step": 594500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3376051015006586e-05, |
|
"loss": 1.0346, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3345274587288107e-05, |
|
"loss": 1.025, |
|
"step": 595500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3314498159569624e-05, |
|
"loss": 1.0369, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3283721731851142e-05, |
|
"loss": 1.027, |
|
"step": 596500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3252945304132661e-05, |
|
"loss": 1.037, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3222168876414178e-05, |
|
"loss": 1.0388, |
|
"step": 597500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3191392448695697e-05, |
|
"loss": 1.0262, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3160616020977214e-05, |
|
"loss": 1.0306, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3129839593258733e-05, |
|
"loss": 1.0307, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3099063165540252e-05, |
|
"loss": 1.0347, |
|
"step": 599500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3068286737821769e-05, |
|
"loss": 1.0303, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3037510310103288e-05, |
|
"loss": 1.0367, |
|
"step": 600500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3006733882384806e-05, |
|
"loss": 1.0267, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2975957454666324e-05, |
|
"loss": 1.0302, |
|
"step": 601500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2945181026947842e-05, |
|
"loss": 1.033, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.291440459922936e-05, |
|
"loss": 1.0207, |
|
"step": 602500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2883628171510878e-05, |
|
"loss": 1.0293, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2852851743792397e-05, |
|
"loss": 1.0309, |
|
"step": 603500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2822075316073914e-05, |
|
"loss": 1.04, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2791298888355433e-05, |
|
"loss": 1.029, |
|
"step": 604500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.276052246063695e-05, |
|
"loss": 1.0284, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2729746032918469e-05, |
|
"loss": 1.0186, |
|
"step": 605500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2698969605199987e-05, |
|
"loss": 1.0257, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2668193177481505e-05, |
|
"loss": 1.0252, |
|
"step": 606500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2637416749763023e-05, |
|
"loss": 1.0336, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.260664032204454e-05, |
|
"loss": 1.0255, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.257586389432606e-05, |
|
"loss": 1.0264, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2545087466607578e-05, |
|
"loss": 1.0306, |
|
"step": 608500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2514311038889095e-05, |
|
"loss": 1.0351, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2483534611170612e-05, |
|
"loss": 1.0356, |
|
"step": 609500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2452758183452131e-05, |
|
"loss": 1.024, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2421981755733648e-05, |
|
"loss": 1.028, |
|
"step": 610500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2391205328015167e-05, |
|
"loss": 1.0263, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2360428900296684e-05, |
|
"loss": 1.0299, |
|
"step": 611500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2329652472578203e-05, |
|
"loss": 1.0258, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2298876044859722e-05, |
|
"loss": 1.0265, |
|
"step": 612500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.226809961714124e-05, |
|
"loss": 1.0224, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2237323189422759e-05, |
|
"loss": 1.0276, |
|
"step": 613500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2206546761704276e-05, |
|
"loss": 1.0318, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2175770333985795e-05, |
|
"loss": 1.0265, |
|
"step": 614500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2144993906267314e-05, |
|
"loss": 1.0305, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.211421747854883e-05, |
|
"loss": 1.0289, |
|
"step": 615500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.208344105083035e-05, |
|
"loss": 1.0209, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.2052664623111867e-05, |
|
"loss": 1.0253, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.2021888195393385e-05, |
|
"loss": 1.0313, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1991111767674904e-05, |
|
"loss": 1.025, |
|
"step": 617500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1960335339956421e-05, |
|
"loss": 1.0258, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.192955891223794e-05, |
|
"loss": 1.016, |
|
"step": 618500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1898782484519457e-05, |
|
"loss": 1.0291, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1868006056800976e-05, |
|
"loss": 1.0263, |
|
"step": 619500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1837229629082495e-05, |
|
"loss": 1.0274, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1806453201364012e-05, |
|
"loss": 1.0261, |
|
"step": 620500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.177567677364553e-05, |
|
"loss": 1.0293, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1744900345927048e-05, |
|
"loss": 1.0311, |
|
"step": 621500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1714123918208567e-05, |
|
"loss": 1.0236, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1683347490490085e-05, |
|
"loss": 1.0212, |
|
"step": 622500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1652571062771602e-05, |
|
"loss": 1.0271, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1621794635053121e-05, |
|
"loss": 1.018, |
|
"step": 623500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1591018207334638e-05, |
|
"loss": 1.032, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1560241779616157e-05, |
|
"loss": 1.0238, |
|
"step": 624500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1529465351897676e-05, |
|
"loss": 1.0294, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1498688924179193e-05, |
|
"loss": 1.0274, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1467912496460712e-05, |
|
"loss": 1.0283, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1437136068742229e-05, |
|
"loss": 1.0293, |
|
"step": 626500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1406359641023748e-05, |
|
"loss": 1.023, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1375583213305266e-05, |
|
"loss": 1.0227, |
|
"step": 627500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1344806785586783e-05, |
|
"loss": 1.0293, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1314030357868302e-05, |
|
"loss": 1.022, |
|
"step": 628500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1283253930149821e-05, |
|
"loss": 1.0209, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1252477502431338e-05, |
|
"loss": 1.0243, |
|
"step": 629500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1221701074712857e-05, |
|
"loss": 1.0291, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1190924646994374e-05, |
|
"loss": 1.0269, |
|
"step": 630500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1160148219275893e-05, |
|
"loss": 1.0256, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1129371791557412e-05, |
|
"loss": 1.0227, |
|
"step": 631500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1098595363838929e-05, |
|
"loss": 1.0253, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1067818936120447e-05, |
|
"loss": 1.0284, |
|
"step": 632500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1037042508401965e-05, |
|
"loss": 1.0226, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1006266080683483e-05, |
|
"loss": 1.0222, |
|
"step": 633500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0975489652965002e-05, |
|
"loss": 1.0258, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.094471322524652e-05, |
|
"loss": 1.0206, |
|
"step": 634500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0913936797528038e-05, |
|
"loss": 1.0256, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0883160369809555e-05, |
|
"loss": 1.0255, |
|
"step": 635500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0852383942091074e-05, |
|
"loss": 1.0245, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0821607514372593e-05, |
|
"loss": 1.0236, |
|
"step": 636500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.079083108665411e-05, |
|
"loss": 1.0197, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0760054658935629e-05, |
|
"loss": 1.0218, |
|
"step": 637500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0729278231217146e-05, |
|
"loss": 1.0248, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0698501803498664e-05, |
|
"loss": 1.0275, |
|
"step": 638500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0667725375780183e-05, |
|
"loss": 1.0304, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.06369489480617e-05, |
|
"loss": 1.0205, |
|
"step": 639500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0606172520343219e-05, |
|
"loss": 1.038, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0575396092624736e-05, |
|
"loss": 1.0189, |
|
"step": 640500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0544619664906255e-05, |
|
"loss": 1.0216, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0513843237187774e-05, |
|
"loss": 1.0206, |
|
"step": 641500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.048306680946929e-05, |
|
"loss": 1.0216, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.045229038175081e-05, |
|
"loss": 1.0243, |
|
"step": 642500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0421513954032328e-05, |
|
"loss": 1.015, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0390737526313845e-05, |
|
"loss": 1.0225, |
|
"step": 643500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0359961098595364e-05, |
|
"loss": 1.0217, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0329184670876881e-05, |
|
"loss": 1.0163, |
|
"step": 644500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.02984082431584e-05, |
|
"loss": 1.0086, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0267631815439919e-05, |
|
"loss": 1.0167, |
|
"step": 645500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0236855387721436e-05, |
|
"loss": 1.0152, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0206078960002955e-05, |
|
"loss": 1.0267, |
|
"step": 646500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0175302532284472e-05, |
|
"loss": 1.0146, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.014452610456599e-05, |
|
"loss": 1.0155, |
|
"step": 647500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.011374967684751e-05, |
|
"loss": 1.0239, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0082973249129028e-05, |
|
"loss": 1.0219, |
|
"step": 648500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0052196821410547e-05, |
|
"loss": 1.0199, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0021420393692064e-05, |
|
"loss": 1.0163, |
|
"step": 649500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.990643965973583e-06, |
|
"loss": 1.0213, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.9598675382551e-06, |
|
"loss": 1.0201, |
|
"step": 650500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.929091110536619e-06, |
|
"loss": 1.0217, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.898314682818138e-06, |
|
"loss": 1.0217, |
|
"step": 651500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.867538255099655e-06, |
|
"loss": 1.0152, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.836761827381173e-06, |
|
"loss": 1.0236, |
|
"step": 652500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.80598539966269e-06, |
|
"loss": 1.0183, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.77520897194421e-06, |
|
"loss": 1.0152, |
|
"step": 653500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.744432544225728e-06, |
|
"loss": 1.0205, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.713656116507245e-06, |
|
"loss": 1.0268, |
|
"step": 654500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.682879688788764e-06, |
|
"loss": 1.03, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.652103261070283e-06, |
|
"loss": 1.018, |
|
"step": 655500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.6213268333518e-06, |
|
"loss": 1.02, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.590550405633319e-06, |
|
"loss": 1.0198, |
|
"step": 656500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.559773977914836e-06, |
|
"loss": 1.0178, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.528997550196354e-06, |
|
"loss": 1.016, |
|
"step": 657500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.498221122477873e-06, |
|
"loss": 1.0213, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.46744469475939e-06, |
|
"loss": 1.0184, |
|
"step": 658500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.436668267040909e-06, |
|
"loss": 1.0244, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.405891839322426e-06, |
|
"loss": 1.0191, |
|
"step": 659500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.375115411603945e-06, |
|
"loss": 1.0232, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.344338983885464e-06, |
|
"loss": 1.0143, |
|
"step": 660500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.313562556166981e-06, |
|
"loss": 1.0198, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.2827861284485e-06, |
|
"loss": 1.0201, |
|
"step": 661500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.252009700730017e-06, |
|
"loss": 1.0174, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.221233273011536e-06, |
|
"loss": 1.0231, |
|
"step": 662500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.190456845293054e-06, |
|
"loss": 1.0119, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.159680417574571e-06, |
|
"loss": 1.0127, |
|
"step": 663500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.12890398985609e-06, |
|
"loss": 1.0168, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.098127562137607e-06, |
|
"loss": 1.0155, |
|
"step": 664500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.067351134419126e-06, |
|
"loss": 1.0175, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.036574706700645e-06, |
|
"loss": 1.0174, |
|
"step": 665500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.005798278982162e-06, |
|
"loss": 1.0173, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.97502185126368e-06, |
|
"loss": 1.0178, |
|
"step": 666500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.944245423545198e-06, |
|
"loss": 1.0124, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.913468995826717e-06, |
|
"loss": 1.0189, |
|
"step": 667500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.882692568108235e-06, |
|
"loss": 1.0231, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.851916140389752e-06, |
|
"loss": 1.0143, |
|
"step": 668500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.821139712671271e-06, |
|
"loss": 1.0153, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.79036328495279e-06, |
|
"loss": 1.0174, |
|
"step": 669500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.759586857234307e-06, |
|
"loss": 1.0158, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.728810429515826e-06, |
|
"loss": 1.0116, |
|
"step": 670500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.698034001797343e-06, |
|
"loss": 1.02, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.667257574078862e-06, |
|
"loss": 1.014, |
|
"step": 671500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.63648114636038e-06, |
|
"loss": 1.016, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.605704718641898e-06, |
|
"loss": 1.0173, |
|
"step": 672500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.574928290923416e-06, |
|
"loss": 1.0045, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.544151863204934e-06, |
|
"loss": 1.0185, |
|
"step": 673500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.513375435486452e-06, |
|
"loss": 1.0156, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.482599007767971e-06, |
|
"loss": 1.0106, |
|
"step": 674500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.451822580049488e-06, |
|
"loss": 1.0153, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.421046152331007e-06, |
|
"loss": 1.01, |
|
"step": 675500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.390269724612524e-06, |
|
"loss": 1.0161, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.359493296894043e-06, |
|
"loss": 1.0146, |
|
"step": 676500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.328716869175562e-06, |
|
"loss": 1.0152, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.297940441457079e-06, |
|
"loss": 1.0089, |
|
"step": 677500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.267164013738598e-06, |
|
"loss": 1.0139, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.236387586020115e-06, |
|
"loss": 1.0202, |
|
"step": 678500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.205611158301633e-06, |
|
"loss": 1.0082, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.174834730583152e-06, |
|
"loss": 1.0067, |
|
"step": 679500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.14405830286467e-06, |
|
"loss": 1.0156, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.113281875146188e-06, |
|
"loss": 1.0075, |
|
"step": 680500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.082505447427705e-06, |
|
"loss": 1.0108, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.051729019709224e-06, |
|
"loss": 1.0115, |
|
"step": 681500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.020952591990743e-06, |
|
"loss": 1.014, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.99017616427226e-06, |
|
"loss": 1.0147, |
|
"step": 682500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.95939973655378e-06, |
|
"loss": 1.006, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.928623308835297e-06, |
|
"loss": 1.016, |
|
"step": 683500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.897846881116816e-06, |
|
"loss": 1.025, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.867070453398335e-06, |
|
"loss": 1.0174, |
|
"step": 684500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.836294025679852e-06, |
|
"loss": 1.0158, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.80551759796137e-06, |
|
"loss": 1.0128, |
|
"step": 685500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.774741170242888e-06, |
|
"loss": 1.0142, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.743964742524407e-06, |
|
"loss": 1.0199, |
|
"step": 686500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.713188314805925e-06, |
|
"loss": 1.0173, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.682411887087443e-06, |
|
"loss": 1.0143, |
|
"step": 687500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.651635459368961e-06, |
|
"loss": 1.0135, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.620859031650479e-06, |
|
"loss": 1.0042, |
|
"step": 688500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.590082603931997e-06, |
|
"loss": 1.0161, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.559306176213515e-06, |
|
"loss": 1.01, |
|
"step": 689500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.528529748495033e-06, |
|
"loss": 1.0041, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.497753320776552e-06, |
|
"loss": 1.0185, |
|
"step": 690500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.46697689305807e-06, |
|
"loss": 1.0042, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.436200465339588e-06, |
|
"loss": 0.9985, |
|
"step": 691500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.405424037621106e-06, |
|
"loss": 1.0094, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.3746476099026245e-06, |
|
"loss": 1.0084, |
|
"step": 692500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.343871182184142e-06, |
|
"loss": 1.0106, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.31309475446566e-06, |
|
"loss": 1.0174, |
|
"step": 693500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.282318326747178e-06, |
|
"loss": 1.0137, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.251541899028696e-06, |
|
"loss": 1.0066, |
|
"step": 694500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.220765471310215e-06, |
|
"loss": 1.0081, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.189989043591733e-06, |
|
"loss": 1.0042, |
|
"step": 695500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.159212615873251e-06, |
|
"loss": 1.0139, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.128436188154769e-06, |
|
"loss": 1.0137, |
|
"step": 696500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.097659760436287e-06, |
|
"loss": 1.0135, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.0668833327178055e-06, |
|
"loss": 1.01, |
|
"step": 697500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.0361069049993235e-06, |
|
"loss": 1.0041, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.005330477280841e-06, |
|
"loss": 1.0142, |
|
"step": 698500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.974554049562359e-06, |
|
"loss": 1.0058, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.943777621843878e-06, |
|
"loss": 1.0086, |
|
"step": 699500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.913001194125396e-06, |
|
"loss": 1.0055, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.882224766406914e-06, |
|
"loss": 1.0148, |
|
"step": 700500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.851448338688432e-06, |
|
"loss": 1.004, |
|
"step": 701000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.82067191096995e-06, |
|
"loss": 1.0096, |
|
"step": 701500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.789895483251469e-06, |
|
"loss": 1.0043, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.759119055532987e-06, |
|
"loss": 1.0115, |
|
"step": 702500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.7283426278145045e-06, |
|
"loss": 1.0067, |
|
"step": 703000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.6975662000960225e-06, |
|
"loss": 1.0118, |
|
"step": 703500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.66678977237754e-06, |
|
"loss": 1.0056, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.636013344659059e-06, |
|
"loss": 1.0146, |
|
"step": 704500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.605236916940577e-06, |
|
"loss": 1.0111, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.574460489222095e-06, |
|
"loss": 1.0074, |
|
"step": 705500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.543684061503613e-06, |
|
"loss": 1.0116, |
|
"step": 706000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.512907633785132e-06, |
|
"loss": 1.0076, |
|
"step": 706500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.48213120606665e-06, |
|
"loss": 1.008, |
|
"step": 707000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.451354778348168e-06, |
|
"loss": 1.0123, |
|
"step": 707500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.420578350629686e-06, |
|
"loss": 1.0056, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.3898019229112035e-06, |
|
"loss": 1.0049, |
|
"step": 708500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.359025495192722e-06, |
|
"loss": 1.0028, |
|
"step": 709000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.32824906747424e-06, |
|
"loss": 1.0069, |
|
"step": 709500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.297472639755758e-06, |
|
"loss": 1.0091, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.266696212037276e-06, |
|
"loss": 1.0159, |
|
"step": 710500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.235919784318795e-06, |
|
"loss": 1.0079, |
|
"step": 711000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.205143356600313e-06, |
|
"loss": 1.0096, |
|
"step": 711500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.174366928881832e-06, |
|
"loss": 1.0024, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.1435905011633496e-06, |
|
"loss": 0.9978, |
|
"step": 712500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.1128140734448675e-06, |
|
"loss": 1.0059, |
|
"step": 713000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.0820376457263854e-06, |
|
"loss": 1.0076, |
|
"step": 713500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.051261218007904e-06, |
|
"loss": 1.0115, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.020484790289422e-06, |
|
"loss": 1.0101, |
|
"step": 714500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.98970836257094e-06, |
|
"loss": 1.011, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.958931934852458e-06, |
|
"loss": 1.0119, |
|
"step": 715500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.928155507133976e-06, |
|
"loss": 1.0122, |
|
"step": 716000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.897379079415495e-06, |
|
"loss": 1.0039, |
|
"step": 716500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.866602651697013e-06, |
|
"loss": 1.0031, |
|
"step": 717000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.835826223978531e-06, |
|
"loss": 1.0059, |
|
"step": 717500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.8050497962600486e-06, |
|
"loss": 1.0082, |
|
"step": 718000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.7742733685415665e-06, |
|
"loss": 1.0027, |
|
"step": 718500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.743496940823085e-06, |
|
"loss": 0.9952, |
|
"step": 719000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.712720513104603e-06, |
|
"loss": 1.0025, |
|
"step": 719500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.681944085386121e-06, |
|
"loss": 1.0005, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.651167657667639e-06, |
|
"loss": 1.0066, |
|
"step": 720500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.620391229949158e-06, |
|
"loss": 1.0004, |
|
"step": 721000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.589614802230676e-06, |
|
"loss": 1.0066, |
|
"step": 721500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.558838374512194e-06, |
|
"loss": 1.0082, |
|
"step": 722000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.528061946793712e-06, |
|
"loss": 1.0082, |
|
"step": 722500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.49728551907523e-06, |
|
"loss": 1.0108, |
|
"step": 723000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.466509091356748e-06, |
|
"loss": 1.0073, |
|
"step": 723500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.435732663638266e-06, |
|
"loss": 1.0071, |
|
"step": 724000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.404956235919784e-06, |
|
"loss": 1.0023, |
|
"step": 724500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.374179808201302e-06, |
|
"loss": 0.9992, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.34340338048282e-06, |
|
"loss": 1.0023, |
|
"step": 725500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.312626952764339e-06, |
|
"loss": 0.9983, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.281850525045857e-06, |
|
"loss": 1.0009, |
|
"step": 726500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.251074097327376e-06, |
|
"loss": 1.0027, |
|
"step": 727000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.220297669608894e-06, |
|
"loss": 1.0083, |
|
"step": 727500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.1895212418904115e-06, |
|
"loss": 1.0076, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.15874481417193e-06, |
|
"loss": 0.9977, |
|
"step": 728500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.127968386453448e-06, |
|
"loss": 1.0028, |
|
"step": 729000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.097191958734966e-06, |
|
"loss": 1.0072, |
|
"step": 729500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.066415531016484e-06, |
|
"loss": 1.0031, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.035639103298002e-06, |
|
"loss": 1.0051, |
|
"step": 730500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.004862675579521e-06, |
|
"loss": 1.0055, |
|
"step": 731000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.974086247861039e-06, |
|
"loss": 1.0034, |
|
"step": 731500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.943309820142557e-06, |
|
"loss": 1.0042, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.912533392424075e-06, |
|
"loss": 1.0039, |
|
"step": 732500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.8817569647055934e-06, |
|
"loss": 1.0033, |
|
"step": 733000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.850980536987111e-06, |
|
"loss": 0.9988, |
|
"step": 733500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.820204109268629e-06, |
|
"loss": 1.0052, |
|
"step": 734000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.789427681550147e-06, |
|
"loss": 0.9979, |
|
"step": 734500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.758651253831665e-06, |
|
"loss": 0.9971, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.727874826113184e-06, |
|
"loss": 1.0054, |
|
"step": 735500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.697098398394702e-06, |
|
"loss": 0.9994, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.66632197067622e-06, |
|
"loss": 0.9963, |
|
"step": 736500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.635545542957738e-06, |
|
"loss": 0.9992, |
|
"step": 737000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.604769115239256e-06, |
|
"loss": 1.0022, |
|
"step": 737500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.5739926875207745e-06, |
|
"loss": 1.0108, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.5432162598022924e-06, |
|
"loss": 0.9971, |
|
"step": 738500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.51243983208381e-06, |
|
"loss": 0.9978, |
|
"step": 739000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.481663404365328e-06, |
|
"loss": 1.0005, |
|
"step": 739500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.450886976646847e-06, |
|
"loss": 1.0058, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.420110548928365e-06, |
|
"loss": 0.9967, |
|
"step": 740500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.389334121209883e-06, |
|
"loss": 0.9919, |
|
"step": 741000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.358557693491401e-06, |
|
"loss": 1.0065, |
|
"step": 741500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.327781265772919e-06, |
|
"loss": 1.0024, |
|
"step": 742000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.297004838054438e-06, |
|
"loss": 0.9953, |
|
"step": 742500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.2662284103359556e-06, |
|
"loss": 0.9981, |
|
"step": 743000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.2354519826174735e-06, |
|
"loss": 0.9962, |
|
"step": 743500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.2046755548989914e-06, |
|
"loss": 1.0003, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.17389912718051e-06, |
|
"loss": 0.995, |
|
"step": 744500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.143122699462028e-06, |
|
"loss": 1.0064, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.112346271743547e-06, |
|
"loss": 1.001, |
|
"step": 745500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.081569844025065e-06, |
|
"loss": 0.9993, |
|
"step": 746000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.050793416306583e-06, |
|
"loss": 0.9941, |
|
"step": 746500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.020016988588101e-06, |
|
"loss": 1.0024, |
|
"step": 747000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.9892405608696195e-06, |
|
"loss": 1.0016, |
|
"step": 747500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.9584641331511375e-06, |
|
"loss": 1.004, |
|
"step": 748000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.927687705432655e-06, |
|
"loss": 1.0006, |
|
"step": 748500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.896911277714173e-06, |
|
"loss": 1.0003, |
|
"step": 749000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.866134849995691e-06, |
|
"loss": 0.9968, |
|
"step": 749500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.83535842227721e-06, |
|
"loss": 0.9979, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.804581994558728e-06, |
|
"loss": 1.0025, |
|
"step": 750500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.773805566840246e-06, |
|
"loss": 0.9947, |
|
"step": 751000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7430291391217643e-06, |
|
"loss": 0.9975, |
|
"step": 751500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7122527114032822e-06, |
|
"loss": 1.0006, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6814762836848e-06, |
|
"loss": 0.9894, |
|
"step": 752500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6506998559663185e-06, |
|
"loss": 1.0005, |
|
"step": 753000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6199234282478365e-06, |
|
"loss": 1.0106, |
|
"step": 753500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.589147000529355e-06, |
|
"loss": 1.0023, |
|
"step": 754000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.5583705728108728e-06, |
|
"loss": 0.9934, |
|
"step": 754500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.527594145092391e-06, |
|
"loss": 0.9958, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.496817717373909e-06, |
|
"loss": 0.9978, |
|
"step": 755500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.466041289655427e-06, |
|
"loss": 0.9994, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.4352648619369454e-06, |
|
"loss": 1.0067, |
|
"step": 756500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.4044884342184633e-06, |
|
"loss": 0.9952, |
|
"step": 757000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3737120064999817e-06, |
|
"loss": 1.0057, |
|
"step": 757500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3429355787814996e-06, |
|
"loss": 0.9982, |
|
"step": 758000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.312159151063018e-06, |
|
"loss": 0.9944, |
|
"step": 758500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.281382723344536e-06, |
|
"loss": 0.9935, |
|
"step": 759000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.250606295626054e-06, |
|
"loss": 0.9997, |
|
"step": 759500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.219829867907572e-06, |
|
"loss": 0.9982, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.18905344018909e-06, |
|
"loss": 0.9938, |
|
"step": 760500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1582770124706085e-06, |
|
"loss": 0.9998, |
|
"step": 761000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1275005847521264e-06, |
|
"loss": 0.9983, |
|
"step": 761500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.0967241570336448e-06, |
|
"loss": 1.0039, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.065947729315163e-06, |
|
"loss": 0.9966, |
|
"step": 762500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.035171301596681e-06, |
|
"loss": 0.9901, |
|
"step": 763000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.0043948738781994e-06, |
|
"loss": 0.9968, |
|
"step": 763500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.9736184461597174e-06, |
|
"loss": 0.9932, |
|
"step": 764000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.9428420184412353e-06, |
|
"loss": 0.9969, |
|
"step": 764500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.9120655907227537e-06, |
|
"loss": 0.9881, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.8812891630042716e-06, |
|
"loss": 0.9991, |
|
"step": 765500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.8505127352857904e-06, |
|
"loss": 0.994, |
|
"step": 766000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.8197363075673083e-06, |
|
"loss": 0.9993, |
|
"step": 766500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.7889598798488263e-06, |
|
"loss": 0.987, |
|
"step": 767000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.7581834521303446e-06, |
|
"loss": 0.9931, |
|
"step": 767500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.7274070244118626e-06, |
|
"loss": 0.9928, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.696630596693381e-06, |
|
"loss": 0.9971, |
|
"step": 768500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.665854168974899e-06, |
|
"loss": 0.9889, |
|
"step": 769000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6350777412564172e-06, |
|
"loss": 0.9953, |
|
"step": 769500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.604301313537935e-06, |
|
"loss": 0.9981, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.573524885819453e-06, |
|
"loss": 0.9927, |
|
"step": 770500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.5427484581009715e-06, |
|
"loss": 0.9945, |
|
"step": 771000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.5119720303824894e-06, |
|
"loss": 1.0024, |
|
"step": 771500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.4811956026640078e-06, |
|
"loss": 0.9978, |
|
"step": 772000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.4504191749455257e-06, |
|
"loss": 0.991, |
|
"step": 772500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.419642747227044e-06, |
|
"loss": 0.9995, |
|
"step": 773000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.388866319508562e-06, |
|
"loss": 1.0027, |
|
"step": 773500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.35808989179008e-06, |
|
"loss": 0.9949, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.3273134640715983e-06, |
|
"loss": 0.9938, |
|
"step": 774500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.2965370363531166e-06, |
|
"loss": 1.0029, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.265760608634635e-06, |
|
"loss": 0.9957, |
|
"step": 775500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.234984180916153e-06, |
|
"loss": 0.9978, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.204207753197671e-06, |
|
"loss": 0.9916, |
|
"step": 776500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1734313254791892e-06, |
|
"loss": 0.9881, |
|
"step": 777000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.142654897760707e-06, |
|
"loss": 0.9957, |
|
"step": 777500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1118784700422255e-06, |
|
"loss": 0.9952, |
|
"step": 778000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0811020423237435e-06, |
|
"loss": 0.9965, |
|
"step": 778500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.050325614605262e-06, |
|
"loss": 0.9938, |
|
"step": 779000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0195491868867798e-06, |
|
"loss": 0.9886, |
|
"step": 779500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9887727591682977e-06, |
|
"loss": 0.9973, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.957996331449816e-06, |
|
"loss": 0.9928, |
|
"step": 780500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.927219903731334e-06, |
|
"loss": 0.9938, |
|
"step": 781000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8964434760128522e-06, |
|
"loss": 1.0034, |
|
"step": 781500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8656670482943703e-06, |
|
"loss": 0.9944, |
|
"step": 782000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8348906205758885e-06, |
|
"loss": 0.995, |
|
"step": 782500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8041141928574066e-06, |
|
"loss": 0.9901, |
|
"step": 783000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.773337765138925e-06, |
|
"loss": 0.9913, |
|
"step": 783500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7425613374204431e-06, |
|
"loss": 1.0005, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7117849097019613e-06, |
|
"loss": 1.003, |
|
"step": 784500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6810084819834794e-06, |
|
"loss": 0.9944, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6502320542649976e-06, |
|
"loss": 1.0032, |
|
"step": 785500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.6194556265465157e-06, |
|
"loss": 0.9896, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5886791988280339e-06, |
|
"loss": 0.9901, |
|
"step": 786500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5579027711095518e-06, |
|
"loss": 0.9902, |
|
"step": 787000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.52712634339107e-06, |
|
"loss": 0.9962, |
|
"step": 787500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.496349915672588e-06, |
|
"loss": 0.9987, |
|
"step": 788000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4655734879541062e-06, |
|
"loss": 0.9993, |
|
"step": 788500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4347970602356244e-06, |
|
"loss": 0.9912, |
|
"step": 789000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4040206325171425e-06, |
|
"loss": 0.9937, |
|
"step": 789500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3732442047986607e-06, |
|
"loss": 0.9925, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3424677770801788e-06, |
|
"loss": 0.991, |
|
"step": 790500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.311691349361697e-06, |
|
"loss": 0.9939, |
|
"step": 791000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.2809149216432151e-06, |
|
"loss": 0.9964, |
|
"step": 791500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.2501384939247333e-06, |
|
"loss": 0.9929, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2193620662062514e-06, |
|
"loss": 0.9886, |
|
"step": 792500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1885856384877696e-06, |
|
"loss": 0.9947, |
|
"step": 793000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1578092107692875e-06, |
|
"loss": 1.0, |
|
"step": 793500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1270327830508057e-06, |
|
"loss": 0.9931, |
|
"step": 794000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.096256355332324e-06, |
|
"loss": 0.9967, |
|
"step": 794500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0654799276138422e-06, |
|
"loss": 0.9944, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0347034998953603e-06, |
|
"loss": 0.9877, |
|
"step": 795500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0039270721768783e-06, |
|
"loss": 0.9924, |
|
"step": 796000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.731506444583964e-07, |
|
"loss": 0.9876, |
|
"step": 796500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.423742167399145e-07, |
|
"loss": 0.9878, |
|
"step": 797000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.115977890214327e-07, |
|
"loss": 0.9909, |
|
"step": 797500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.808213613029508e-07, |
|
"loss": 0.9948, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.50044933584469e-07, |
|
"loss": 0.9887, |
|
"step": 798500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.192685058659872e-07, |
|
"loss": 0.9965, |
|
"step": 799000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.884920781475054e-07, |
|
"loss": 0.9906, |
|
"step": 799500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.577156504290234e-07, |
|
"loss": 0.9947, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.269392227105416e-07, |
|
"loss": 0.9913, |
|
"step": 800500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.961627949920597e-07, |
|
"loss": 0.9915, |
|
"step": 801000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.653863672735778e-07, |
|
"loss": 0.9846, |
|
"step": 801500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.346099395550959e-07, |
|
"loss": 1.0002, |
|
"step": 802000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.038335118366142e-07, |
|
"loss": 0.99, |
|
"step": 802500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.730570841181322e-07, |
|
"loss": 1.0009, |
|
"step": 803000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.422806563996504e-07, |
|
"loss": 0.9913, |
|
"step": 803500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.115042286811685e-07, |
|
"loss": 0.9849, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.807278009626867e-07, |
|
"loss": 0.9922, |
|
"step": 804500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.499513732442048e-07, |
|
"loss": 0.9885, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.1917494552572297e-07, |
|
"loss": 0.9909, |
|
"step": 805500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.883985178072411e-07, |
|
"loss": 0.99, |
|
"step": 806000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.576220900887592e-07, |
|
"loss": 0.9906, |
|
"step": 806500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.268456623702774e-07, |
|
"loss": 0.9865, |
|
"step": 807000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.960692346517955e-07, |
|
"loss": 0.9871, |
|
"step": 807500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.652928069333136e-07, |
|
"loss": 0.9933, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.3451637921483178e-07, |
|
"loss": 0.9954, |
|
"step": 808500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.037399514963499e-07, |
|
"loss": 0.9935, |
|
"step": 809000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.7296352377786808e-07, |
|
"loss": 0.9843, |
|
"step": 809500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.421870960593862e-07, |
|
"loss": 0.9897, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.1141066834090434e-07, |
|
"loss": 0.9914, |
|
"step": 810500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.063424062242248e-08, |
|
"loss": 0.9921, |
|
"step": 811000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.985781290394062e-08, |
|
"loss": 0.9981, |
|
"step": 811500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.9081385185458755e-08, |
|
"loss": 1.0003, |
|
"step": 812000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 812310, |
|
"total_flos": 3.420875271615529e+18, |
|
"train_loss": 1.1082924517030088, |
|
"train_runtime": 412795.0235, |
|
"train_samples_per_second": 31.485, |
|
"train_steps_per_second": 1.968 |
|
} |
|
], |
|
"max_steps": 812310, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.420875271615529e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|