|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6055880638592613, |
|
"global_step": 560000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.999098827285924e-05, |
|
"loss": 5.3192, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.998197654571848e-05, |
|
"loss": 5.4021, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9972964818577715e-05, |
|
"loss": 5.4284, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.996395309143695e-05, |
|
"loss": 5.4341, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.995494136429619e-05, |
|
"loss": 5.4215, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.994592963715543e-05, |
|
"loss": 5.3564, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9936917910014664e-05, |
|
"loss": 5.3319, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.99279061828739e-05, |
|
"loss": 5.3326, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.991889445573314e-05, |
|
"loss": 5.3575, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.990988272859237e-05, |
|
"loss": 5.3404, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.990087100145161e-05, |
|
"loss": 5.339, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.989185927431085e-05, |
|
"loss": 5.2714, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.988284754717009e-05, |
|
"loss": 5.2691, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9873835820029326e-05, |
|
"loss": 5.2559, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9864824092888563e-05, |
|
"loss": 5.209, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9855812365747794e-05, |
|
"loss": 5.26, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.984680063860703e-05, |
|
"loss": 5.1878, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.983778891146627e-05, |
|
"loss": 5.212, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9828777184325506e-05, |
|
"loss": 5.2063, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.981976545718475e-05, |
|
"loss": 5.2132, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.981075373004399e-05, |
|
"loss": 5.221, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.980174200290322e-05, |
|
"loss": 5.1786, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9792730275762456e-05, |
|
"loss": 5.1868, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.978371854862169e-05, |
|
"loss": 5.1585, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.977470682148093e-05, |
|
"loss": 5.2465, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.976569509434017e-05, |
|
"loss": 5.1645, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9756683367199405e-05, |
|
"loss": 5.1462, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.974767164005864e-05, |
|
"loss": 5.1588, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.973865991291788e-05, |
|
"loss": 5.145, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.972964818577712e-05, |
|
"loss": 5.1256, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9720636458636355e-05, |
|
"loss": 5.1227, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.971162473149559e-05, |
|
"loss": 5.096, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.970261300435483e-05, |
|
"loss": 5.1427, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.969360127721407e-05, |
|
"loss": 5.121, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9684589550073305e-05, |
|
"loss": 5.1324, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.967557782293254e-05, |
|
"loss": 5.1476, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.966656609579178e-05, |
|
"loss": 5.0538, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.965755436865102e-05, |
|
"loss": 5.0635, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9648542641510254e-05, |
|
"loss": 5.0309, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.963953091436949e-05, |
|
"loss": 5.0623, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.963051918722872e-05, |
|
"loss": 5.0624, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.962150746008796e-05, |
|
"loss": 5.0844, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9612495732947204e-05, |
|
"loss": 5.0667, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.960348400580644e-05, |
|
"loss": 5.0536, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.959447227866568e-05, |
|
"loss": 5.0783, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9585460551524916e-05, |
|
"loss": 5.0335, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.957644882438415e-05, |
|
"loss": 5.0321, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9567437097243384e-05, |
|
"loss": 5.037, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.955842537010262e-05, |
|
"loss": 5.0187, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.954941364296186e-05, |
|
"loss": 5.0357, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.95404019158211e-05, |
|
"loss": 5.0128, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.953139018868034e-05, |
|
"loss": 5.0553, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.952237846153957e-05, |
|
"loss": 5.0024, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.951336673439881e-05, |
|
"loss": 5.0177, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9504355007258046e-05, |
|
"loss": 5.0174, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.949534328011728e-05, |
|
"loss": 5.0167, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.948633155297652e-05, |
|
"loss": 4.9896, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.947731982583576e-05, |
|
"loss": 5.0355, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9468308098694995e-05, |
|
"loss": 4.9929, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.945929637155423e-05, |
|
"loss": 4.9702, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.945028464441347e-05, |
|
"loss": 4.944, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.944127291727271e-05, |
|
"loss": 4.9957, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9432261190131945e-05, |
|
"loss": 4.9908, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.942324946299118e-05, |
|
"loss": 4.9816, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.941423773585042e-05, |
|
"loss": 4.9649, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.940522600870966e-05, |
|
"loss": 4.9434, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9396214281568895e-05, |
|
"loss": 5.0387, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.938720255442813e-05, |
|
"loss": 4.9799, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.937819082728737e-05, |
|
"loss": 4.9648, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.936917910014661e-05, |
|
"loss": 4.9593, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9360167373005844e-05, |
|
"loss": 4.9687, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9351155645865075e-05, |
|
"loss": 4.9474, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.934214391872431e-05, |
|
"loss": 4.9344, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9333132191583556e-05, |
|
"loss": 4.932, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9324120464442794e-05, |
|
"loss": 5.0116, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.931510873730203e-05, |
|
"loss": 4.9311, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.930609701016127e-05, |
|
"loss": 4.9114, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.92970852830205e-05, |
|
"loss": 4.9517, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.928807355587974e-05, |
|
"loss": 4.9541, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9279061828738974e-05, |
|
"loss": 4.9637, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.927005010159821e-05, |
|
"loss": 4.9498, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.926103837445745e-05, |
|
"loss": 4.8924, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.925202664731669e-05, |
|
"loss": 4.9596, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.924301492017593e-05, |
|
"loss": 4.9264, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.923400319303516e-05, |
|
"loss": 4.9179, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.92249914658944e-05, |
|
"loss": 4.9151, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9215979738753636e-05, |
|
"loss": 4.9101, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.920696801161287e-05, |
|
"loss": 4.9541, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.919795628447211e-05, |
|
"loss": 4.9423, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.918894455733135e-05, |
|
"loss": 4.8763, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9179932830190586e-05, |
|
"loss": 4.9015, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.917092110304982e-05, |
|
"loss": 4.9179, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.916190937590906e-05, |
|
"loss": 4.8837, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.91528976487683e-05, |
|
"loss": 4.9141, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9143885921627535e-05, |
|
"loss": 4.8766, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.913487419448677e-05, |
|
"loss": 4.9088, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.912586246734601e-05, |
|
"loss": 4.9137, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.911685074020525e-05, |
|
"loss": 4.8692, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9107839013064485e-05, |
|
"loss": 4.8607, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.909882728592372e-05, |
|
"loss": 4.8573, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.908981555878296e-05, |
|
"loss": 4.9472, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.90808038316422e-05, |
|
"loss": 4.9144, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.907179210450143e-05, |
|
"loss": 4.973, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9062780377360665e-05, |
|
"loss": 4.9413, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.90537686502199e-05, |
|
"loss": 4.972, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9044756923079147e-05, |
|
"loss": 4.9722, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.9035745195938384e-05, |
|
"loss": 4.9126, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.902673346879762e-05, |
|
"loss": 4.9117, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.901772174165686e-05, |
|
"loss": 4.9233, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.900871001451609e-05, |
|
"loss": 4.9693, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.899969828737533e-05, |
|
"loss": 4.9875, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8990686560234564e-05, |
|
"loss": 4.9741, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.89816748330938e-05, |
|
"loss": 4.9411, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8972663105953046e-05, |
|
"loss": 4.9281, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.896365137881228e-05, |
|
"loss": 4.9392, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8954639651671514e-05, |
|
"loss": 4.9473, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.894562792453075e-05, |
|
"loss": 4.9333, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.893661619738999e-05, |
|
"loss": 4.9547, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8927604470249226e-05, |
|
"loss": 4.9422, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.891859274310846e-05, |
|
"loss": 4.9182, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.89095810159677e-05, |
|
"loss": 4.9282, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.890056928882694e-05, |
|
"loss": 4.943, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8891557561686176e-05, |
|
"loss": 4.9436, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.888254583454541e-05, |
|
"loss": 4.9253, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.887353410740465e-05, |
|
"loss": 4.9442, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.886452238026389e-05, |
|
"loss": 4.8888, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8855510653123125e-05, |
|
"loss": 4.9155, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8846498925982356e-05, |
|
"loss": 4.9519, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.88374871988416e-05, |
|
"loss": 4.9563, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.882847547170084e-05, |
|
"loss": 4.9553, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8819463744560075e-05, |
|
"loss": 4.8865, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.881045201741931e-05, |
|
"loss": 4.8987, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.880144029027855e-05, |
|
"loss": 4.9129, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.879242856313779e-05, |
|
"loss": 4.9565, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.878341683599702e-05, |
|
"loss": 4.8831, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8774405108856255e-05, |
|
"loss": 4.9007, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.87653933817155e-05, |
|
"loss": 4.9337, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8756381654574737e-05, |
|
"loss": 4.8446, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8747369927433974e-05, |
|
"loss": 4.9388, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.873835820029321e-05, |
|
"loss": 4.9655, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.872934647315244e-05, |
|
"loss": 4.9309, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.872033474601168e-05, |
|
"loss": 4.9102, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.871132301887092e-05, |
|
"loss": 4.8491, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8702311291730154e-05, |
|
"loss": 4.895, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.86932995645894e-05, |
|
"loss": 4.9222, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8684287837448636e-05, |
|
"loss": 4.8966, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8675276110307866e-05, |
|
"loss": 4.8669, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8666264383167104e-05, |
|
"loss": 4.8332, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.865725265602634e-05, |
|
"loss": 4.9127, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.864824092888558e-05, |
|
"loss": 4.9251, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8639229201744816e-05, |
|
"loss": 4.9379, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8630217474604053e-05, |
|
"loss": 4.8682, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.862120574746329e-05, |
|
"loss": 4.8762, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.861219402032253e-05, |
|
"loss": 4.8544, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8603182293181766e-05, |
|
"loss": 4.8835, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8594170566041e-05, |
|
"loss": 4.8346, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.858515883890024e-05, |
|
"loss": 4.9229, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.857614711175948e-05, |
|
"loss": 4.9159, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.856713538461871e-05, |
|
"loss": 4.8566, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.855812365747795e-05, |
|
"loss": 4.8155, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.854911193033719e-05, |
|
"loss": 4.9222, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.854010020319643e-05, |
|
"loss": 4.8545, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8531088476055665e-05, |
|
"loss": 4.8295, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.85220767489149e-05, |
|
"loss": 4.8223, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.851306502177414e-05, |
|
"loss": 4.8637, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.850405329463337e-05, |
|
"loss": 4.878, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.849504156749261e-05, |
|
"loss": 4.8677, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.848602984035185e-05, |
|
"loss": 4.8636, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.847701811321109e-05, |
|
"loss": 4.8708, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.846800638607033e-05, |
|
"loss": 4.8608, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8458994658929564e-05, |
|
"loss": 4.8347, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8449982931788795e-05, |
|
"loss": 4.8385, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.844097120464803e-05, |
|
"loss": 4.8565, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.843195947750727e-05, |
|
"loss": 4.867, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.842294775036651e-05, |
|
"loss": 4.8456, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.841393602322575e-05, |
|
"loss": 4.8739, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.840492429608499e-05, |
|
"loss": 4.8473, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.839591256894422e-05, |
|
"loss": 4.8496, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8386900841803456e-05, |
|
"loss": 4.8579, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8377889114662694e-05, |
|
"loss": 4.8543, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.836887738752193e-05, |
|
"loss": 4.8855, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.835986566038117e-05, |
|
"loss": 4.8511, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8350853933240406e-05, |
|
"loss": 4.8682, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8341842206099643e-05, |
|
"loss": 4.8074, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.833283047895888e-05, |
|
"loss": 4.8034, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.832381875181812e-05, |
|
"loss": 4.842, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8314807024677356e-05, |
|
"loss": 4.8146, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.830579529753659e-05, |
|
"loss": 4.8353, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.829678357039583e-05, |
|
"loss": 4.8151, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.828777184325507e-05, |
|
"loss": 4.8127, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8278760116114305e-05, |
|
"loss": 4.833, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.826974838897354e-05, |
|
"loss": 4.8383, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.826073666183278e-05, |
|
"loss": 4.8441, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.825172493469202e-05, |
|
"loss": 4.8794, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8242713207551255e-05, |
|
"loss": 4.828, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.823370148041049e-05, |
|
"loss": 4.7572, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.822468975326972e-05, |
|
"loss": 4.7658, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.821567802612896e-05, |
|
"loss": 4.8123, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8206666298988204e-05, |
|
"loss": 4.8082, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.819765457184744e-05, |
|
"loss": 4.7542, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.818864284470668e-05, |
|
"loss": 4.8264, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.817963111756592e-05, |
|
"loss": 4.7541, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.817061939042515e-05, |
|
"loss": 4.7992, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8161607663284385e-05, |
|
"loss": 4.8078, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.815259593614362e-05, |
|
"loss": 4.8453, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.814358420900286e-05, |
|
"loss": 4.8276, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.81345724818621e-05, |
|
"loss": 4.7253, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.812556075472134e-05, |
|
"loss": 4.8102, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.811654902758057e-05, |
|
"loss": 4.8006, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.810753730043981e-05, |
|
"loss": 4.7603, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8098525573299047e-05, |
|
"loss": 4.7124, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8089513846158284e-05, |
|
"loss": 4.7757, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.808050211901752e-05, |
|
"loss": 4.7593, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.807149039187676e-05, |
|
"loss": 4.8501, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8062478664735996e-05, |
|
"loss": 4.8105, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8053466937595234e-05, |
|
"loss": 4.7692, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.804445521045447e-05, |
|
"loss": 4.7855, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.803544348331371e-05, |
|
"loss": 4.8032, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8026431756172946e-05, |
|
"loss": 4.7814, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.801742002903218e-05, |
|
"loss": 4.8473, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.800840830189142e-05, |
|
"loss": 4.8047, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.799939657475066e-05, |
|
"loss": 4.8326, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7990384847609895e-05, |
|
"loss": 4.7993, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.798137312046913e-05, |
|
"loss": 4.7892, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.797236139332837e-05, |
|
"loss": 4.751, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.796334966618761e-05, |
|
"loss": 4.7795, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7954337939046845e-05, |
|
"loss": 4.7684, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7945326211906076e-05, |
|
"loss": 4.7673, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.793631448476531e-05, |
|
"loss": 4.7614, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.792730275762455e-05, |
|
"loss": 4.7529, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7918291030483795e-05, |
|
"loss": 4.8215, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.790927930334303e-05, |
|
"loss": 4.7351, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.790026757620227e-05, |
|
"loss": 4.7878, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.78912558490615e-05, |
|
"loss": 4.7618, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.788224412192074e-05, |
|
"loss": 4.846, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7873232394779975e-05, |
|
"loss": 4.8027, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.786422066763921e-05, |
|
"loss": 4.7415, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.785520894049845e-05, |
|
"loss": 4.7554, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7846197213357694e-05, |
|
"loss": 4.7287, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.783718548621693e-05, |
|
"loss": 4.81, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.782817375907616e-05, |
|
"loss": 4.7374, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.78191620319354e-05, |
|
"loss": 4.7541, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7810150304794637e-05, |
|
"loss": 4.7704, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7801138577653874e-05, |
|
"loss": 4.7743, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.779212685051311e-05, |
|
"loss": 4.7569, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.778311512337235e-05, |
|
"loss": 4.69, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7774103396231586e-05, |
|
"loss": 4.8213, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.7765091669090824e-05, |
|
"loss": 4.7616, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.775607994195006e-05, |
|
"loss": 4.7587, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.77470682148093e-05, |
|
"loss": 4.7599, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7738056487668536e-05, |
|
"loss": 4.692, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.772904476052777e-05, |
|
"loss": 4.8163, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7720033033387004e-05, |
|
"loss": 4.7533, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.771102130624625e-05, |
|
"loss": 4.7933, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7702009579105485e-05, |
|
"loss": 4.7659, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.769299785196472e-05, |
|
"loss": 4.7502, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.768398612482396e-05, |
|
"loss": 4.7412, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.76749743976832e-05, |
|
"loss": 4.7917, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.766596267054243e-05, |
|
"loss": 4.7984, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7656950943401666e-05, |
|
"loss": 4.7151, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.76479392162609e-05, |
|
"loss": 4.7101, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.763892748912015e-05, |
|
"loss": 4.7416, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7629915761979385e-05, |
|
"loss": 4.7401, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.762090403483862e-05, |
|
"loss": 4.7234, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.761189230769785e-05, |
|
"loss": 4.7334, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.760288058055709e-05, |
|
"loss": 4.7305, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.759386885341633e-05, |
|
"loss": 4.7889, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7584857126275565e-05, |
|
"loss": 4.7615, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.75758453991348e-05, |
|
"loss": 4.6827, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7566833671994046e-05, |
|
"loss": 4.7555, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7557821944853284e-05, |
|
"loss": 4.7644, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7548810217712514e-05, |
|
"loss": 4.7292, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.753979849057175e-05, |
|
"loss": 4.7221, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.753078676343099e-05, |
|
"loss": 4.7045, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.752177503629023e-05, |
|
"loss": 4.6832, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7512763309149464e-05, |
|
"loss": 4.7221, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.75037515820087e-05, |
|
"loss": 4.6595, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.749473985486794e-05, |
|
"loss": 4.7322, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7485728127727176e-05, |
|
"loss": 4.7332, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7476716400586414e-05, |
|
"loss": 4.7665, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.746770467344565e-05, |
|
"loss": 4.6936, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.745869294630489e-05, |
|
"loss": 4.7322, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7449681219164126e-05, |
|
"loss": 4.7406, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7440669492023356e-05, |
|
"loss": 4.757, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.74316577648826e-05, |
|
"loss": 4.7518, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.742264603774184e-05, |
|
"loss": 4.6843, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7413634310601075e-05, |
|
"loss": 4.6937, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.740462258346031e-05, |
|
"loss": 4.7167, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.739561085631955e-05, |
|
"loss": 4.7101, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.738659912917878e-05, |
|
"loss": 4.7401, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.737758740203802e-05, |
|
"loss": 4.7357, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7368575674897256e-05, |
|
"loss": 4.7034, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.73595639477565e-05, |
|
"loss": 4.6982, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.735055222061574e-05, |
|
"loss": 4.729, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7341540493474975e-05, |
|
"loss": 4.7402, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.733252876633421e-05, |
|
"loss": 4.7249, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.732351703919344e-05, |
|
"loss": 4.6795, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.731450531205268e-05, |
|
"loss": 4.7496, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.730549358491192e-05, |
|
"loss": 4.7258, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7296481857771155e-05, |
|
"loss": 4.7273, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.72874701306304e-05, |
|
"loss": 4.6983, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7278458403489636e-05, |
|
"loss": 4.6593, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.726944667634887e-05, |
|
"loss": 4.6931, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7260434949208104e-05, |
|
"loss": 4.6728, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.725142322206734e-05, |
|
"loss": 4.6942, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.724241149492658e-05, |
|
"loss": 4.655, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.723339976778582e-05, |
|
"loss": 4.6958, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7224388040645054e-05, |
|
"loss": 4.727, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.721537631350429e-05, |
|
"loss": 4.7039, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.720636458636353e-05, |
|
"loss": 4.6621, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7197352859222766e-05, |
|
"loss": 4.7307, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7188341132082004e-05, |
|
"loss": 4.6781, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.717932940494124e-05, |
|
"loss": 4.6862, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.717031767780048e-05, |
|
"loss": 4.6321, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.716130595065971e-05, |
|
"loss": 4.6918, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.715229422351895e-05, |
|
"loss": 4.7254, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.714328249637819e-05, |
|
"loss": 4.6808, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.713427076923743e-05, |
|
"loss": 4.6929, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7125259042096665e-05, |
|
"loss": 4.6183, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.71162473149559e-05, |
|
"loss": 4.6005, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.710723558781514e-05, |
|
"loss": 4.7159, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.709822386067437e-05, |
|
"loss": 4.6412, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.708921213353361e-05, |
|
"loss": 4.6927, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.708020040639285e-05, |
|
"loss": 4.7037, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.707118867925209e-05, |
|
"loss": 4.7063, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.706217695211133e-05, |
|
"loss": 4.739, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7053165224970565e-05, |
|
"loss": 4.6985, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7044153497829795e-05, |
|
"loss": 4.6828, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.703514177068903e-05, |
|
"loss": 4.7187, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.702613004354827e-05, |
|
"loss": 4.7055, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.701711831640751e-05, |
|
"loss": 4.6414, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7008106589266745e-05, |
|
"loss": 4.6793, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.699909486212599e-05, |
|
"loss": 4.7155, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.699008313498522e-05, |
|
"loss": 4.6599, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.698107140784446e-05, |
|
"loss": 4.6949, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6972059680703695e-05, |
|
"loss": 4.6781, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.696304795356293e-05, |
|
"loss": 4.6621, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.695403622642217e-05, |
|
"loss": 4.675, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.694502449928141e-05, |
|
"loss": 4.6254, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6936012772140644e-05, |
|
"loss": 4.7044, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.692700104499988e-05, |
|
"loss": 4.6353, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.691798931785912e-05, |
|
"loss": 4.6393, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6908977590718356e-05, |
|
"loss": 4.6692, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6899965863577594e-05, |
|
"loss": 4.6501, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.689095413643683e-05, |
|
"loss": 4.6289, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.688194240929607e-05, |
|
"loss": 4.6656, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6872930682155306e-05, |
|
"loss": 4.6542, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.686391895501454e-05, |
|
"loss": 4.678, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.685490722787378e-05, |
|
"loss": 4.648, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.684589550073302e-05, |
|
"loss": 4.6518, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6836883773592256e-05, |
|
"loss": 4.7169, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.682787204645149e-05, |
|
"loss": 4.6243, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6818860319310724e-05, |
|
"loss": 4.6988, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.680984859216996e-05, |
|
"loss": 4.5944, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.68008368650292e-05, |
|
"loss": 4.7104, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.679182513788844e-05, |
|
"loss": 4.6633, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.678281341074768e-05, |
|
"loss": 4.6841, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.677380168360692e-05, |
|
"loss": 4.6535, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.676478995646615e-05, |
|
"loss": 4.7139, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6755778229325385e-05, |
|
"loss": 4.6433, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.674676650218462e-05, |
|
"loss": 4.7148, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.673775477504386e-05, |
|
"loss": 4.6483, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.67287430479031e-05, |
|
"loss": 4.6044, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.671973132076234e-05, |
|
"loss": 4.6271, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.671071959362157e-05, |
|
"loss": 4.6416, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.670170786648081e-05, |
|
"loss": 4.6732, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.669269613934005e-05, |
|
"loss": 4.6461, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6683684412199285e-05, |
|
"loss": 4.6583, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.667467268505852e-05, |
|
"loss": 4.6572, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.666566095791776e-05, |
|
"loss": 4.6394, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6656649230777e-05, |
|
"loss": 4.676, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6647637503636234e-05, |
|
"loss": 4.6573, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.663862577649547e-05, |
|
"loss": 4.6528, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.662961404935471e-05, |
|
"loss": 4.658, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6620602322213946e-05, |
|
"loss": 4.6363, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6611590595073184e-05, |
|
"loss": 4.6629, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.660257886793242e-05, |
|
"loss": 4.6319, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.659356714079166e-05, |
|
"loss": 4.6833, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.6584555413650896e-05, |
|
"loss": 4.586, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.657554368651013e-05, |
|
"loss": 4.6757, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.656653195936937e-05, |
|
"loss": 4.6509, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.655752023222861e-05, |
|
"loss": 4.6792, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6548508505087846e-05, |
|
"loss": 4.6738, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6539496777947076e-05, |
|
"loss": 4.6407, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6530485050806314e-05, |
|
"loss": 4.6581, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.652147332366555e-05, |
|
"loss": 4.688, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6512461596524795e-05, |
|
"loss": 4.6858, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.650344986938403e-05, |
|
"loss": 4.6618, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.649443814224327e-05, |
|
"loss": 4.6565, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.64854264151025e-05, |
|
"loss": 4.6477, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.647641468796174e-05, |
|
"loss": 4.6347, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6467402960820975e-05, |
|
"loss": 4.6384, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.645839123368021e-05, |
|
"loss": 4.6041, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.644937950653945e-05, |
|
"loss": 4.6302, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6440367779398694e-05, |
|
"loss": 4.582, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6431356052257925e-05, |
|
"loss": 4.6465, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.642234432511716e-05, |
|
"loss": 4.6427, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.64133325979764e-05, |
|
"loss": 4.6421, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.640432087083564e-05, |
|
"loss": 4.6108, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6395309143694875e-05, |
|
"loss": 4.6228, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.638629741655411e-05, |
|
"loss": 4.5645, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.637728568941335e-05, |
|
"loss": 4.5875, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.636827396227259e-05, |
|
"loss": 4.6283, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6359262235131824e-05, |
|
"loss": 4.6218, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.635025050799106e-05, |
|
"loss": 4.6801, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.63412387808503e-05, |
|
"loss": 4.6695, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6332227053709536e-05, |
|
"loss": 4.684, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6323215326568774e-05, |
|
"loss": 4.5908, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6314203599428004e-05, |
|
"loss": 4.6085, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.630519187228725e-05, |
|
"loss": 4.6316, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6296180145146486e-05, |
|
"loss": 4.6607, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6287168418005723e-05, |
|
"loss": 4.6351, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.627815669086496e-05, |
|
"loss": 4.6443, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.62691449637242e-05, |
|
"loss": 4.6842, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.626013323658343e-05, |
|
"loss": 4.6173, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6251121509442666e-05, |
|
"loss": 4.593, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6242109782301904e-05, |
|
"loss": 4.606, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.623309805516115e-05, |
|
"loss": 4.6188, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6224086328020385e-05, |
|
"loss": 4.6307, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.621507460087962e-05, |
|
"loss": 4.5919, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.620606287373885e-05, |
|
"loss": 4.6507, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.619705114659809e-05, |
|
"loss": 4.6382, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.618803941945733e-05, |
|
"loss": 4.5784, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6179027692316565e-05, |
|
"loss": 4.5813, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.61700159651758e-05, |
|
"loss": 4.6059, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.616100423803505e-05, |
|
"loss": 4.5996, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6151992510894284e-05, |
|
"loss": 4.6524, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6142980783753515e-05, |
|
"loss": 4.6452, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.613396905661275e-05, |
|
"loss": 4.6752, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.612495732947199e-05, |
|
"loss": 4.5912, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.611594560233123e-05, |
|
"loss": 4.6646, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6106933875190465e-05, |
|
"loss": 4.6234, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.60979221480497e-05, |
|
"loss": 4.6457, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.608891042090894e-05, |
|
"loss": 4.6285, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.607989869376818e-05, |
|
"loss": 4.6047, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6070886966627414e-05, |
|
"loss": 4.5877, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.606187523948665e-05, |
|
"loss": 4.6101, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.605286351234589e-05, |
|
"loss": 4.6867, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6043851785205126e-05, |
|
"loss": 4.6508, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.603484005806436e-05, |
|
"loss": 4.6099, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.60258283309236e-05, |
|
"loss": 4.6508, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.601681660378284e-05, |
|
"loss": 4.6105, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6007804876642076e-05, |
|
"loss": 4.6001, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5998793149501313e-05, |
|
"loss": 4.6344, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.598978142236055e-05, |
|
"loss": 4.585, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.598076969521978e-05, |
|
"loss": 4.5558, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.597175796807902e-05, |
|
"loss": 4.5825, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5962746240938256e-05, |
|
"loss": 4.5569, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.59537345137975e-05, |
|
"loss": 4.5647, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.594472278665674e-05, |
|
"loss": 4.5887, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5935711059515975e-05, |
|
"loss": 4.5825, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.5926699332375206e-05, |
|
"loss": 4.5739, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.591768760523444e-05, |
|
"loss": 4.5726, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.590867587809368e-05, |
|
"loss": 4.6447, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.589966415095292e-05, |
|
"loss": 4.5851, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5890652423812155e-05, |
|
"loss": 4.5571, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.58816406966714e-05, |
|
"loss": 4.5877, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.587262896953064e-05, |
|
"loss": 4.5896, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.586361724238987e-05, |
|
"loss": 4.591, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5854605515249105e-05, |
|
"loss": 4.5587, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.584559378810834e-05, |
|
"loss": 4.5871, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.583658206096758e-05, |
|
"loss": 4.6129, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.582757033382682e-05, |
|
"loss": 4.5838, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5818558606686055e-05, |
|
"loss": 4.6555, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.580954687954529e-05, |
|
"loss": 4.5784, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.580053515240453e-05, |
|
"loss": 4.5853, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.579152342526377e-05, |
|
"loss": 4.5536, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5782511698123004e-05, |
|
"loss": 4.6067, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.577349997098224e-05, |
|
"loss": 4.6091, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.576448824384148e-05, |
|
"loss": 4.5912, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.575547651670071e-05, |
|
"loss": 4.5887, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5746464789559954e-05, |
|
"loss": 4.5748, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.573745306241919e-05, |
|
"loss": 4.537, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.572844133527843e-05, |
|
"loss": 4.518, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5719429608137666e-05, |
|
"loss": 4.5982, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5710417880996904e-05, |
|
"loss": 4.5996, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5701406153856134e-05, |
|
"loss": 4.6103, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.569239442671537e-05, |
|
"loss": 4.5725, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.568338269957461e-05, |
|
"loss": 4.6039, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.567437097243385e-05, |
|
"loss": 4.5271, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.566535924529309e-05, |
|
"loss": 4.6387, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.565634751815233e-05, |
|
"loss": 4.5238, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5647335791011565e-05, |
|
"loss": 4.5608, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5638324063870796e-05, |
|
"loss": 4.582, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.562931233673003e-05, |
|
"loss": 4.5491, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.562030060958927e-05, |
|
"loss": 4.5778, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.561128888244851e-05, |
|
"loss": 4.6373, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5602277155307746e-05, |
|
"loss": 4.6209, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.559326542816699e-05, |
|
"loss": 4.5673, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.558425370102622e-05, |
|
"loss": 4.5685, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.557524197388546e-05, |
|
"loss": 4.5357, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5566230246744695e-05, |
|
"loss": 4.576, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.555721851960393e-05, |
|
"loss": 4.602, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.554820679246317e-05, |
|
"loss": 4.4973, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.553919506532241e-05, |
|
"loss": 4.5782, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5530183338181645e-05, |
|
"loss": 4.5825, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.552117161104088e-05, |
|
"loss": 4.5145, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.551215988390012e-05, |
|
"loss": 4.5698, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.550314815675936e-05, |
|
"loss": 4.5806, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5494136429618594e-05, |
|
"loss": 4.5052, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.548512470247783e-05, |
|
"loss": 4.5976, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.547611297533706e-05, |
|
"loss": 4.5667, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5467101248196307e-05, |
|
"loss": 4.5431, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5458089521055544e-05, |
|
"loss": 4.5659, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.544907779391478e-05, |
|
"loss": 4.5484, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.544006606677402e-05, |
|
"loss": 4.5668, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5431054339633256e-05, |
|
"loss": 4.5605, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5422042612492494e-05, |
|
"loss": 4.5686, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5413030885351724e-05, |
|
"loss": 4.5157, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.540401915821096e-05, |
|
"loss": 4.5193, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.53950074310702e-05, |
|
"loss": 4.5781, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.538599570392944e-05, |
|
"loss": 4.5719, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.537698397678868e-05, |
|
"loss": 4.5509, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536797224964792e-05, |
|
"loss": 4.5617, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.535896052250715e-05, |
|
"loss": 4.5149, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5349948795366386e-05, |
|
"loss": 4.5628, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.534093706822562e-05, |
|
"loss": 4.5809, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.533192534108486e-05, |
|
"loss": 4.525, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.53229136139441e-05, |
|
"loss": 4.5036, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.531390188680334e-05, |
|
"loss": 4.5399, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.530489015966257e-05, |
|
"loss": 4.5939, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.529587843252181e-05, |
|
"loss": 4.605, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.528686670538105e-05, |
|
"loss": 4.4758, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5277854978240285e-05, |
|
"loss": 4.5757, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.526884325109952e-05, |
|
"loss": 4.5944, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.525983152395876e-05, |
|
"loss": 4.5485, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5250819796818e-05, |
|
"loss": 4.6034, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5241808069677235e-05, |
|
"loss": 4.5887, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.523279634253647e-05, |
|
"loss": 4.5265, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.522378461539571e-05, |
|
"loss": 4.5177, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.521477288825495e-05, |
|
"loss": 4.6046, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5205761161114184e-05, |
|
"loss": 4.5481, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.519674943397342e-05, |
|
"loss": 4.5171, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.518773770683265e-05, |
|
"loss": 4.522, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5178725979691897e-05, |
|
"loss": 4.5399, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5169714252551134e-05, |
|
"loss": 4.5763, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.516070252541037e-05, |
|
"loss": 4.5583, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.515169079826961e-05, |
|
"loss": 4.569, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5142679071128846e-05, |
|
"loss": 4.5812, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.513366734398808e-05, |
|
"loss": 4.5136, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5124655616847314e-05, |
|
"loss": 4.525, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.511564388970655e-05, |
|
"loss": 4.4892, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5106632162565796e-05, |
|
"loss": 4.4955, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.509762043542503e-05, |
|
"loss": 4.5783, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.508860870828427e-05, |
|
"loss": 4.5685, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.50795969811435e-05, |
|
"loss": 4.5577, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.507058525400274e-05, |
|
"loss": 4.6029, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5061573526861976e-05, |
|
"loss": 4.5451, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5052561799721213e-05, |
|
"loss": 4.5816, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.504355007258045e-05, |
|
"loss": 4.4644, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5034538345439695e-05, |
|
"loss": 4.5545, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5025526618298926e-05, |
|
"loss": 4.5686, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.501651489115816e-05, |
|
"loss": 4.598, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.50075031640174e-05, |
|
"loss": 4.5273, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.499849143687664e-05, |
|
"loss": 4.498, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4989479709735875e-05, |
|
"loss": 4.5226, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.498046798259511e-05, |
|
"loss": 4.5453, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.497145625545435e-05, |
|
"loss": 4.5878, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496244452831359e-05, |
|
"loss": 4.4889, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4953432801172825e-05, |
|
"loss": 4.531, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.494442107403206e-05, |
|
"loss": 4.5446, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.49354093468913e-05, |
|
"loss": 4.5806, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.492639761975054e-05, |
|
"loss": 4.5838, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4917385892609774e-05, |
|
"loss": 4.5335, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4908374165469005e-05, |
|
"loss": 4.4475, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.489936243832825e-05, |
|
"loss": 4.4819, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.489035071118749e-05, |
|
"loss": 4.4793, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4881338984046724e-05, |
|
"loss": 4.5127, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.487232725690596e-05, |
|
"loss": 4.5424, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.48633155297652e-05, |
|
"loss": 4.4961, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.485430380262443e-05, |
|
"loss": 4.5096, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.484529207548367e-05, |
|
"loss": 4.5307, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4836280348342904e-05, |
|
"loss": 4.5627, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.482726862120215e-05, |
|
"loss": 4.526, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4818256894061386e-05, |
|
"loss": 4.5265, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.480924516692062e-05, |
|
"loss": 4.5695, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4800233439779854e-05, |
|
"loss": 4.5962, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.479122171263909e-05, |
|
"loss": 4.5317, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.478220998549833e-05, |
|
"loss": 4.5309, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4773198258357566e-05, |
|
"loss": 4.5631, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4764186531216803e-05, |
|
"loss": 4.5116, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.475517480407605e-05, |
|
"loss": 4.5155, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.474616307693528e-05, |
|
"loss": 4.5214, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4737151349794516e-05, |
|
"loss": 4.5764, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.472813962265375e-05, |
|
"loss": 4.5458, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.471912789551299e-05, |
|
"loss": 4.5426, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.471011616837223e-05, |
|
"loss": 4.5371, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4701104441231465e-05, |
|
"loss": 4.5577, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.46920927140907e-05, |
|
"loss": 4.4803, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.468308098694994e-05, |
|
"loss": 4.5597, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.467406925980918e-05, |
|
"loss": 4.5193, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4665057532668415e-05, |
|
"loss": 4.4773, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.465604580552765e-05, |
|
"loss": 4.5625, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.464703407838689e-05, |
|
"loss": 4.5206, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.463802235124613e-05, |
|
"loss": 4.49, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.462901062410536e-05, |
|
"loss": 4.511, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.46199988969646e-05, |
|
"loss": 4.4423, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.461098716982384e-05, |
|
"loss": 4.5147, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.460197544268308e-05, |
|
"loss": 4.5474, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4592963715542314e-05, |
|
"loss": 4.5493, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.458395198840155e-05, |
|
"loss": 4.49, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.457494026126078e-05, |
|
"loss": 4.5583, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.456592853412002e-05, |
|
"loss": 4.5288, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.455691680697926e-05, |
|
"loss": 4.5368, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.45479050798385e-05, |
|
"loss": 4.5171, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.453889335269774e-05, |
|
"loss": 4.4643, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4529881625556976e-05, |
|
"loss": 4.4714, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4520869898416207e-05, |
|
"loss": 4.5301, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4511858171275444e-05, |
|
"loss": 4.4925, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.450284644413468e-05, |
|
"loss": 4.534, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.449383471699392e-05, |
|
"loss": 4.5594, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4484822989853156e-05, |
|
"loss": 4.5079, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4475811262712394e-05, |
|
"loss": 4.5475, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.446679953557164e-05, |
|
"loss": 4.4948, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.445778780843087e-05, |
|
"loss": 4.5257, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4448776081290106e-05, |
|
"loss": 4.4913, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.443976435414934e-05, |
|
"loss": 4.5662, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.443075262700858e-05, |
|
"loss": 4.5267, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.442174089986782e-05, |
|
"loss": 4.4999, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4412729172727055e-05, |
|
"loss": 4.5111, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.440371744558629e-05, |
|
"loss": 4.5107, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.439470571844553e-05, |
|
"loss": 4.5682, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.438569399130477e-05, |
|
"loss": 4.5016, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4376682264164005e-05, |
|
"loss": 4.4973, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.436767053702324e-05, |
|
"loss": 4.5022, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.435865880988248e-05, |
|
"loss": 4.5441, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.434964708274171e-05, |
|
"loss": 4.5459, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4340635355600955e-05, |
|
"loss": 4.5141, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.433162362846019e-05, |
|
"loss": 4.5329, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.432261190131943e-05, |
|
"loss": 4.5172, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.431360017417867e-05, |
|
"loss": 4.5264, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4304588447037904e-05, |
|
"loss": 4.5352, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4295576719897135e-05, |
|
"loss": 4.5287, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.428656499275637e-05, |
|
"loss": 4.5314, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.427755326561561e-05, |
|
"loss": 4.5219, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.426854153847485e-05, |
|
"loss": 4.5136, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.425952981133409e-05, |
|
"loss": 4.483, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.425051808419333e-05, |
|
"loss": 4.4569, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4241506357052566e-05, |
|
"loss": 4.522, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4232494629911797e-05, |
|
"loss": 4.4795, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4223482902771034e-05, |
|
"loss": 4.5522, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.421447117563027e-05, |
|
"loss": 4.49, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.420545944848951e-05, |
|
"loss": 4.5291, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4196447721348746e-05, |
|
"loss": 4.5003, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.418743599420799e-05, |
|
"loss": 4.4768, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.417842426706722e-05, |
|
"loss": 4.498, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.416941253992646e-05, |
|
"loss": 4.4942, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4160400812785696e-05, |
|
"loss": 4.5224, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.415138908564493e-05, |
|
"loss": 4.4901, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.414237735850417e-05, |
|
"loss": 4.4871, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.413336563136341e-05, |
|
"loss": 4.4594, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4124353904222645e-05, |
|
"loss": 4.515, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.411534217708188e-05, |
|
"loss": 4.4832, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.410633044994112e-05, |
|
"loss": 4.5075, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.409731872280036e-05, |
|
"loss": 4.4815, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4088306995659595e-05, |
|
"loss": 4.5843, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.407929526851883e-05, |
|
"loss": 4.4808, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.407028354137806e-05, |
|
"loss": 4.5299, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.40612718142373e-05, |
|
"loss": 4.4625, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4052260087096545e-05, |
|
"loss": 4.5026, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.404324835995578e-05, |
|
"loss": 4.4895, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.403423663281502e-05, |
|
"loss": 4.5502, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.402522490567426e-05, |
|
"loss": 4.4647, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.401621317853349e-05, |
|
"loss": 4.4907, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4007201451392725e-05, |
|
"loss": 4.4623, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.399818972425196e-05, |
|
"loss": 4.5451, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.39891779971112e-05, |
|
"loss": 4.5007, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3980166269970444e-05, |
|
"loss": 4.507, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.397115454282968e-05, |
|
"loss": 4.4595, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.396214281568892e-05, |
|
"loss": 4.5234, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.395313108854815e-05, |
|
"loss": 4.4674, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.394411936140739e-05, |
|
"loss": 4.4806, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3935107634266624e-05, |
|
"loss": 4.4845, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.392609590712586e-05, |
|
"loss": 4.5202, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.39170841799851e-05, |
|
"loss": 4.5301, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.390807245284434e-05, |
|
"loss": 4.5139, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3899060725703574e-05, |
|
"loss": 4.4715, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.389004899856281e-05, |
|
"loss": 4.4752, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.388103727142205e-05, |
|
"loss": 4.4945, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3872025544281286e-05, |
|
"loss": 4.4648, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.386301381714052e-05, |
|
"loss": 4.5011, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.385400208999976e-05, |
|
"loss": 4.527, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3844990362859e-05, |
|
"loss": 4.5182, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3835978635718235e-05, |
|
"loss": 4.4455, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.382696690857747e-05, |
|
"loss": 4.4408, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.381795518143671e-05, |
|
"loss": 4.4726, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.380894345429595e-05, |
|
"loss": 4.4912, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3799931727155185e-05, |
|
"loss": 4.4715, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3790920000014416e-05, |
|
"loss": 4.4808, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.378190827287365e-05, |
|
"loss": 4.469, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.37728965457329e-05, |
|
"loss": 4.5089, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3763884818592135e-05, |
|
"loss": 4.4609, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.375487309145137e-05, |
|
"loss": 4.5222, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.374586136431061e-05, |
|
"loss": 4.4459, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.373684963716985e-05, |
|
"loss": 4.5242, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.372783791002908e-05, |
|
"loss": 4.4754, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3718826182888315e-05, |
|
"loss": 4.4723, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.370981445574755e-05, |
|
"loss": 4.4723, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3700802728606796e-05, |
|
"loss": 4.489, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3691791001466034e-05, |
|
"loss": 4.4956, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.368277927432527e-05, |
|
"loss": 4.5123, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.36737675471845e-05, |
|
"loss": 4.5453, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.366475582004374e-05, |
|
"loss": 4.4737, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.365574409290298e-05, |
|
"loss": 4.4692, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3646732365762214e-05, |
|
"loss": 4.4465, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.363772063862145e-05, |
|
"loss": 4.4285, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3628708911480696e-05, |
|
"loss": 4.5237, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3619697184339926e-05, |
|
"loss": 4.5195, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3610685457199164e-05, |
|
"loss": 4.492, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.36016737300584e-05, |
|
"loss": 4.485, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.359266200291764e-05, |
|
"loss": 4.4856, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3583650275776876e-05, |
|
"loss": 4.5072, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.357463854863611e-05, |
|
"loss": 4.4757, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.356562682149535e-05, |
|
"loss": 4.4942, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.355661509435459e-05, |
|
"loss": 4.4687, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3547603367213825e-05, |
|
"loss": 4.4391, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.353859164007306e-05, |
|
"loss": 4.4766, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.35295799129323e-05, |
|
"loss": 4.5417, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.352056818579154e-05, |
|
"loss": 4.4657, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3511556458650775e-05, |
|
"loss": 4.5017, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3502544731510006e-05, |
|
"loss": 4.4788, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.349353300436925e-05, |
|
"loss": 4.4984, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.348452127722849e-05, |
|
"loss": 4.543, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3475509550087725e-05, |
|
"loss": 4.4832, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.346649782294696e-05, |
|
"loss": 4.5137, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.34574860958062e-05, |
|
"loss": 4.4928, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.344847436866543e-05, |
|
"loss": 4.4666, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.343946264152467e-05, |
|
"loss": 4.4647, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3430450914383905e-05, |
|
"loss": 4.4378, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.342143918724315e-05, |
|
"loss": 4.4485, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3412427460102386e-05, |
|
"loss": 4.4648, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3403415732961624e-05, |
|
"loss": 4.4384, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3394404005820855e-05, |
|
"loss": 4.4319, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.338539227868009e-05, |
|
"loss": 4.4791, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.337638055153933e-05, |
|
"loss": 4.5161, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.336736882439857e-05, |
|
"loss": 4.4157, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3358357097257804e-05, |
|
"loss": 4.4717, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.334934537011704e-05, |
|
"loss": 4.479, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.334033364297628e-05, |
|
"loss": 4.4731, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3331321915835516e-05, |
|
"loss": 4.4899, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3322310188694754e-05, |
|
"loss": 4.502, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.331329846155399e-05, |
|
"loss": 4.4384, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.330428673441323e-05, |
|
"loss": 4.4643, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3295275007272466e-05, |
|
"loss": 4.4491, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.32862632801317e-05, |
|
"loss": 4.409, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.327725155299094e-05, |
|
"loss": 4.4673, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.326823982585018e-05, |
|
"loss": 4.4603, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3259228098709416e-05, |
|
"loss": 4.4743, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.325021637156865e-05, |
|
"loss": 4.4634, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.324120464442789e-05, |
|
"loss": 4.4576, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.323219291728713e-05, |
|
"loss": 4.484, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.322318119014636e-05, |
|
"loss": 4.431, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.32141694630056e-05, |
|
"loss": 4.5062, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.320515773586484e-05, |
|
"loss": 4.4464, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.319614600872408e-05, |
|
"loss": 4.4734, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3187134281583315e-05, |
|
"loss": 4.4615, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.317812255444255e-05, |
|
"loss": 4.4707, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.316911082730178e-05, |
|
"loss": 4.5084, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.316009910016102e-05, |
|
"loss": 4.4335, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.315108737302026e-05, |
|
"loss": 4.5115, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.31420756458795e-05, |
|
"loss": 4.495, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.313306391873874e-05, |
|
"loss": 4.5261, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3124052191597977e-05, |
|
"loss": 4.5579, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.311504046445721e-05, |
|
"loss": 4.4861, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3106028737316445e-05, |
|
"loss": 4.4294, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.309701701017568e-05, |
|
"loss": 4.4975, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.308800528303492e-05, |
|
"loss": 4.4527, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.307899355589416e-05, |
|
"loss": 4.4521, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3069981828753394e-05, |
|
"loss": 4.5024, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.306097010161263e-05, |
|
"loss": 4.4181, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.305195837447187e-05, |
|
"loss": 4.4786, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3042946647331106e-05, |
|
"loss": 4.4245, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3033934920190344e-05, |
|
"loss": 4.4878, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.302492319304958e-05, |
|
"loss": 4.4427, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.301591146590882e-05, |
|
"loss": 4.3975, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3006899738768056e-05, |
|
"loss": 4.4566, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.299788801162729e-05, |
|
"loss": 4.4441, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.298887628448653e-05, |
|
"loss": 4.4606, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.297986455734577e-05, |
|
"loss": 4.5232, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2970852830205006e-05, |
|
"loss": 4.464, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.296184110306424e-05, |
|
"loss": 4.3564, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.295282937592348e-05, |
|
"loss": 4.4523, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.294381764878271e-05, |
|
"loss": 4.4288, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.2934805921641955e-05, |
|
"loss": 4.3775, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.292579419450119e-05, |
|
"loss": 4.4579, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.291678246736043e-05, |
|
"loss": 4.4408, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.290777074021967e-05, |
|
"loss": 4.4396, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2898759013078905e-05, |
|
"loss": 4.4438, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2889747285938135e-05, |
|
"loss": 4.46, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.288073555879737e-05, |
|
"loss": 4.522, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.287172383165661e-05, |
|
"loss": 4.471, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.286271210451585e-05, |
|
"loss": 4.3984, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.285370037737509e-05, |
|
"loss": 4.4826, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.284468865023433e-05, |
|
"loss": 4.4343, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.283567692309356e-05, |
|
"loss": 4.4479, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.28266651959528e-05, |
|
"loss": 4.4768, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2817653468812035e-05, |
|
"loss": 4.4508, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.280864174167127e-05, |
|
"loss": 4.4707, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.279963001453051e-05, |
|
"loss": 4.4679, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.279061828738975e-05, |
|
"loss": 4.3981, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.278160656024899e-05, |
|
"loss": 4.4731, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.277259483310822e-05, |
|
"loss": 4.4104, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.276358310596746e-05, |
|
"loss": 4.4197, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2754571378826696e-05, |
|
"loss": 4.4555, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2745559651685934e-05, |
|
"loss": 4.4885, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.273654792454517e-05, |
|
"loss": 4.3961, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.272753619740441e-05, |
|
"loss": 4.4982, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2718524470263646e-05, |
|
"loss": 4.4241, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2709512743122883e-05, |
|
"loss": 4.4929, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.270050101598212e-05, |
|
"loss": 4.438, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.269148928884136e-05, |
|
"loss": 4.4581, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2682477561700596e-05, |
|
"loss": 4.4261, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.267346583455983e-05, |
|
"loss": 4.4314, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2664454107419064e-05, |
|
"loss": 4.5199, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.26554423802783e-05, |
|
"loss": 4.3874, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2646430653137545e-05, |
|
"loss": 4.4064, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.263741892599678e-05, |
|
"loss": 4.4102, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.262840719885602e-05, |
|
"loss": 4.4532, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.261939547171526e-05, |
|
"loss": 4.4605, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.261038374457449e-05, |
|
"loss": 4.4169, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2601372017433725e-05, |
|
"loss": 4.4427, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.259236029029296e-05, |
|
"loss": 4.4733, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.25833485631522e-05, |
|
"loss": 4.5038, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2574336836011444e-05, |
|
"loss": 4.4452, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.256532510887068e-05, |
|
"loss": 4.4282, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.255631338172992e-05, |
|
"loss": 4.4557, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.254730165458915e-05, |
|
"loss": 4.4631, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.253828992744839e-05, |
|
"loss": 4.4623, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2529278200307625e-05, |
|
"loss": 4.4419, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.252026647316686e-05, |
|
"loss": 4.4337, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.25112547460261e-05, |
|
"loss": 4.4549, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2502243018885344e-05, |
|
"loss": 4.4857, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2493231291744574e-05, |
|
"loss": 4.4788, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.248421956460381e-05, |
|
"loss": 4.4158, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.247520783746305e-05, |
|
"loss": 4.4255, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2466196110322286e-05, |
|
"loss": 4.5044, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2457184383181524e-05, |
|
"loss": 4.352, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.244817265604076e-05, |
|
"loss": 4.4666, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.24391609289e-05, |
|
"loss": 4.4477, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2430149201759236e-05, |
|
"loss": 4.4425, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2421137474618473e-05, |
|
"loss": 4.5022, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.241212574747771e-05, |
|
"loss": 4.3832, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.240311402033695e-05, |
|
"loss": 4.4686, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2394102293196186e-05, |
|
"loss": 4.4394, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2385090566055416e-05, |
|
"loss": 4.4662, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2376078838914654e-05, |
|
"loss": 4.4482, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.23670671117739e-05, |
|
"loss": 4.4238, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2358055384633135e-05, |
|
"loss": 4.399, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.234904365749237e-05, |
|
"loss": 4.4646, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.234003193035161e-05, |
|
"loss": 4.4333, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.233102020321084e-05, |
|
"loss": 4.4222, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.232200847607008e-05, |
|
"loss": 4.4807, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2312996748929315e-05, |
|
"loss": 4.4585, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.230398502178855e-05, |
|
"loss": 4.4629, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.22949732946478e-05, |
|
"loss": 4.3969, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2285961567507034e-05, |
|
"loss": 4.4375, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.227694984036627e-05, |
|
"loss": 4.4462, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.22679381132255e-05, |
|
"loss": 4.3994, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.225892638608474e-05, |
|
"loss": 4.441, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.224991465894398e-05, |
|
"loss": 4.4581, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2240902931803215e-05, |
|
"loss": 4.4271, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.223189120466245e-05, |
|
"loss": 4.4268, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2222879477521696e-05, |
|
"loss": 4.4299, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.221386775038093e-05, |
|
"loss": 4.4034, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2204856023240164e-05, |
|
"loss": 4.5251, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.21958442960994e-05, |
|
"loss": 4.4132, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.218683256895864e-05, |
|
"loss": 4.4342, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2177820841817877e-05, |
|
"loss": 4.4125, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2168809114677114e-05, |
|
"loss": 4.4599, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.215979738753635e-05, |
|
"loss": 4.3972, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.215078566039559e-05, |
|
"loss": 4.5031, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2141773933254826e-05, |
|
"loss": 4.4313, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2132762206114064e-05, |
|
"loss": 4.4108, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.21237504789733e-05, |
|
"loss": 4.4509, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.211473875183254e-05, |
|
"loss": 4.4684, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.210572702469177e-05, |
|
"loss": 4.4394, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2096715297551006e-05, |
|
"loss": 4.3804, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.208770357041025e-05, |
|
"loss": 4.4641, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.207869184326949e-05, |
|
"loss": 4.3934, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2069680116128725e-05, |
|
"loss": 4.3989, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.206066838898796e-05, |
|
"loss": 4.447, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.20516566618472e-05, |
|
"loss": 4.4046, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.204264493470643e-05, |
|
"loss": 4.4264, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.203363320756567e-05, |
|
"loss": 4.3891, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.2024621480424906e-05, |
|
"loss": 4.4143, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.201560975328415e-05, |
|
"loss": 4.4362, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.200659802614339e-05, |
|
"loss": 4.4681, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1997586299002625e-05, |
|
"loss": 4.4628, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1988574571861855e-05, |
|
"loss": 4.4444, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.197956284472109e-05, |
|
"loss": 4.3894, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.197055111758033e-05, |
|
"loss": 4.4775, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.196153939043957e-05, |
|
"loss": 4.3898, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1952527663298805e-05, |
|
"loss": 4.4591, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.194351593615804e-05, |
|
"loss": 4.4336, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.193450420901728e-05, |
|
"loss": 4.4063, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.192549248187652e-05, |
|
"loss": 4.4326, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1916480754735754e-05, |
|
"loss": 4.4418, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.190746902759499e-05, |
|
"loss": 4.4141, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.189845730045423e-05, |
|
"loss": 4.3698, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1889445573313467e-05, |
|
"loss": 4.4296, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1880433846172704e-05, |
|
"loss": 4.4399, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.187142211903194e-05, |
|
"loss": 4.4123, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.186241039189118e-05, |
|
"loss": 4.3735, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1853398664750416e-05, |
|
"loss": 4.3984, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1844386937609654e-05, |
|
"loss": 4.4167, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.183537521046889e-05, |
|
"loss": 4.3666, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.182636348332813e-05, |
|
"loss": 4.4422, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.181735175618736e-05, |
|
"loss": 4.3986, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.18083400290466e-05, |
|
"loss": 4.4333, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.179932830190584e-05, |
|
"loss": 4.4112, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.179031657476508e-05, |
|
"loss": 4.42, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1781304847624315e-05, |
|
"loss": 4.4114, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.177229312048355e-05, |
|
"loss": 4.3842, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1763281393342783e-05, |
|
"loss": 4.4399, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.175426966620202e-05, |
|
"loss": 4.4283, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.174525793906126e-05, |
|
"loss": 4.4357, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1736246211920496e-05, |
|
"loss": 4.4383, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.172723448477974e-05, |
|
"loss": 4.4069, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.171822275763898e-05, |
|
"loss": 4.4093, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.170921103049821e-05, |
|
"loss": 4.4003, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1700199303357445e-05, |
|
"loss": 4.3554, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.169118757621668e-05, |
|
"loss": 4.4233, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.168217584907592e-05, |
|
"loss": 4.4297, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.167316412193516e-05, |
|
"loss": 4.41, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1664152394794395e-05, |
|
"loss": 4.4319, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.165514066765363e-05, |
|
"loss": 4.4113, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.164612894051287e-05, |
|
"loss": 4.4162, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.163711721337211e-05, |
|
"loss": 4.437, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1628105486231344e-05, |
|
"loss": 4.4412, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.161909375909058e-05, |
|
"loss": 4.4154, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.161008203194982e-05, |
|
"loss": 4.4167, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1601070304809057e-05, |
|
"loss": 4.4659, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1592058577668294e-05, |
|
"loss": 4.4041, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.158304685052753e-05, |
|
"loss": 4.4115, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.157403512338677e-05, |
|
"loss": 4.4393, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1565023396246006e-05, |
|
"loss": 4.3725, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1556011669105244e-05, |
|
"loss": 4.4011, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.154699994196448e-05, |
|
"loss": 4.353, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.153798821482371e-05, |
|
"loss": 4.3823, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.152897648768295e-05, |
|
"loss": 4.4488, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.151996476054219e-05, |
|
"loss": 4.4014, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.151095303340143e-05, |
|
"loss": 4.3857, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.150194130626067e-05, |
|
"loss": 4.4427, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1492929579119905e-05, |
|
"loss": 4.4374, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1483917851979136e-05, |
|
"loss": 4.3678, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1474906124838373e-05, |
|
"loss": 4.3926, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.146589439769761e-05, |
|
"loss": 4.426, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.145688267055685e-05, |
|
"loss": 4.4384, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.144787094341609e-05, |
|
"loss": 4.4226, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.143885921627533e-05, |
|
"loss": 4.3985, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.142984748913456e-05, |
|
"loss": 4.3802, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.14208357619938e-05, |
|
"loss": 4.4457, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1411824034853035e-05, |
|
"loss": 4.4333, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.140281230771227e-05, |
|
"loss": 4.3906, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.139380058057151e-05, |
|
"loss": 4.3618, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.138478885343075e-05, |
|
"loss": 4.4389, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1375777126289985e-05, |
|
"loss": 4.354, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.136676539914922e-05, |
|
"loss": 4.4335, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.135775367200846e-05, |
|
"loss": 4.405, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.13487419448677e-05, |
|
"loss": 4.4223, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1339730217726934e-05, |
|
"loss": 4.4074, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.133071849058617e-05, |
|
"loss": 4.3557, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.132170676344541e-05, |
|
"loss": 4.4342, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.131269503630465e-05, |
|
"loss": 4.3986, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1303683309163884e-05, |
|
"loss": 4.4292, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.129467158202312e-05, |
|
"loss": 4.4526, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.128565985488236e-05, |
|
"loss": 4.4217, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1276648127741596e-05, |
|
"loss": 4.3949, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1267636400600834e-05, |
|
"loss": 4.4406, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1258624673460064e-05, |
|
"loss": 4.4383, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.12496129463193e-05, |
|
"loss": 4.4106, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1240601219178546e-05, |
|
"loss": 4.382, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.123158949203778e-05, |
|
"loss": 4.3267, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.122257776489702e-05, |
|
"loss": 4.4221, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.121356603775626e-05, |
|
"loss": 4.3966, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.120455431061549e-05, |
|
"loss": 4.3824, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1195542583474726e-05, |
|
"loss": 4.4346, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1186530856333963e-05, |
|
"loss": 4.3681, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11775191291932e-05, |
|
"loss": 4.405, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1168507402052445e-05, |
|
"loss": 4.4267, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.115949567491168e-05, |
|
"loss": 4.4356, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.115048394777091e-05, |
|
"loss": 4.3915, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.114147222063015e-05, |
|
"loss": 4.4071, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.113246049348939e-05, |
|
"loss": 4.4558, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1123448766348625e-05, |
|
"loss": 4.4161, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.111443703920786e-05, |
|
"loss": 4.4322, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.11054253120671e-05, |
|
"loss": 4.4165, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1096413584926344e-05, |
|
"loss": 4.3936, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1087401857785575e-05, |
|
"loss": 4.464, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.107839013064481e-05, |
|
"loss": 4.4453, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.106937840350405e-05, |
|
"loss": 4.4496, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.106036667636329e-05, |
|
"loss": 4.4243, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1051354949222524e-05, |
|
"loss": 4.4202, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.104234322208176e-05, |
|
"loss": 4.4393, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1033331494941e-05, |
|
"loss": 4.3986, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.102431976780024e-05, |
|
"loss": 4.3453, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1015308040659474e-05, |
|
"loss": 4.4282, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.100629631351871e-05, |
|
"loss": 4.4063, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.099728458637795e-05, |
|
"loss": 4.4041, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0988272859237186e-05, |
|
"loss": 4.4121, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.097926113209642e-05, |
|
"loss": 4.3535, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0970249404955654e-05, |
|
"loss": 4.4579, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.09612376778149e-05, |
|
"loss": 4.4047, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0952225950674136e-05, |
|
"loss": 4.3953, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.094321422353337e-05, |
|
"loss": 4.3709, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.093420249639261e-05, |
|
"loss": 4.4017, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.092519076925184e-05, |
|
"loss": 4.3861, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.091617904211108e-05, |
|
"loss": 4.4664, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0907167314970316e-05, |
|
"loss": 4.4029, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0898155587829554e-05, |
|
"loss": 4.386, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.08891438606888e-05, |
|
"loss": 4.3983, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0880132133548035e-05, |
|
"loss": 4.3899, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.087112040640727e-05, |
|
"loss": 4.3988, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.08621086792665e-05, |
|
"loss": 4.3771, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.085309695212574e-05, |
|
"loss": 4.3443, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.084408522498498e-05, |
|
"loss": 4.3714, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0835073497844215e-05, |
|
"loss": 4.3909, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.082606177070345e-05, |
|
"loss": 4.4214, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.081705004356269e-05, |
|
"loss": 4.4305, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.080803831642193e-05, |
|
"loss": 4.3784, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0799026589281165e-05, |
|
"loss": 4.4198, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.07900148621404e-05, |
|
"loss": 4.3954, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.078100313499964e-05, |
|
"loss": 4.4075, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.077199140785888e-05, |
|
"loss": 4.4065, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0762979680718115e-05, |
|
"loss": 4.4122, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.075396795357735e-05, |
|
"loss": 4.4, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.074495622643659e-05, |
|
"loss": 4.3722, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.073594449929583e-05, |
|
"loss": 4.3375, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0726932772155064e-05, |
|
"loss": 4.3655, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.07179210450143e-05, |
|
"loss": 4.3714, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.070890931787354e-05, |
|
"loss": 4.4154, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.069989759073277e-05, |
|
"loss": 4.4121, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.069088586359201e-05, |
|
"loss": 4.4102, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.068187413645125e-05, |
|
"loss": 4.3882, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.067286240931049e-05, |
|
"loss": 4.4476, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0663850682169726e-05, |
|
"loss": 4.3978, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.065483895502896e-05, |
|
"loss": 4.4405, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.06458272278882e-05, |
|
"loss": 4.3647, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.063681550074743e-05, |
|
"loss": 4.3729, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.062780377360667e-05, |
|
"loss": 4.4138, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0618792046465906e-05, |
|
"loss": 4.3248, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0609780319325144e-05, |
|
"loss": 4.422, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.060076859218439e-05, |
|
"loss": 4.3538, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0591756865043625e-05, |
|
"loss": 4.4099, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0582745137902856e-05, |
|
"loss": 4.4193, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.057373341076209e-05, |
|
"loss": 4.3988, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.056472168362133e-05, |
|
"loss": 4.4022, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.055570995648057e-05, |
|
"loss": 4.3413, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0546698229339805e-05, |
|
"loss": 4.434, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.053768650219904e-05, |
|
"loss": 4.3744, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.052867477505828e-05, |
|
"loss": 4.418, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.051966304791752e-05, |
|
"loss": 4.3814, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0510651320776755e-05, |
|
"loss": 4.3454, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.050163959363599e-05, |
|
"loss": 4.3251, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.049262786649523e-05, |
|
"loss": 4.4182, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.048361613935447e-05, |
|
"loss": 4.3319, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0474604412213705e-05, |
|
"loss": 4.3861, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.046559268507294e-05, |
|
"loss": 4.4092, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.045658095793218e-05, |
|
"loss": 4.397, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.044756923079142e-05, |
|
"loss": 4.3839, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0438557503650654e-05, |
|
"loss": 4.4383, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.042954577650989e-05, |
|
"loss": 4.4198, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.042053404936912e-05, |
|
"loss": 4.3632, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.041152232222836e-05, |
|
"loss": 4.3722, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.04025105950876e-05, |
|
"loss": 4.3664, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.039349886794684e-05, |
|
"loss": 4.357, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038448714080608e-05, |
|
"loss": 4.3484, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0375475413665316e-05, |
|
"loss": 4.4506, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0366463686524553e-05, |
|
"loss": 4.37, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0357451959383784e-05, |
|
"loss": 4.3452, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.034844023224302e-05, |
|
"loss": 4.4018, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.033942850510226e-05, |
|
"loss": 4.4079, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0330416777961496e-05, |
|
"loss": 4.3569, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.032140505082074e-05, |
|
"loss": 4.3495, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.031239332367998e-05, |
|
"loss": 4.3752, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.030338159653921e-05, |
|
"loss": 4.3821, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0294369869398446e-05, |
|
"loss": 4.431, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.028535814225768e-05, |
|
"loss": 4.3057, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.027634641511692e-05, |
|
"loss": 4.3249, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.026733468797616e-05, |
|
"loss": 4.3181, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0258322960835395e-05, |
|
"loss": 4.3905, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.024931123369463e-05, |
|
"loss": 4.3406, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.024029950655387e-05, |
|
"loss": 4.3545, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.023128777941311e-05, |
|
"loss": 4.3554, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0222276052272345e-05, |
|
"loss": 4.4182, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.021326432513158e-05, |
|
"loss": 4.4599, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.020425259799082e-05, |
|
"loss": 4.326, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.019524087085006e-05, |
|
"loss": 4.3247, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0186229143709295e-05, |
|
"loss": 4.4027, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.017721741656853e-05, |
|
"loss": 4.315, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.016820568942777e-05, |
|
"loss": 4.3967, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.015919396228701e-05, |
|
"loss": 4.3808, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0150182235146244e-05, |
|
"loss": 4.3609, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.014117050800548e-05, |
|
"loss": 4.3969, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.013215878086471e-05, |
|
"loss": 4.3735, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.012314705372395e-05, |
|
"loss": 4.3567, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0114135326583194e-05, |
|
"loss": 4.3614, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.010512359944243e-05, |
|
"loss": 4.3611, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.009611187230167e-05, |
|
"loss": 4.376, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0087100145160906e-05, |
|
"loss": 4.3128, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.007808841802014e-05, |
|
"loss": 4.3885, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0069076690879374e-05, |
|
"loss": 4.3767, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.006006496373861e-05, |
|
"loss": 4.3457, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.005105323659785e-05, |
|
"loss": 4.462, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.004204150945709e-05, |
|
"loss": 4.3849, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.003302978231633e-05, |
|
"loss": 4.3644, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.002401805517556e-05, |
|
"loss": 4.3445, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.00150063280348e-05, |
|
"loss": 4.3573, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0005994600894036e-05, |
|
"loss": 4.3702, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.999698287375327e-05, |
|
"loss": 4.335, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.998797114661251e-05, |
|
"loss": 4.3592, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.997895941947175e-05, |
|
"loss": 4.3702, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9969947692330985e-05, |
|
"loss": 4.3976, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.996093596519022e-05, |
|
"loss": 4.3542, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.995192423804946e-05, |
|
"loss": 4.3243, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.99429125109087e-05, |
|
"loss": 4.3865, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9933900783767935e-05, |
|
"loss": 4.3937, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.992488905662717e-05, |
|
"loss": 4.4588, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.991587732948641e-05, |
|
"loss": 4.3763, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.990686560234565e-05, |
|
"loss": 4.3972, |
|
"step": 560000 |
|
} |
|
], |
|
"max_steps": 2774163, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.658088448e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|