|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9995835068721366, |
|
"global_step": 168000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9940500981733805e-05, |
|
"loss": 1.1973, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9881001963467605e-05, |
|
"loss": 1.2168, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9821502945201405e-05, |
|
"loss": 1.1454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9762003926935208e-05, |
|
"loss": 1.1647, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9702504908669008e-05, |
|
"loss": 1.1795, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.964300589040281e-05, |
|
"loss": 1.1723, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.958350687213661e-05, |
|
"loss": 1.157, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.952400785387041e-05, |
|
"loss": 1.1699, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9464508835604215e-05, |
|
"loss": 1.1593, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9405009817338018e-05, |
|
"loss": 1.1421, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9345510799071814e-05, |
|
"loss": 1.0648, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9286011780805618e-05, |
|
"loss": 1.1345, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.922651276253942e-05, |
|
"loss": 1.1722, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.916701374427322e-05, |
|
"loss": 1.1588, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.910751472600702e-05, |
|
"loss": 1.1502, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9048015707740824e-05, |
|
"loss": 1.1297, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8988516689474624e-05, |
|
"loss": 1.1478, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8929017671208427e-05, |
|
"loss": 1.1025, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8869518652942227e-05, |
|
"loss": 1.126, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8810019634676027e-05, |
|
"loss": 1.1438, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.875052061640983e-05, |
|
"loss": 1.1064, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8691021598143634e-05, |
|
"loss": 1.1369, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8631522579877434e-05, |
|
"loss": 1.1036, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8572023561611234e-05, |
|
"loss": 1.0771, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8512524543345037e-05, |
|
"loss": 1.0841, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8453025525078837e-05, |
|
"loss": 1.0818, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.839352650681264e-05, |
|
"loss": 1.1462, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.833402748854644e-05, |
|
"loss": 1.1335, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.827452847028024e-05, |
|
"loss": 1.079, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8215029452014044e-05, |
|
"loss": 1.1276, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8155530433747847e-05, |
|
"loss": 1.1119, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8096031415481647e-05, |
|
"loss": 1.1343, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8036532397215447e-05, |
|
"loss": 1.0787, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.797703337894925e-05, |
|
"loss": 1.1122, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.791753436068305e-05, |
|
"loss": 1.0904, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.785803534241685e-05, |
|
"loss": 1.0882, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7798536324150653e-05, |
|
"loss": 1.1311, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7739037305884453e-05, |
|
"loss": 1.0936, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7679538287618257e-05, |
|
"loss": 1.0959, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7620039269352056e-05, |
|
"loss": 1.0879, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7560540251085856e-05, |
|
"loss": 1.0763, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.750104123281966e-05, |
|
"loss": 1.0713, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7441542214553463e-05, |
|
"loss": 1.1204, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7382043196287263e-05, |
|
"loss": 1.0808, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7322544178021063e-05, |
|
"loss": 1.1301, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7263045159754866e-05, |
|
"loss": 1.1141, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7203546141488666e-05, |
|
"loss": 1.109, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.714404712322247e-05, |
|
"loss": 1.0691, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.708454810495627e-05, |
|
"loss": 1.0805, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.702504908669007e-05, |
|
"loss": 1.0795, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6965550068423873e-05, |
|
"loss": 1.0431, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6906051050157676e-05, |
|
"loss": 1.0553, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6846552031891476e-05, |
|
"loss": 1.0786, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6787053013625276e-05, |
|
"loss": 1.1008, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.672755399535908e-05, |
|
"loss": 1.0776, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.666805497709288e-05, |
|
"loss": 1.0993, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.660855595882668e-05, |
|
"loss": 1.1101, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6549056940560482e-05, |
|
"loss": 1.0791, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6489557922294282e-05, |
|
"loss": 1.0705, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6430058904028086e-05, |
|
"loss": 1.0618, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6370559885761885e-05, |
|
"loss": 1.0408, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.631106086749569e-05, |
|
"loss": 1.1367, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.625156184922949e-05, |
|
"loss": 1.0727, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6192062830963292e-05, |
|
"loss": 1.0444, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6132563812697092e-05, |
|
"loss": 1.0723, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6073064794430892e-05, |
|
"loss": 1.0627, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6013565776164695e-05, |
|
"loss": 1.0835, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5954066757898495e-05, |
|
"loss": 1.1624, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.58945677396323e-05, |
|
"loss": 1.0383, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.58350687213661e-05, |
|
"loss": 1.0805, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.57755697030999e-05, |
|
"loss": 1.0426, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.57160706848337e-05, |
|
"loss": 1.1281, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5656571666567505e-05, |
|
"loss": 1.0695, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5597072648301305e-05, |
|
"loss": 1.0617, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5537573630035105e-05, |
|
"loss": 1.0849, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5478074611768908e-05, |
|
"loss": 1.0923, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5418575593502708e-05, |
|
"loss": 1.1421, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5359076575236508e-05, |
|
"loss": 1.0981, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.529957755697031e-05, |
|
"loss": 1.0991, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5240078538704113e-05, |
|
"loss": 1.0568, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5180579520437915e-05, |
|
"loss": 1.1249, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5121080502171714e-05, |
|
"loss": 1.0764, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5061581483905516e-05, |
|
"loss": 1.0582, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5002082465639318e-05, |
|
"loss": 1.0795, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.494258344737312e-05, |
|
"loss": 1.0514, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.488308442910692e-05, |
|
"loss": 1.0427, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4823585410840723e-05, |
|
"loss": 1.0842, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4764086392574524e-05, |
|
"loss": 1.0961, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4704587374308326e-05, |
|
"loss": 1.1226, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4645088356042127e-05, |
|
"loss": 1.0607, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4585589337775927e-05, |
|
"loss": 1.0479, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4526090319509729e-05, |
|
"loss": 1.0572, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.446659130124353e-05, |
|
"loss": 1.1465, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4407092282977332e-05, |
|
"loss": 1.041, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4347593264711132e-05, |
|
"loss": 1.0366, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4288094246444934e-05, |
|
"loss": 1.0661, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4228595228178737e-05, |
|
"loss": 1.0495, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4169096209912539e-05, |
|
"loss": 1.0769, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4109597191646339e-05, |
|
"loss": 1.0329, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.405009817338014e-05, |
|
"loss": 1.0702, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3990599155113942e-05, |
|
"loss": 1.0354, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3931100136847744e-05, |
|
"loss": 1.0817, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3871601118581544e-05, |
|
"loss": 1.0362, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3812102100315345e-05, |
|
"loss": 1.0761, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3752603082049147e-05, |
|
"loss": 1.0295, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.369310406378295e-05, |
|
"loss": 1.0365, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3633605045516748e-05, |
|
"loss": 1.0378, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3574106027250552e-05, |
|
"loss": 1.0172, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3514607008984353e-05, |
|
"loss": 1.0553, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3455107990718155e-05, |
|
"loss": 1.0595, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3395608972451957e-05, |
|
"loss": 1.0443, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3336109954185756e-05, |
|
"loss": 1.0359, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3276610935919558e-05, |
|
"loss": 1.0631, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.321711191765336e-05, |
|
"loss": 1.0293, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3157612899387161e-05, |
|
"loss": 1.0878, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3098113881120961e-05, |
|
"loss": 1.054, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3038614862854765e-05, |
|
"loss": 1.0523, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2979115844588566e-05, |
|
"loss": 1.0117, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2919616826322368e-05, |
|
"loss": 1.0612, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2860117808056168e-05, |
|
"loss": 1.0106, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.280061878978997e-05, |
|
"loss": 1.026, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2741119771523771e-05, |
|
"loss": 1.0533, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2681620753257573e-05, |
|
"loss": 1.0651, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2622121734991373e-05, |
|
"loss": 1.0717, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2562622716725174e-05, |
|
"loss": 1.0679, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2503123698458976e-05, |
|
"loss": 1.0594, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2443624680192779e-05, |
|
"loss": 1.0457, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2384125661926577e-05, |
|
"loss": 1.0491, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.232462664366038e-05, |
|
"loss": 1.0212, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2265127625394182e-05, |
|
"loss": 1.0079, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2205628607127984e-05, |
|
"loss": 1.0513, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2146129588861786e-05, |
|
"loss": 1.0651, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2086630570595585e-05, |
|
"loss": 1.1196, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2027131552329387e-05, |
|
"loss": 1.0522, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1967632534063189e-05, |
|
"loss": 1.0775, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1908133515796992e-05, |
|
"loss": 1.0494, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.184863449753079e-05, |
|
"loss": 1.0312, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1789135479264594e-05, |
|
"loss": 0.9866, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1729636460998395e-05, |
|
"loss": 1.0072, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1670137442732197e-05, |
|
"loss": 1.0145, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1610638424465997e-05, |
|
"loss": 1.0334, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1551139406199798e-05, |
|
"loss": 1.1029, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.14916403879336e-05, |
|
"loss": 1.0556, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1432141369667402e-05, |
|
"loss": 1.0046, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1372642351401202e-05, |
|
"loss": 1.0827, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1313143333135003e-05, |
|
"loss": 1.0392, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1253644314868807e-05, |
|
"loss": 1.0635, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1194145296602608e-05, |
|
"loss": 1.0246, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1134646278336408e-05, |
|
"loss": 1.0271, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.107514726007021e-05, |
|
"loss": 0.9673, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1015648241804011e-05, |
|
"loss": 1.0277, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.0956149223537813e-05, |
|
"loss": 1.0755, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0896650205271615e-05, |
|
"loss": 1.017, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0837151187005414e-05, |
|
"loss": 1.012, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0777652168739216e-05, |
|
"loss": 1.0078, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0718153150473018e-05, |
|
"loss": 1.0136, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0658654132206821e-05, |
|
"loss": 1.0571, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.059915511394062e-05, |
|
"loss": 1.0558, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0539656095674423e-05, |
|
"loss": 0.9997, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0480157077408224e-05, |
|
"loss": 1.0411, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0420658059142026e-05, |
|
"loss": 1.0569, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0361159040875826e-05, |
|
"loss": 1.0182, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0301660022609627e-05, |
|
"loss": 1.028, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0242161004343429e-05, |
|
"loss": 1.0548, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.018266198607723e-05, |
|
"loss": 1.0717, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.012316296781103e-05, |
|
"loss": 1.0518, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0063663949544832e-05, |
|
"loss": 1.025, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0004164931278636e-05, |
|
"loss": 1.0131, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 9.944665913012435e-06, |
|
"loss": 1.0403, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.885166894746237e-06, |
|
"loss": 0.9678, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.825667876480039e-06, |
|
"loss": 1.0123, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.76616885821384e-06, |
|
"loss": 1.0033, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.706669839947642e-06, |
|
"loss": 1.0143, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.647170821681444e-06, |
|
"loss": 1.0296, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.587671803415245e-06, |
|
"loss": 1.0152, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.528172785149045e-06, |
|
"loss": 1.0504, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.468673766882848e-06, |
|
"loss": 1.0308, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.409174748616648e-06, |
|
"loss": 1.0092, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.34967573035045e-06, |
|
"loss": 1.0669, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.290176712084252e-06, |
|
"loss": 1.0227, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.230677693818053e-06, |
|
"loss": 1.0142, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.171178675551853e-06, |
|
"loss": 1.073, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.111679657285657e-06, |
|
"loss": 1.0564, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.052180639019456e-06, |
|
"loss": 1.0278, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.992681620753258e-06, |
|
"loss": 1.0213, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.93318260248706e-06, |
|
"loss": 1.0354, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.873683584220861e-06, |
|
"loss": 1.0315, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.814184565954663e-06, |
|
"loss": 1.0585, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.754685547688465e-06, |
|
"loss": 1.0345, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.695186529422264e-06, |
|
"loss": 1.0612, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.635687511156066e-06, |
|
"loss": 1.0734, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.576188492889868e-06, |
|
"loss": 1.0822, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.51668947462367e-06, |
|
"loss": 1.0095, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.457190456357471e-06, |
|
"loss": 1.0104, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.397691438091273e-06, |
|
"loss": 1.0637, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.338192419825074e-06, |
|
"loss": 1.0427, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.278693401558874e-06, |
|
"loss": 1.0073, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.219194383292677e-06, |
|
"loss": 0.9647, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.159695365026477e-06, |
|
"loss": 1.0151, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.100196346760279e-06, |
|
"loss": 1.0446, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.04069732849408e-06, |
|
"loss": 1.0598, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.981198310227882e-06, |
|
"loss": 1.0044, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.921699291961684e-06, |
|
"loss": 1.0272, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.862200273695486e-06, |
|
"loss": 1.009, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.802701255429285e-06, |
|
"loss": 1.0505, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.743202237163087e-06, |
|
"loss": 1.0559, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.683703218896889e-06, |
|
"loss": 1.0412, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.62420420063069e-06, |
|
"loss": 1.0651, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.564705182364491e-06, |
|
"loss": 0.9968, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.505206164098294e-06, |
|
"loss": 1.0395, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.445707145832094e-06, |
|
"loss": 1.0378, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.386208127565896e-06, |
|
"loss": 0.99, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.326709109299697e-06, |
|
"loss": 0.9792, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.267210091033498e-06, |
|
"loss": 1.0482, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.207711072767299e-06, |
|
"loss": 1.0143, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.148212054501102e-06, |
|
"loss": 1.0321, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.088713036234903e-06, |
|
"loss": 1.0312, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.029214017968704e-06, |
|
"loss": 1.022, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.969714999702506e-06, |
|
"loss": 1.0529, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.9102159814363064e-06, |
|
"loss": 1.0269, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.850716963170109e-06, |
|
"loss": 0.9678, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.79121794490391e-06, |
|
"loss": 1.0315, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.731718926637711e-06, |
|
"loss": 0.9849, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.672219908371512e-06, |
|
"loss": 1.0397, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.6127208901053146e-06, |
|
"loss": 1.0511, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.553221871839115e-06, |
|
"loss": 1.0381, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.493722853572917e-06, |
|
"loss": 1.0584, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.434223835306718e-06, |
|
"loss": 0.9933, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.374724817040519e-06, |
|
"loss": 1.0439, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.31522579877432e-06, |
|
"loss": 1.047, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.255726780508123e-06, |
|
"loss": 1.0349, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.196227762241923e-06, |
|
"loss": 0.9923, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.136728743975725e-06, |
|
"loss": 0.9953, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.077229725709526e-06, |
|
"loss": 1.0083, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.017730707443327e-06, |
|
"loss": 1.0258, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5.958231689177128e-06, |
|
"loss": 0.9729, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.898732670910931e-06, |
|
"loss": 1.0351, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.8392336526447314e-06, |
|
"loss": 1.0298, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.779734634378533e-06, |
|
"loss": 1.0067, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.7202356161123355e-06, |
|
"loss": 1.0235, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.660736597846136e-06, |
|
"loss": 1.0018, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.601237579579938e-06, |
|
"loss": 1.0096, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.541738561313739e-06, |
|
"loss": 0.9732, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.48223954304754e-06, |
|
"loss": 1.0359, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.422740524781341e-06, |
|
"loss": 0.9913, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.363241506515144e-06, |
|
"loss": 1.0132, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.303742488248944e-06, |
|
"loss": 1.0526, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.244243469982746e-06, |
|
"loss": 0.9742, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.184744451716547e-06, |
|
"loss": 0.9988, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.125245433450348e-06, |
|
"loss": 0.9938, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.065746415184149e-06, |
|
"loss": 1.022, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.006247396917952e-06, |
|
"loss": 1.0214, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.946748378651752e-06, |
|
"loss": 0.9758, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.887249360385554e-06, |
|
"loss": 1.0235, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.827750342119356e-06, |
|
"loss": 1.0387, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.768251323853157e-06, |
|
"loss": 1.0319, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.708752305586958e-06, |
|
"loss": 0.9961, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.64925328732076e-06, |
|
"loss": 1.0318, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.589754269054561e-06, |
|
"loss": 1.1137, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.530255250788362e-06, |
|
"loss": 1.0398, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.470756232522164e-06, |
|
"loss": 0.9347, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.411257214255965e-06, |
|
"loss": 1.0618, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.351758195989766e-06, |
|
"loss": 0.9826, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.292259177723568e-06, |
|
"loss": 1.0422, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.232760159457369e-06, |
|
"loss": 0.9928, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.17326114119117e-06, |
|
"loss": 1.0487, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.113762122924972e-06, |
|
"loss": 1.0379, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.054263104658773e-06, |
|
"loss": 1.0225, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.994764086392575e-06, |
|
"loss": 0.951, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.935265068126376e-06, |
|
"loss": 1.0393, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.875766049860177e-06, |
|
"loss": 0.9851, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.816267031593979e-06, |
|
"loss": 0.9716, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7567680133277807e-06, |
|
"loss": 0.9974, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.697268995061582e-06, |
|
"loss": 1.0052, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6377699767953835e-06, |
|
"loss": 0.9949, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.5782709585291847e-06, |
|
"loss": 1.0279, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.518771940262986e-06, |
|
"loss": 0.9636, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.4592729219967875e-06, |
|
"loss": 1.0322, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.3997739037305887e-06, |
|
"loss": 1.0153, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.3402748854643903e-06, |
|
"loss": 0.9982, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2807758671981915e-06, |
|
"loss": 1.022, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2212768489319927e-06, |
|
"loss": 1.0208, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.1617778306657944e-06, |
|
"loss": 0.9761, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.1022788123995956e-06, |
|
"loss": 1.0125, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.0427797941333968e-06, |
|
"loss": 1.0531, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.9832807758671984e-06, |
|
"loss": 1.0232, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.9237817576009996e-06, |
|
"loss": 1.067, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8642827393348012e-06, |
|
"loss": 0.9986, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.8047837210686024e-06, |
|
"loss": 1.0009, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.7452847028024036e-06, |
|
"loss": 1.0113, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6857856845362052e-06, |
|
"loss": 0.9991, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.6262866662700064e-06, |
|
"loss": 1.0153, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5667876480038085e-06, |
|
"loss": 1.0382, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.5072886297376097e-06, |
|
"loss": 0.993, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.447789611471411e-06, |
|
"loss": 0.9704, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.388290593205212e-06, |
|
"loss": 0.9935, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.3287915749390137e-06, |
|
"loss": 1.0239, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.269292556672815e-06, |
|
"loss": 0.9927, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.2097935384066165e-06, |
|
"loss": 1.0262, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.150294520140418e-06, |
|
"loss": 0.9762, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0907955018742194e-06, |
|
"loss": 1.0298, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0312964836080206e-06, |
|
"loss": 0.9869, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.971797465341822e-06, |
|
"loss": 1.0192, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.9122984470756234e-06, |
|
"loss": 1.0034, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.8527994288094248e-06, |
|
"loss": 1.0179, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7933004105432262e-06, |
|
"loss": 1.0189, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.7338013922770274e-06, |
|
"loss": 0.9545, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6743023740108288e-06, |
|
"loss": 1.0462, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.6148033557446302e-06, |
|
"loss": 1.0083, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5553043374784319e-06, |
|
"loss": 0.9755, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4958053192122333e-06, |
|
"loss": 1.0234, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.4363063009460345e-06, |
|
"loss": 1.0116, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3768072826798359e-06, |
|
"loss": 1.0502, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.3173082644136373e-06, |
|
"loss": 0.9593, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.2578092461474387e-06, |
|
"loss": 1.0195, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.19831022788124e-06, |
|
"loss": 1.0212, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1388112096150413e-06, |
|
"loss": 0.9523, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.079312191348843e-06, |
|
"loss": 0.9902, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.0198131730826442e-06, |
|
"loss": 1.0248, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.603141548164456e-07, |
|
"loss": 1.0062, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.00815136550247e-07, |
|
"loss": 1.0035, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 8.413161182840483e-07, |
|
"loss": 0.9755, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.818171000178498e-07, |
|
"loss": 1.0131, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.223180817516512e-07, |
|
"loss": 1.0353, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.628190634854525e-07, |
|
"loss": 1.0138, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.033200452192539e-07, |
|
"loss": 0.9738, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 5.438210269530553e-07, |
|
"loss": 0.9878, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.843220086868568e-07, |
|
"loss": 0.9787, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.2482299042065806e-07, |
|
"loss": 0.9903, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.653239721544595e-07, |
|
"loss": 1.032, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.0582495388826084e-07, |
|
"loss": 1.0436, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.4632593562206225e-07, |
|
"loss": 0.9956, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.8682691735586366e-07, |
|
"loss": 1.0102, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.2732789908966505e-07, |
|
"loss": 0.9634, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 6.782888082346642e-08, |
|
"loss": 1.004, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.329862557267807e-09, |
|
"loss": 0.9777, |
|
"step": 168000 |
|
} |
|
], |
|
"max_steps": 168070, |
|
"num_train_epochs": 1, |
|
"total_flos": 6437535729057792.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|