|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 338, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 1.1507, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 1.1908, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 1.0904, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.8683, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.4314, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.2006, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.2114, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.2914, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.2122, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.1695, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1938, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9999538500851633e-05, |
|
"loss": 0.2241, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9998154046002822e-05, |
|
"loss": 0.1599, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9995846763238514e-05, |
|
"loss": 0.1816, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9992616865520515e-05, |
|
"loss": 0.1611, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9988464650967834e-05, |
|
"loss": 0.1586, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9983390502829168e-05, |
|
"loss": 0.138, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9977394889447526e-05, |
|
"loss": 0.1399, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9970478364217e-05, |
|
"loss": 0.13, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9962641565531694e-05, |
|
"loss": 0.1468, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9953885216726788e-05, |
|
"loss": 0.1403, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.994421012601179e-05, |
|
"loss": 0.1265, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9933617186395917e-05, |
|
"loss": 0.135, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.99221073756057e-05, |
|
"loss": 0.1206, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.990968175599471e-05, |
|
"loss": 0.1249, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9896341474445526e-05, |
|
"loss": 0.1247, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.9882087762263857e-05, |
|
"loss": 0.1144, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9866921935064907e-05, |
|
"loss": 0.1302, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.985084539265195e-05, |
|
"loss": 0.1281, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.983385961888711e-05, |
|
"loss": 0.127, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.9815966181554412e-05, |
|
"loss": 0.1328, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.9797166732215078e-05, |
|
"loss": 0.131, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.977746300605507e-05, |
|
"loss": 0.1185, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.975685682172497e-05, |
|
"loss": 0.1217, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.973535008117207e-05, |
|
"loss": 0.1381, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9712944769464864e-05, |
|
"loss": 0.1154, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9689642954609808e-05, |
|
"loss": 0.1154, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.9665446787360444e-05, |
|
"loss": 0.1044, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9640358501018885e-05, |
|
"loss": 0.134, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9614380411229693e-05, |
|
"loss": 0.1193, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.9587514915766124e-05, |
|
"loss": 0.1016, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9559764494308838e-05, |
|
"loss": 0.1231, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.9531131708217005e-05, |
|
"loss": 0.1098, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.950161920029191e-05, |
|
"loss": 0.1309, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9471229694533003e-05, |
|
"loss": 0.0985, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.943996599588649e-05, |
|
"loss": 0.1102, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.940783098998643e-05, |
|
"loss": 0.0974, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.93748276428884e-05, |
|
"loss": 0.0985, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.9340959000795707e-05, |
|
"loss": 0.1023, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.9306228189778255e-05, |
|
"loss": 0.1018, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.927063841548398e-05, |
|
"loss": 0.1147, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9234192962842996e-05, |
|
"loss": 0.114, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9196895195764363e-05, |
|
"loss": 0.1166, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.9158748556825637e-05, |
|
"loss": 0.1073, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.9119756566955092e-05, |
|
"loss": 0.1037, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.907992282510675e-05, |
|
"loss": 0.0958, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.90392510079282e-05, |
|
"loss": 0.1067, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.8997744869421248e-05, |
|
"loss": 0.0968, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.8955408240595396e-05, |
|
"loss": 0.1027, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.891224502911428e-05, |
|
"loss": 0.0979, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.886825921893497e-05, |
|
"loss": 0.0979, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8823454869940243e-05, |
|
"loss": 0.0902, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.8777836117563894e-05, |
|
"loss": 0.1023, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.873140717240899e-05, |
|
"loss": 0.1046, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.8684172319859258e-05, |
|
"loss": 0.0806, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.863613591968355e-05, |
|
"loss": 0.0977, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.858730240563342e-05, |
|
"loss": 0.0903, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8537676285033886e-05, |
|
"loss": 0.0986, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.848726213836744e-05, |
|
"loss": 0.1011, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.8436064618851225e-05, |
|
"loss": 0.0873, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.838408845200758e-05, |
|
"loss": 0.0946, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8331338435227838e-05, |
|
"loss": 0.0852, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8277819437329577e-05, |
|
"loss": 0.0755, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8223536398107177e-05, |
|
"loss": 0.075, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8168494327875918e-05, |
|
"loss": 0.0957, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8112698307009506e-05, |
|
"loss": 0.1017, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.8056153485471167e-05, |
|
"loss": 0.0807, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.799886508233829e-05, |
|
"loss": 0.0768, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.7940838385320732e-05, |
|
"loss": 0.0972, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.788207875027274e-05, |
|
"loss": 0.1041, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.7822591600698632e-05, |
|
"loss": 0.0901, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.776238242725217e-05, |
|
"loss": 0.1026, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.7701456787229805e-05, |
|
"loss": 0.0743, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.7639820304057745e-05, |
|
"loss": 0.0913, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.7577478666772886e-05, |
|
"loss": 0.0971, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.751443762949772e-05, |
|
"loss": 0.0891, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.7450703010909263e-05, |
|
"loss": 0.0889, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.738628069370195e-05, |
|
"loss": 0.0903, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.732117662404469e-05, |
|
"loss": 0.0933, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7255396811032014e-05, |
|
"loss": 0.0938, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.718894732612947e-05, |
|
"loss": 0.0982, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.712183430261319e-05, |
|
"loss": 0.0791, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7054063935003813e-05, |
|
"loss": 0.0809, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.698564247849473e-05, |
|
"loss": 0.0918, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.691657624837472e-05, |
|
"loss": 0.0808, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.684687161944506e-05, |
|
"loss": 0.0803, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.677653502543113e-05, |
|
"loss": 0.0806, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.6705572958388576e-05, |
|
"loss": 0.0836, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6633991968104095e-05, |
|
"loss": 0.089, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.6561798661490904e-05, |
|
"loss": 0.083, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6488999701978905e-05, |
|
"loss": 0.0685, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.6415601808899658e-05, |
|
"loss": 0.0946, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.63416117568662e-05, |
|
"loss": 0.0752, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.6267036375147728e-05, |
|
"loss": 0.094, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.619188254703927e-05, |
|
"loss": 0.096, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.6116157209226356e-05, |
|
"loss": 0.0887, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.6039867351144778e-05, |
|
"loss": 0.0888, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.5963020014335437e-05, |
|
"loss": 0.0784, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.588562229179443e-05, |
|
"loss": 0.0831, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5807681327318372e-05, |
|
"loss": 0.0834, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.5729204314845002e-05, |
|
"loss": 0.0748, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.56501984977892e-05, |
|
"loss": 0.0873, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.557067116837444e-05, |
|
"loss": 0.0841, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.5490629666959668e-05, |
|
"loss": 0.0842, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.541008138136183e-05, |
|
"loss": 0.0943, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5329033746173975e-05, |
|
"loss": 0.0789, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5247494242079024e-05, |
|
"loss": 0.0628, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5165470395159314e-05, |
|
"loss": 0.0873, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5082969776201948e-05, |
|
"loss": 0.0891, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.0954, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4916568724649688e-05, |
|
"loss": 0.0958, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.483268365084351e-05, |
|
"loss": 0.0747, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4748352521159492e-05, |
|
"loss": 0.0852, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.466358311934654e-05, |
|
"loss": 0.078, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4578383269606004e-05, |
|
"loss": 0.0833, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4492760835869504e-05, |
|
"loss": 0.084, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4406723721073088e-05, |
|
"loss": 0.0666, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4320279866427798e-05, |
|
"loss": 0.0843, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.4233437250686695e-05, |
|
"loss": 0.0841, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.4146203889408418e-05, |
|
"loss": 0.0721, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.4058587834217356e-05, |
|
"loss": 0.0853, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.3970597172060482e-05, |
|
"loss": 0.0654, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3882240024460928e-05, |
|
"loss": 0.0854, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3793524546768358e-05, |
|
"loss": 0.0761, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.3704458927406261e-05, |
|
"loss": 0.0798, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.3615051387116131e-05, |
|
"loss": 0.0879, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.3525310178198707e-05, |
|
"loss": 0.0714, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.3435243583752294e-05, |
|
"loss": 0.0989, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.3344859916908206e-05, |
|
"loss": 0.0726, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.325416752006351e-05, |
|
"loss": 0.0946, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.3163174764110985e-05, |
|
"loss": 0.0696, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3071890047666498e-05, |
|
"loss": 0.0754, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.2980321796293838e-05, |
|
"loss": 0.1014, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.288847846172701e-05, |
|
"loss": 0.0966, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.2796368521090143e-05, |
|
"loss": 0.0679, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.2704000476115079e-05, |
|
"loss": 0.0691, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.2611382852356632e-05, |
|
"loss": 0.0721, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2518524198405699e-05, |
|
"loss": 0.0656, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.2425433085100224e-05, |
|
"loss": 0.0811, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.233211810473411e-05, |
|
"loss": 0.0739, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2238587870264152e-05, |
|
"loss": 0.0639, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.2144851014515055e-05, |
|
"loss": 0.0813, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2050916189382646e-05, |
|
"loss": 0.0826, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.1956792065035281e-05, |
|
"loss": 0.079, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1862487329113606e-05, |
|
"loss": 0.0679, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.1768010685928686e-05, |
|
"loss": 0.0743, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1673370855658592e-05, |
|
"loss": 0.0761, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.1578576573543541e-05, |
|
"loss": 0.0687, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.1483636589079627e-05, |
|
"loss": 0.055, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1388559665211241e-05, |
|
"loss": 0.0726, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1293354577522264e-05, |
|
"loss": 0.0573, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1198030113426074e-05, |
|
"loss": 0.0713, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1102595071354471e-05, |
|
"loss": 0.0647, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.1007058259945584e-05, |
|
"loss": 0.0694, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0911428497230834e-05, |
|
"loss": 0.0719, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0815714609821027e-05, |
|
"loss": 0.0771, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0719925432091671e-05, |
|
"loss": 0.0826, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.0624069805367558e-05, |
|
"loss": 0.0656, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.0528156577106703e-05, |
|
"loss": 0.0801, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.043219460008374e-05, |
|
"loss": 0.0514, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.0336192731572805e-05, |
|
"loss": 0.0515, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.0240159832530007e-05, |
|
"loss": 0.0693, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.0144104766775574e-05, |
|
"loss": 0.0608, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.004803640017571e-05, |
|
"loss": 0.0559, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.951963599824294e-06, |
|
"loss": 0.0663, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.855895233224431e-06, |
|
"loss": 0.0568, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.759840167469995e-06, |
|
"loss": 0.0584, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.663807268427197e-06, |
|
"loss": 0.0519, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.56780539991626e-06, |
|
"loss": 0.0656, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.471843422893299e-06, |
|
"loss": 0.068, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.375930194632447e-06, |
|
"loss": 0.0625, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.28007456790833e-06, |
|
"loss": 0.0619, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.184285390178978e-06, |
|
"loss": 0.0557, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.08857150276917e-06, |
|
"loss": 0.0626, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.992941740054418e-06, |
|
"loss": 0.0658, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.897404928645529e-06, |
|
"loss": 0.0627, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.80196988657393e-06, |
|
"loss": 0.0521, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.706645422477739e-06, |
|
"loss": 0.0554, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.611440334788762e-06, |
|
"loss": 0.0721, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.516363410920376e-06, |
|
"loss": 0.0601, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.42142342645646e-06, |
|
"loss": 0.0637, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.326629144341408e-06, |
|
"loss": 0.0592, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.231989314071318e-06, |
|
"loss": 0.0665, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.137512670886397e-06, |
|
"loss": 0.05, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.043207934964722e-06, |
|
"loss": 0.0558, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 7.949083810617358e-06, |
|
"loss": 0.0848, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 7.855148985484946e-06, |
|
"loss": 0.0479, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 7.761412129735853e-06, |
|
"loss": 0.055, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.667881895265895e-06, |
|
"loss": 0.0588, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 7.574566914899779e-06, |
|
"loss": 0.0478, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 7.481475801594302e-06, |
|
"loss": 0.0583, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.388617147643371e-06, |
|
"loss": 0.0541, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 7.295999523884921e-06, |
|
"loss": 0.043, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.203631478909857e-06, |
|
"loss": 0.0539, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 7.111521538272997e-06, |
|
"loss": 0.0547, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 7.019678203706164e-06, |
|
"loss": 0.0499, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 6.928109952333506e-06, |
|
"loss": 0.0637, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 6.83682523588902e-06, |
|
"loss": 0.0557, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.745832479936492e-06, |
|
"loss": 0.0582, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 6.655140083091794e-06, |
|
"loss": 0.0506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.564756416247712e-06, |
|
"loss": 0.06, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 6.474689821801295e-06, |
|
"loss": 0.0507, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6.384948612883872e-06, |
|
"loss": 0.0453, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.2955410725937405e-06, |
|
"loss": 0.0575, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 6.206475453231644e-06, |
|
"loss": 0.0537, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.117759975539075e-06, |
|
"loss": 0.0686, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 6.029402827939519e-06, |
|
"loss": 0.0466, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.941412165782645e-06, |
|
"loss": 0.0584, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.853796110591583e-06, |
|
"loss": 0.0575, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.766562749313309e-06, |
|
"loss": 0.0537, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.6797201335722064e-06, |
|
"loss": 0.0635, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.593276278926912e-06, |
|
"loss": 0.0553, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 5.507239164130501e-06, |
|
"loss": 0.0415, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.421616730394e-06, |
|
"loss": 0.056, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5.336416880653461e-06, |
|
"loss": 0.0501, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.251647478840511e-06, |
|
"loss": 0.0404, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 5.167316349156495e-06, |
|
"loss": 0.0521, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 5.083431275350312e-06, |
|
"loss": 0.045, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 5.000000000000003e-06, |
|
"loss": 0.0522, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.917030223798057e-06, |
|
"loss": 0.0489, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.834529604840686e-06, |
|
"loss": 0.0479, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.7525057579209775e-06, |
|
"loss": 0.0488, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.670966253826027e-06, |
|
"loss": 0.0591, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.589918618638173e-06, |
|
"loss": 0.043, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.5093703330403385e-06, |
|
"loss": 0.049, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.429328831625565e-06, |
|
"loss": 0.0635, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.349801502210801e-06, |
|
"loss": 0.0471, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.270795685155001e-06, |
|
"loss": 0.0456, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.192318672681631e-06, |
|
"loss": 0.0498, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.1143777082055715e-06, |
|
"loss": 0.0647, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.036979985664566e-06, |
|
"loss": 0.0597, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.960132648855226e-06, |
|
"loss": 0.0533, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.883842790773647e-06, |
|
"loss": 0.0611, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.8081174529607346e-06, |
|
"loss": 0.0607, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.732963624852275e-06, |
|
"loss": 0.063, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.6583882431338047e-06, |
|
"loss": 0.0494, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.584398191100341e-06, |
|
"loss": 0.0534, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.511000298021098e-06, |
|
"loss": 0.0488, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.4382013385090985e-06, |
|
"loss": 0.056, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.3660080318959043e-06, |
|
"loss": 0.0472, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.2944270416114256e-06, |
|
"loss": 0.037, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.223464974568874e-06, |
|
"loss": 0.0551, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.153128380554941e-06, |
|
"loss": 0.0449, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.0834237516252817e-06, |
|
"loss": 0.0673, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.0143575215052732e-06, |
|
"loss": 0.0443, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.94593606499619e-06, |
|
"loss": 0.0527, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.878165697386812e-06, |
|
"loss": 0.0631, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.8110526738705345e-06, |
|
"loss": 0.0499, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.7446031889679893e-06, |
|
"loss": 0.0565, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.678823375955314e-06, |
|
"loss": 0.0489, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.6137193062980506e-06, |
|
"loss": 0.0662, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.5492969890907383e-06, |
|
"loss": 0.0428, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.485562370502279e-06, |
|
"loss": 0.0637, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4225213332271203e-06, |
|
"loss": 0.0468, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3601796959422585e-06, |
|
"loss": 0.0561, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.2985432127701945e-06, |
|
"loss": 0.0594, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.2376175727478346e-06, |
|
"loss": 0.0519, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1774083993013715e-06, |
|
"loss": 0.0525, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.1179212497272582e-06, |
|
"loss": 0.0584, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.0591616146792705e-06, |
|
"loss": 0.0564, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.0011349176617133e-06, |
|
"loss": 0.0453, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.9438465145288377e-06, |
|
"loss": 0.045, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.8873016929904942e-06, |
|
"loss": 0.0419, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.8315056721240831e-06, |
|
"loss": 0.0449, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.7764636018928249e-06, |
|
"loss": 0.0548, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.722180562670428e-06, |
|
"loss": 0.0491, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6686615647721638e-06, |
|
"loss": 0.0455, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 1.6159115479924259e-06, |
|
"loss": 0.0518, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.5639353811487744e-06, |
|
"loss": 0.0625, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.5127378616325606e-06, |
|
"loss": 0.0486, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.462323714966114e-06, |
|
"loss": 0.0586, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.4126975943665844e-06, |
|
"loss": 0.0619, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 1.3638640803164516e-06, |
|
"loss": 0.055, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.3158276801407432e-06, |
|
"loss": 0.0382, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.2685928275910142e-06, |
|
"loss": 0.0558, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.222163882436107e-06, |
|
"loss": 0.0592, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.1765451300597574e-06, |
|
"loss": 0.054, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.1317407810650372e-06, |
|
"loss": 0.0533, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 1.0877549708857228e-06, |
|
"loss": 0.0427, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0445917594046073e-06, |
|
"loss": 0.0549, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 1.0022551305787564e-06, |
|
"loss": 0.0471, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.607489920717983e-07, |
|
"loss": 0.0481, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.200771748932513e-07, |
|
"loss": 0.0625, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 8.802434330449128e-07, |
|
"loss": 0.0543, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.412514431743657e-07, |
|
"loss": 0.0492, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.031048042356393e-07, |
|
"loss": 0.0478, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.65807037157007e-07, |
|
"loss": 0.0483, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 7.293615845160196e-07, |
|
"loss": 0.0541, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.937718102217461e-07, |
|
"loss": 0.0639, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.590409992042957e-07, |
|
"loss": 0.0346, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 6.251723571116031e-07, |
|
"loss": 0.0479, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.921690100135713e-07, |
|
"loss": 0.0402, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.600340041135133e-07, |
|
"loss": 0.0513, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.287703054670012e-07, |
|
"loss": 0.0436, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.983807997080925e-07, |
|
"loss": 0.0565, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.6886829178299676e-07, |
|
"loss": 0.0577, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.402355056911656e-07, |
|
"loss": 0.031, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.124850842338779e-07, |
|
"loss": 0.0409, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.8561958877030957e-07, |
|
"loss": 0.0408, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.5964149898111587e-07, |
|
"loss": 0.0442, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.345532126395579e-07, |
|
"loss": 0.0507, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.1035704539019384e-07, |
|
"loss": 0.0521, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 2.870552305351382e-07, |
|
"loss": 0.0538, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.646499188279328e-07, |
|
"loss": 0.052, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.4314317827503375e-07, |
|
"loss": 0.0515, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.2253699394493066e-07, |
|
"loss": 0.0386, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.028332677849254e-07, |
|
"loss": 0.0348, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.840338184455881e-07, |
|
"loss": 0.0503, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.6614038111289034e-07, |
|
"loss": 0.0487, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.49154607348051e-07, |
|
"loss": 0.0505, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.330780649350938e-07, |
|
"loss": 0.0463, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.1791223773614635e-07, |
|
"loss": 0.0503, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.0365852555447642e-07, |
|
"loss": 0.0504, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 9.031824400528854e-08, |
|
"loss": 0.0438, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 7.789262439430012e-08, |
|
"loss": 0.0493, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 6.638281360408339e-08, |
|
"loss": 0.044, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.578987398821345e-08, |
|
"loss": 0.0382, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.6114783273213395e-08, |
|
"loss": 0.0444, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.735843446830867e-08, |
|
"loss": 0.0414, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 2.9521635783001932e-08, |
|
"loss": 0.0419, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 2.2605110552477162e-08, |
|
"loss": 0.0447, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.6609497170834154e-08, |
|
"loss": 0.0417, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.1535349032167908e-08, |
|
"loss": 0.057, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.3831344794872415e-09, |
|
"loss": 0.0445, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 4.153236761488266e-09, |
|
"loss": 0.0412, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8459539971804608e-09, |
|
"loss": 0.05, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 4.614991483686826e-10, |
|
"loss": 0.0469, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0432, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 338, |
|
"total_flos": 14257366695936.0, |
|
"train_loss": 0.08880592421931628, |
|
"train_runtime": 3923.9549, |
|
"train_samples_per_second": 9.629, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 338, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 2718, |
|
"total_flos": 14257366695936.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|