diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 17.0, - "global_step": 3431331, + "epoch": 34.0, + "global_step": 6862662, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -41365,11 +41365,41376 @@ "eval_samples_per_second": 1748.189, "eval_steps_per_second": 72.846, "step": 3431331 + }, + { + "epoch": 17.0, + "learning_rate": 4.1502638189087565e-05, + "loss": 2.3765, + "step": 3431500 + }, + { + "epoch": 17.0, + "learning_rate": 4.150139960266148e-05, + "loss": 2.3414, + "step": 3432000 + }, + { + "epoch": 17.01, + "learning_rate": 4.150016101623539e-05, + "loss": 2.3531, + "step": 3432500 + }, + { + "epoch": 17.01, + "learning_rate": 4.149892242980931e-05, + "loss": 2.3628, + "step": 3433000 + }, + { + "epoch": 17.01, + "learning_rate": 4.1497683843383226e-05, + "loss": 2.3441, + "step": 3433500 + }, + { + "epoch": 17.01, + "learning_rate": 4.149644525695714e-05, + "loss": 2.3688, + "step": 3434000 + }, + { + "epoch": 17.02, + "learning_rate": 4.149520667053106e-05, + "loss": 2.3296, + "step": 3434500 + }, + { + "epoch": 17.02, + "learning_rate": 4.149396808410497e-05, + "loss": 2.3295, + "step": 3435000 + }, + { + "epoch": 17.02, + "learning_rate": 4.1492729497678886e-05, + "loss": 2.3542, + "step": 3435500 + }, + { + "epoch": 17.02, + "learning_rate": 4.1491490911252803e-05, + "loss": 2.3233, + "step": 3436000 + }, + { + "epoch": 17.03, + "learning_rate": 4.149025232482672e-05, + "loss": 2.3492, + "step": 3436500 + }, + { + "epoch": 17.03, + "learning_rate": 4.148901373840064e-05, + "loss": 2.341, + "step": 3437000 + }, + { + "epoch": 17.03, + "learning_rate": 4.1487775151974554e-05, + "loss": 2.356, + "step": 3437500 + }, + { + "epoch": 17.03, + "learning_rate": 4.148653656554847e-05, + "loss": 2.3461, + "step": 3438000 + }, + { + "epoch": 17.04, + "learning_rate": 4.148529797912239e-05, + "loss": 2.3431, + "step": 3438500 + }, + { + "epoch": 17.04, + "learning_rate": 4.148406186986916e-05, + "loss": 2.3379, + "step": 3439000 + }, + { + "epoch": 17.04, + "learning_rate": 4.1482823283443074e-05, + "loss": 2.3413, + "step": 3439500 + }, + { + "epoch": 17.04, + "learning_rate": 4.148158469701699e-05, + "loss": 2.3506, + "step": 3440000 + }, + { + "epoch": 17.05, + "learning_rate": 4.148034858776376e-05, + "loss": 2.3299, + "step": 3440500 + }, + { + "epoch": 17.05, + "learning_rate": 4.1479110001337676e-05, + "loss": 2.3431, + "step": 3441000 + }, + { + "epoch": 17.05, + "learning_rate": 4.147787141491159e-05, + "loss": 2.3623, + "step": 3441500 + }, + { + "epoch": 17.05, + "learning_rate": 4.1476632828485504e-05, + "loss": 2.3722, + "step": 3442000 + }, + { + "epoch": 17.06, + "learning_rate": 4.147539424205942e-05, + "loss": 2.3552, + "step": 3442500 + }, + { + "epoch": 17.06, + "learning_rate": 4.147415565563334e-05, + "loss": 2.3468, + "step": 3443000 + }, + { + "epoch": 17.06, + "learning_rate": 4.1472917069207254e-05, + "loss": 2.3322, + "step": 3443500 + }, + { + "epoch": 17.06, + "learning_rate": 4.147167848278117e-05, + "loss": 2.3492, + "step": 3444000 + }, + { + "epoch": 17.07, + "learning_rate": 4.147044237352794e-05, + "loss": 2.3549, + "step": 3444500 + }, + { + "epoch": 17.07, + "learning_rate": 4.146920378710186e-05, + "loss": 2.373, + "step": 3445000 + }, + { + "epoch": 17.07, + "learning_rate": 4.1467965200675774e-05, + "loss": 2.3629, + "step": 3445500 + }, + { + "epoch": 17.07, + "learning_rate": 4.146672909142254e-05, + "loss": 2.3718, + "step": 3446000 + }, + { + "epoch": 17.08, + "learning_rate": 4.146549050499646e-05, + "loss": 2.3301, + "step": 3446500 + }, + { + "epoch": 17.08, + "learning_rate": 4.1464251918570377e-05, + "loss": 2.3385, + "step": 3447000 + }, + { + "epoch": 17.08, + "learning_rate": 4.1463013332144294e-05, + "loss": 2.3374, + "step": 3447500 + }, + { + "epoch": 17.08, + "learning_rate": 4.146177474571821e-05, + "loss": 2.3562, + "step": 3448000 + }, + { + "epoch": 17.09, + "learning_rate": 4.146053615929212e-05, + "loss": 2.3514, + "step": 3448500 + }, + { + "epoch": 17.09, + "learning_rate": 4.145929757286604e-05, + "loss": 2.3568, + "step": 3449000 + }, + { + "epoch": 17.09, + "learning_rate": 4.1458058986439954e-05, + "loss": 2.3525, + "step": 3449500 + }, + { + "epoch": 17.09, + "learning_rate": 4.145682287718672e-05, + "loss": 2.3269, + "step": 3450000 + }, + { + "epoch": 17.09, + "learning_rate": 4.145558429076064e-05, + "loss": 2.3618, + "step": 3450500 + }, + { + "epoch": 17.1, + "learning_rate": 4.145434570433456e-05, + "loss": 2.3438, + "step": 3451000 + }, + { + "epoch": 17.1, + "learning_rate": 4.1453107117908474e-05, + "loss": 2.341, + "step": 3451500 + }, + { + "epoch": 17.1, + "learning_rate": 4.145186853148239e-05, + "loss": 2.3567, + "step": 3452000 + }, + { + "epoch": 17.1, + "learning_rate": 4.145063489940201e-05, + "loss": 2.3433, + "step": 3452500 + }, + { + "epoch": 17.11, + "learning_rate": 4.144939631297593e-05, + "loss": 2.3524, + "step": 3453000 + }, + { + "epoch": 17.11, + "learning_rate": 4.1448157726549846e-05, + "loss": 2.3444, + "step": 3453500 + }, + { + "epoch": 17.11, + "learning_rate": 4.144691914012376e-05, + "loss": 2.3531, + "step": 3454000 + }, + { + "epoch": 17.11, + "learning_rate": 4.144568055369768e-05, + "loss": 2.3459, + "step": 3454500 + }, + { + "epoch": 17.12, + "learning_rate": 4.1444441967271596e-05, + "loss": 2.35, + "step": 3455000 + }, + { + "epoch": 17.12, + "learning_rate": 4.144320338084551e-05, + "loss": 2.338, + "step": 3455500 + }, + { + "epoch": 17.12, + "learning_rate": 4.1441964794419423e-05, + "loss": 2.3489, + "step": 3456000 + }, + { + "epoch": 17.12, + "learning_rate": 4.14407286851662e-05, + "loss": 2.3561, + "step": 3456500 + }, + { + "epoch": 17.13, + "learning_rate": 4.1439490098740116e-05, + "loss": 2.3618, + "step": 3457000 + }, + { + "epoch": 17.13, + "learning_rate": 4.143825151231403e-05, + "loss": 2.3614, + "step": 3457500 + }, + { + "epoch": 17.13, + "learning_rate": 4.143701292588795e-05, + "loss": 2.3375, + "step": 3458000 + }, + { + "epoch": 17.13, + "learning_rate": 4.143577433946187e-05, + "loss": 2.3632, + "step": 3458500 + }, + { + "epoch": 17.14, + "learning_rate": 4.143453823020863e-05, + "loss": 2.341, + "step": 3459000 + }, + { + "epoch": 17.14, + "learning_rate": 4.1433299643782546e-05, + "loss": 2.3918, + "step": 3459500 + }, + { + "epoch": 17.14, + "learning_rate": 4.143206105735646e-05, + "loss": 2.3394, + "step": 3460000 + }, + { + "epoch": 17.14, + "learning_rate": 4.143082247093038e-05, + "loss": 2.3564, + "step": 3460500 + }, + { + "epoch": 17.15, + "learning_rate": 4.1429583884504296e-05, + "loss": 2.351, + "step": 3461000 + }, + { + "epoch": 17.15, + "learning_rate": 4.142834529807821e-05, + "loss": 2.3515, + "step": 3461500 + }, + { + "epoch": 17.15, + "learning_rate": 4.142710918882498e-05, + "loss": 2.343, + "step": 3462000 + }, + { + "epoch": 17.15, + "learning_rate": 4.14258706023989e-05, + "loss": 2.3226, + "step": 3462500 + }, + { + "epoch": 17.16, + "learning_rate": 4.1424632015972816e-05, + "loss": 2.35, + "step": 3463000 + }, + { + "epoch": 17.16, + "learning_rate": 4.142339342954673e-05, + "loss": 2.3612, + "step": 3463500 + }, + { + "epoch": 17.16, + "learning_rate": 4.142215484312065e-05, + "loss": 2.3746, + "step": 3464000 + }, + { + "epoch": 17.16, + "learning_rate": 4.142091625669457e-05, + "loss": 2.3619, + "step": 3464500 + }, + { + "epoch": 17.17, + "learning_rate": 4.141967767026848e-05, + "loss": 2.3721, + "step": 3465000 + }, + { + "epoch": 17.17, + "learning_rate": 4.1418439083842394e-05, + "loss": 2.3639, + "step": 3465500 + }, + { + "epoch": 17.17, + "learning_rate": 4.1417205451762015e-05, + "loss": 2.343, + "step": 3466000 + }, + { + "epoch": 17.17, + "learning_rate": 4.141596686533593e-05, + "loss": 2.3616, + "step": 3466500 + }, + { + "epoch": 17.18, + "learning_rate": 4.141472827890985e-05, + "loss": 2.3572, + "step": 3467000 + }, + { + "epoch": 17.18, + "learning_rate": 4.1413489692483765e-05, + "loss": 2.374, + "step": 3467500 + }, + { + "epoch": 17.18, + "learning_rate": 4.1412253583230534e-05, + "loss": 2.3437, + "step": 3468000 + }, + { + "epoch": 17.18, + "learning_rate": 4.141101499680445e-05, + "loss": 2.3703, + "step": 3468500 + }, + { + "epoch": 17.19, + "learning_rate": 4.140977641037837e-05, + "loss": 2.3546, + "step": 3469000 + }, + { + "epoch": 17.19, + "learning_rate": 4.140853782395228e-05, + "loss": 2.3631, + "step": 3469500 + }, + { + "epoch": 17.19, + "learning_rate": 4.140730171469905e-05, + "loss": 2.3685, + "step": 3470000 + }, + { + "epoch": 17.19, + "learning_rate": 4.1406063128272964e-05, + "loss": 2.3637, + "step": 3470500 + }, + { + "epoch": 17.2, + "learning_rate": 4.140482454184688e-05, + "loss": 2.3558, + "step": 3471000 + }, + { + "epoch": 17.2, + "learning_rate": 4.14035859554208e-05, + "loss": 2.3688, + "step": 3471500 + }, + { + "epoch": 17.2, + "learning_rate": 4.1402347368994715e-05, + "loss": 2.343, + "step": 3472000 + }, + { + "epoch": 17.2, + "learning_rate": 4.140110878256863e-05, + "loss": 2.364, + "step": 3472500 + }, + { + "epoch": 17.21, + "learning_rate": 4.139987019614255e-05, + "loss": 2.3704, + "step": 3473000 + }, + { + "epoch": 17.21, + "learning_rate": 4.1398631609716465e-05, + "loss": 2.3474, + "step": 3473500 + }, + { + "epoch": 17.21, + "learning_rate": 4.1397395500463234e-05, + "loss": 2.3439, + "step": 3474000 + }, + { + "epoch": 17.21, + "learning_rate": 4.139615691403715e-05, + "loss": 2.3453, + "step": 3474500 + }, + { + "epoch": 17.22, + "learning_rate": 4.139491832761107e-05, + "loss": 2.3463, + "step": 3475000 + }, + { + "epoch": 17.22, + "learning_rate": 4.1393679741184985e-05, + "loss": 2.3682, + "step": 3475500 + }, + { + "epoch": 17.22, + "learning_rate": 4.1392441154758895e-05, + "loss": 2.3424, + "step": 3476000 + }, + { + "epoch": 17.22, + "learning_rate": 4.139120256833281e-05, + "loss": 2.3605, + "step": 3476500 + }, + { + "epoch": 17.23, + "learning_rate": 4.138996398190673e-05, + "loss": 2.3513, + "step": 3477000 + }, + { + "epoch": 17.23, + "learning_rate": 4.13887278726535e-05, + "loss": 2.3756, + "step": 3477500 + }, + { + "epoch": 17.23, + "learning_rate": 4.1387489286227415e-05, + "loss": 2.3382, + "step": 3478000 + }, + { + "epoch": 17.23, + "learning_rate": 4.138625069980133e-05, + "loss": 2.3326, + "step": 3478500 + }, + { + "epoch": 17.24, + "learning_rate": 4.138501211337525e-05, + "loss": 2.3489, + "step": 3479000 + }, + { + "epoch": 17.24, + "learning_rate": 4.1383773526949166e-05, + "loss": 2.3513, + "step": 3479500 + }, + { + "epoch": 17.24, + "learning_rate": 4.1382537417695934e-05, + "loss": 2.3649, + "step": 3480000 + }, + { + "epoch": 17.24, + "learning_rate": 4.13813013084427e-05, + "loss": 2.3805, + "step": 3480500 + }, + { + "epoch": 17.25, + "learning_rate": 4.138006272201662e-05, + "loss": 2.3487, + "step": 3481000 + }, + { + "epoch": 17.25, + "learning_rate": 4.137882413559053e-05, + "loss": 2.3675, + "step": 3481500 + }, + { + "epoch": 17.25, + "learning_rate": 4.137758554916445e-05, + "loss": 2.3361, + "step": 3482000 + }, + { + "epoch": 17.25, + "learning_rate": 4.1376346962738364e-05, + "loss": 2.3631, + "step": 3482500 + }, + { + "epoch": 17.26, + "learning_rate": 4.137510837631228e-05, + "loss": 2.3593, + "step": 3483000 + }, + { + "epoch": 17.26, + "learning_rate": 4.13738697898862e-05, + "loss": 2.3565, + "step": 3483500 + }, + { + "epoch": 17.26, + "learning_rate": 4.1372631203460115e-05, + "loss": 2.353, + "step": 3484000 + }, + { + "epoch": 17.26, + "learning_rate": 4.137139261703403e-05, + "loss": 2.3434, + "step": 3484500 + }, + { + "epoch": 17.27, + "learning_rate": 4.137015403060795e-05, + "loss": 2.3584, + "step": 3485000 + }, + { + "epoch": 17.27, + "learning_rate": 4.1368915444181866e-05, + "loss": 2.3592, + "step": 3485500 + }, + { + "epoch": 17.27, + "learning_rate": 4.136767685775578e-05, + "loss": 2.3616, + "step": 3486000 + }, + { + "epoch": 17.27, + "learning_rate": 4.13664382713297e-05, + "loss": 2.361, + "step": 3486500 + }, + { + "epoch": 17.28, + "learning_rate": 4.1365199684903617e-05, + "loss": 2.3554, + "step": 3487000 + }, + { + "epoch": 17.28, + "learning_rate": 4.1363961098477533e-05, + "loss": 2.3607, + "step": 3487500 + }, + { + "epoch": 17.28, + "learning_rate": 4.136272251205145e-05, + "loss": 2.3504, + "step": 3488000 + }, + { + "epoch": 17.28, + "learning_rate": 4.136148640279822e-05, + "loss": 2.3489, + "step": 3488500 + }, + { + "epoch": 17.29, + "learning_rate": 4.1360247816372136e-05, + "loss": 2.3515, + "step": 3489000 + }, + { + "epoch": 17.29, + "learning_rate": 4.1359009229946046e-05, + "loss": 2.3622, + "step": 3489500 + }, + { + "epoch": 17.29, + "learning_rate": 4.1357773120692815e-05, + "loss": 2.366, + "step": 3490000 + }, + { + "epoch": 17.29, + "learning_rate": 4.135653453426673e-05, + "loss": 2.3397, + "step": 3490500 + }, + { + "epoch": 17.3, + "learning_rate": 4.135529594784065e-05, + "loss": 2.3427, + "step": 3491000 + }, + { + "epoch": 17.3, + "learning_rate": 4.1354057361414566e-05, + "loss": 2.3803, + "step": 3491500 + }, + { + "epoch": 17.3, + "learning_rate": 4.135281877498848e-05, + "loss": 2.3359, + "step": 3492000 + }, + { + "epoch": 17.3, + "learning_rate": 4.13515801885624e-05, + "loss": 2.3294, + "step": 3492500 + }, + { + "epoch": 17.31, + "learning_rate": 4.135034160213632e-05, + "loss": 2.3517, + "step": 3493000 + }, + { + "epoch": 17.31, + "learning_rate": 4.1349103015710234e-05, + "loss": 2.3769, + "step": 3493500 + }, + { + "epoch": 17.31, + "learning_rate": 4.134786442928415e-05, + "loss": 2.388, + "step": 3494000 + }, + { + "epoch": 17.31, + "learning_rate": 4.134662584285807e-05, + "loss": 2.3651, + "step": 3494500 + }, + { + "epoch": 17.32, + "learning_rate": 4.1345387256431984e-05, + "loss": 2.3778, + "step": 3495000 + }, + { + "epoch": 17.32, + "learning_rate": 4.13441486700059e-05, + "loss": 2.3529, + "step": 3495500 + }, + { + "epoch": 17.32, + "learning_rate": 4.134291008357982e-05, + "loss": 2.3574, + "step": 3496000 + }, + { + "epoch": 17.32, + "learning_rate": 4.134167149715373e-05, + "loss": 2.3382, + "step": 3496500 + }, + { + "epoch": 17.33, + "learning_rate": 4.13404353879005e-05, + "loss": 2.3456, + "step": 3497000 + }, + { + "epoch": 17.33, + "learning_rate": 4.1339196801474414e-05, + "loss": 2.3635, + "step": 3497500 + }, + { + "epoch": 17.33, + "learning_rate": 4.133795821504833e-05, + "loss": 2.3514, + "step": 3498000 + }, + { + "epoch": 17.33, + "learning_rate": 4.133671962862225e-05, + "loss": 2.352, + "step": 3498500 + }, + { + "epoch": 17.34, + "learning_rate": 4.1335481042196165e-05, + "loss": 2.3373, + "step": 3499000 + }, + { + "epoch": 17.34, + "learning_rate": 4.1334242455770075e-05, + "loss": 2.3498, + "step": 3499500 + }, + { + "epoch": 17.34, + "learning_rate": 4.133300386934399e-05, + "loss": 2.3483, + "step": 3500000 + }, + { + "epoch": 17.34, + "learning_rate": 4.133176528291791e-05, + "loss": 2.3468, + "step": 3500500 + }, + { + "epoch": 17.35, + "learning_rate": 4.1330526696491826e-05, + "loss": 2.3388, + "step": 3501000 + }, + { + "epoch": 17.35, + "learning_rate": 4.132928811006574e-05, + "loss": 2.3492, + "step": 3501500 + }, + { + "epoch": 17.35, + "learning_rate": 4.132805200081252e-05, + "loss": 2.3273, + "step": 3502000 + }, + { + "epoch": 17.35, + "learning_rate": 4.132681341438643e-05, + "loss": 2.3493, + "step": 3502500 + }, + { + "epoch": 17.36, + "learning_rate": 4.132557978230605e-05, + "loss": 2.3476, + "step": 3503000 + }, + { + "epoch": 17.36, + "learning_rate": 4.1324341195879966e-05, + "loss": 2.3567, + "step": 3503500 + }, + { + "epoch": 17.36, + "learning_rate": 4.1323105086626735e-05, + "loss": 2.3562, + "step": 3504000 + }, + { + "epoch": 17.36, + "learning_rate": 4.132186650020065e-05, + "loss": 2.3567, + "step": 3504500 + }, + { + "epoch": 17.36, + "learning_rate": 4.132062791377457e-05, + "loss": 2.3373, + "step": 3505000 + }, + { + "epoch": 17.37, + "learning_rate": 4.1319389327348486e-05, + "loss": 2.3705, + "step": 3505500 + }, + { + "epoch": 17.37, + "learning_rate": 4.13181507409224e-05, + "loss": 2.36, + "step": 3506000 + }, + { + "epoch": 17.37, + "learning_rate": 4.131691215449632e-05, + "loss": 2.3511, + "step": 3506500 + }, + { + "epoch": 17.37, + "learning_rate": 4.1315673568070236e-05, + "loss": 2.3651, + "step": 3507000 + }, + { + "epoch": 17.38, + "learning_rate": 4.1314434981644153e-05, + "loss": 2.358, + "step": 3507500 + }, + { + "epoch": 17.38, + "learning_rate": 4.1313198872390915e-05, + "loss": 2.3668, + "step": 3508000 + }, + { + "epoch": 17.38, + "learning_rate": 4.131196028596483e-05, + "loss": 2.3245, + "step": 3508500 + }, + { + "epoch": 17.38, + "learning_rate": 4.131072169953875e-05, + "loss": 2.3623, + "step": 3509000 + }, + { + "epoch": 17.39, + "learning_rate": 4.1309483113112666e-05, + "loss": 2.3565, + "step": 3509500 + }, + { + "epoch": 17.39, + "learning_rate": 4.130824452668658e-05, + "loss": 2.3437, + "step": 3510000 + }, + { + "epoch": 17.39, + "learning_rate": 4.13070059402605e-05, + "loss": 2.3466, + "step": 3510500 + }, + { + "epoch": 17.39, + "learning_rate": 4.130576735383442e-05, + "loss": 2.383, + "step": 3511000 + }, + { + "epoch": 17.4, + "learning_rate": 4.1304528767408334e-05, + "loss": 2.3689, + "step": 3511500 + }, + { + "epoch": 17.4, + "learning_rate": 4.130329018098225e-05, + "loss": 2.3632, + "step": 3512000 + }, + { + "epoch": 17.4, + "learning_rate": 4.130205159455617e-05, + "loss": 2.3213, + "step": 3512500 + }, + { + "epoch": 17.4, + "learning_rate": 4.1300813008130085e-05, + "loss": 2.3577, + "step": 3513000 + }, + { + "epoch": 17.41, + "learning_rate": 4.1299576898876854e-05, + "loss": 2.3611, + "step": 3513500 + }, + { + "epoch": 17.41, + "learning_rate": 4.129833831245077e-05, + "loss": 2.3616, + "step": 3514000 + }, + { + "epoch": 17.41, + "learning_rate": 4.129709972602469e-05, + "loss": 2.3532, + "step": 3514500 + }, + { + "epoch": 17.41, + "learning_rate": 4.1295861139598604e-05, + "loss": 2.3241, + "step": 3515000 + }, + { + "epoch": 17.42, + "learning_rate": 4.129462255317252e-05, + "loss": 2.3725, + "step": 3515500 + }, + { + "epoch": 17.42, + "learning_rate": 4.129338396674644e-05, + "loss": 2.3333, + "step": 3516000 + }, + { + "epoch": 17.42, + "learning_rate": 4.1292145380320355e-05, + "loss": 2.3379, + "step": 3516500 + }, + { + "epoch": 17.42, + "learning_rate": 4.129090927106712e-05, + "loss": 2.3909, + "step": 3517000 + }, + { + "epoch": 17.43, + "learning_rate": 4.1289670684641034e-05, + "loss": 2.3664, + "step": 3517500 + }, + { + "epoch": 17.43, + "learning_rate": 4.128843209821495e-05, + "loss": 2.3661, + "step": 3518000 + }, + { + "epoch": 17.43, + "learning_rate": 4.128719351178887e-05, + "loss": 2.3586, + "step": 3518500 + }, + { + "epoch": 17.43, + "learning_rate": 4.1285954925362785e-05, + "loss": 2.3723, + "step": 3519000 + }, + { + "epoch": 17.44, + "learning_rate": 4.12847163389367e-05, + "loss": 2.3486, + "step": 3519500 + }, + { + "epoch": 17.44, + "learning_rate": 4.128347775251062e-05, + "loss": 2.3571, + "step": 3520000 + }, + { + "epoch": 17.44, + "learning_rate": 4.128224164325739e-05, + "loss": 2.3583, + "step": 3520500 + }, + { + "epoch": 17.44, + "learning_rate": 4.1281003056831304e-05, + "loss": 2.4005, + "step": 3521000 + }, + { + "epoch": 17.45, + "learning_rate": 4.127976447040522e-05, + "loss": 2.3775, + "step": 3521500 + }, + { + "epoch": 17.45, + "learning_rate": 4.127852588397914e-05, + "loss": 2.3687, + "step": 3522000 + }, + { + "epoch": 17.45, + "learning_rate": 4.1277287297553055e-05, + "loss": 2.3569, + "step": 3522500 + }, + { + "epoch": 17.45, + "learning_rate": 4.127605118829982e-05, + "loss": 2.3676, + "step": 3523000 + }, + { + "epoch": 17.46, + "learning_rate": 4.1274815079046586e-05, + "loss": 2.3535, + "step": 3523500 + }, + { + "epoch": 17.46, + "learning_rate": 4.12735764926205e-05, + "loss": 2.368, + "step": 3524000 + }, + { + "epoch": 17.46, + "learning_rate": 4.127233790619442e-05, + "loss": 2.3668, + "step": 3524500 + }, + { + "epoch": 17.46, + "learning_rate": 4.127109931976834e-05, + "loss": 2.3525, + "step": 3525000 + }, + { + "epoch": 17.47, + "learning_rate": 4.1269860733342254e-05, + "loss": 2.3639, + "step": 3525500 + }, + { + "epoch": 17.47, + "learning_rate": 4.126862214691617e-05, + "loss": 2.3634, + "step": 3526000 + }, + { + "epoch": 17.47, + "learning_rate": 4.126738603766294e-05, + "loss": 2.3497, + "step": 3526500 + }, + { + "epoch": 17.47, + "learning_rate": 4.126614992840971e-05, + "loss": 2.3586, + "step": 3527000 + }, + { + "epoch": 17.48, + "learning_rate": 4.1264911341983625e-05, + "loss": 2.3447, + "step": 3527500 + }, + { + "epoch": 17.48, + "learning_rate": 4.126367275555754e-05, + "loss": 2.3595, + "step": 3528000 + }, + { + "epoch": 17.48, + "learning_rate": 4.126243416913145e-05, + "loss": 2.3406, + "step": 3528500 + }, + { + "epoch": 17.48, + "learning_rate": 4.126119805987823e-05, + "loss": 2.3242, + "step": 3529000 + }, + { + "epoch": 17.49, + "learning_rate": 4.1259959473452145e-05, + "loss": 2.3484, + "step": 3529500 + }, + { + "epoch": 17.49, + "learning_rate": 4.125872336419891e-05, + "loss": 2.3547, + "step": 3530000 + }, + { + "epoch": 17.49, + "learning_rate": 4.1257484777772824e-05, + "loss": 2.3682, + "step": 3530500 + }, + { + "epoch": 17.49, + "learning_rate": 4.125624619134674e-05, + "loss": 2.352, + "step": 3531000 + }, + { + "epoch": 17.5, + "learning_rate": 4.125500760492066e-05, + "loss": 2.3719, + "step": 3531500 + }, + { + "epoch": 17.5, + "learning_rate": 4.1253769018494575e-05, + "loss": 2.3395, + "step": 3532000 + }, + { + "epoch": 17.5, + "learning_rate": 4.125253043206849e-05, + "loss": 2.3595, + "step": 3532500 + }, + { + "epoch": 17.5, + "learning_rate": 4.125129184564241e-05, + "loss": 2.3707, + "step": 3533000 + }, + { + "epoch": 17.51, + "learning_rate": 4.1250053259216325e-05, + "loss": 2.3691, + "step": 3533500 + }, + { + "epoch": 17.51, + "learning_rate": 4.124881467279024e-05, + "loss": 2.3668, + "step": 3534000 + }, + { + "epoch": 17.51, + "learning_rate": 4.124757608636415e-05, + "loss": 2.3482, + "step": 3534500 + }, + { + "epoch": 17.51, + "learning_rate": 4.124633749993807e-05, + "loss": 2.3422, + "step": 3535000 + }, + { + "epoch": 17.52, + "learning_rate": 4.1245098913511986e-05, + "loss": 2.3515, + "step": 3535500 + }, + { + "epoch": 17.52, + "learning_rate": 4.12438603270859e-05, + "loss": 2.3495, + "step": 3536000 + }, + { + "epoch": 17.52, + "learning_rate": 4.124262174065982e-05, + "loss": 2.3579, + "step": 3536500 + }, + { + "epoch": 17.52, + "learning_rate": 4.124138315423374e-05, + "loss": 2.3655, + "step": 3537000 + }, + { + "epoch": 17.53, + "learning_rate": 4.1240144567807654e-05, + "loss": 2.3543, + "step": 3537500 + }, + { + "epoch": 17.53, + "learning_rate": 4.123890598138157e-05, + "loss": 2.3707, + "step": 3538000 + }, + { + "epoch": 17.53, + "learning_rate": 4.123766987212834e-05, + "loss": 2.3427, + "step": 3538500 + }, + { + "epoch": 17.53, + "learning_rate": 4.123643128570226e-05, + "loss": 2.3651, + "step": 3539000 + }, + { + "epoch": 17.54, + "learning_rate": 4.1235192699276174e-05, + "loss": 2.3747, + "step": 3539500 + }, + { + "epoch": 17.54, + "learning_rate": 4.123395411285009e-05, + "loss": 2.3532, + "step": 3540000 + }, + { + "epoch": 17.54, + "learning_rate": 4.1232715526424e-05, + "loss": 2.3638, + "step": 3540500 + }, + { + "epoch": 17.54, + "learning_rate": 4.123147941717077e-05, + "loss": 2.3649, + "step": 3541000 + }, + { + "epoch": 17.55, + "learning_rate": 4.1230240830744686e-05, + "loss": 2.3551, + "step": 3541500 + }, + { + "epoch": 17.55, + "learning_rate": 4.12290022443186e-05, + "loss": 2.3676, + "step": 3542000 + }, + { + "epoch": 17.55, + "learning_rate": 4.122776365789252e-05, + "loss": 2.3658, + "step": 3542500 + }, + { + "epoch": 17.55, + "learning_rate": 4.122652507146644e-05, + "loss": 2.3553, + "step": 3543000 + }, + { + "epoch": 17.56, + "learning_rate": 4.1225286485040354e-05, + "loss": 2.3745, + "step": 3543500 + }, + { + "epoch": 17.56, + "learning_rate": 4.122405037578712e-05, + "loss": 2.3211, + "step": 3544000 + }, + { + "epoch": 17.56, + "learning_rate": 4.122281178936104e-05, + "loss": 2.3367, + "step": 3544500 + }, + { + "epoch": 17.56, + "learning_rate": 4.122157320293496e-05, + "loss": 2.3815, + "step": 3545000 + }, + { + "epoch": 17.57, + "learning_rate": 4.1220334616508874e-05, + "loss": 2.3569, + "step": 3545500 + }, + { + "epoch": 17.57, + "learning_rate": 4.121909603008279e-05, + "loss": 2.3516, + "step": 3546000 + }, + { + "epoch": 17.57, + "learning_rate": 4.121785992082956e-05, + "loss": 2.3911, + "step": 3546500 + }, + { + "epoch": 17.57, + "learning_rate": 4.121662133440347e-05, + "loss": 2.3477, + "step": 3547000 + }, + { + "epoch": 17.58, + "learning_rate": 4.1215382747977387e-05, + "loss": 2.3665, + "step": 3547500 + }, + { + "epoch": 17.58, + "learning_rate": 4.1214144161551304e-05, + "loss": 2.3727, + "step": 3548000 + }, + { + "epoch": 17.58, + "learning_rate": 4.121290557512522e-05, + "loss": 2.3339, + "step": 3548500 + }, + { + "epoch": 17.58, + "learning_rate": 4.121166698869914e-05, + "loss": 2.391, + "step": 3549000 + }, + { + "epoch": 17.59, + "learning_rate": 4.1210428402273054e-05, + "loss": 2.3653, + "step": 3549500 + }, + { + "epoch": 17.59, + "learning_rate": 4.120918981584697e-05, + "loss": 2.3585, + "step": 3550000 + }, + { + "epoch": 17.59, + "learning_rate": 4.120795122942089e-05, + "loss": 2.3839, + "step": 3550500 + }, + { + "epoch": 17.59, + "learning_rate": 4.1206712642994805e-05, + "loss": 2.3457, + "step": 3551000 + }, + { + "epoch": 17.6, + "learning_rate": 4.120547405656872e-05, + "loss": 2.3383, + "step": 3551500 + }, + { + "epoch": 17.6, + "learning_rate": 4.120423547014264e-05, + "loss": 2.3665, + "step": 3552000 + }, + { + "epoch": 17.6, + "learning_rate": 4.1202996883716556e-05, + "loss": 2.3762, + "step": 3552500 + }, + { + "epoch": 17.6, + "learning_rate": 4.120175829729047e-05, + "loss": 2.364, + "step": 3553000 + }, + { + "epoch": 17.61, + "learning_rate": 4.120051971086439e-05, + "loss": 2.3739, + "step": 3553500 + }, + { + "epoch": 17.61, + "learning_rate": 4.119928360161115e-05, + "loss": 2.3595, + "step": 3554000 + }, + { + "epoch": 17.61, + "learning_rate": 4.119804501518507e-05, + "loss": 2.3607, + "step": 3554500 + }, + { + "epoch": 17.61, + "learning_rate": 4.1196806428758986e-05, + "loss": 2.3675, + "step": 3555000 + }, + { + "epoch": 17.62, + "learning_rate": 4.11955678423329e-05, + "loss": 2.3651, + "step": 3555500 + }, + { + "epoch": 17.62, + "learning_rate": 4.119432925590682e-05, + "loss": 2.3478, + "step": 3556000 + }, + { + "epoch": 17.62, + "learning_rate": 4.1193090669480736e-05, + "loss": 2.3354, + "step": 3556500 + }, + { + "epoch": 17.62, + "learning_rate": 4.119185208305465e-05, + "loss": 2.3537, + "step": 3557000 + }, + { + "epoch": 17.63, + "learning_rate": 4.1190618450974274e-05, + "loss": 2.3613, + "step": 3557500 + }, + { + "epoch": 17.63, + "learning_rate": 4.118937986454819e-05, + "loss": 2.3725, + "step": 3558000 + }, + { + "epoch": 17.63, + "learning_rate": 4.118814127812211e-05, + "loss": 2.3538, + "step": 3558500 + }, + { + "epoch": 17.63, + "learning_rate": 4.1186902691696025e-05, + "loss": 2.3833, + "step": 3559000 + }, + { + "epoch": 17.63, + "learning_rate": 4.118566410526994e-05, + "loss": 2.3447, + "step": 3559500 + }, + { + "epoch": 17.64, + "learning_rate": 4.118442551884386e-05, + "loss": 2.3642, + "step": 3560000 + }, + { + "epoch": 17.64, + "learning_rate": 4.118318693241777e-05, + "loss": 2.3562, + "step": 3560500 + }, + { + "epoch": 17.64, + "learning_rate": 4.118195082316454e-05, + "loss": 2.3684, + "step": 3561000 + }, + { + "epoch": 17.64, + "learning_rate": 4.118071471391131e-05, + "loss": 2.376, + "step": 3561500 + }, + { + "epoch": 17.65, + "learning_rate": 4.117947612748523e-05, + "loss": 2.3549, + "step": 3562000 + }, + { + "epoch": 17.65, + "learning_rate": 4.117823754105914e-05, + "loss": 2.3716, + "step": 3562500 + }, + { + "epoch": 17.65, + "learning_rate": 4.117699895463306e-05, + "loss": 2.3731, + "step": 3563000 + }, + { + "epoch": 17.65, + "learning_rate": 4.1175760368206974e-05, + "loss": 2.3784, + "step": 3563500 + }, + { + "epoch": 17.66, + "learning_rate": 4.117452425895374e-05, + "loss": 2.375, + "step": 3564000 + }, + { + "epoch": 17.66, + "learning_rate": 4.117328567252766e-05, + "loss": 2.3604, + "step": 3564500 + }, + { + "epoch": 17.66, + "learning_rate": 4.117204708610158e-05, + "loss": 2.3685, + "step": 3565000 + }, + { + "epoch": 17.66, + "learning_rate": 4.1170808499675494e-05, + "loss": 2.365, + "step": 3565500 + }, + { + "epoch": 17.67, + "learning_rate": 4.1169569913249404e-05, + "loss": 2.3495, + "step": 3566000 + }, + { + "epoch": 17.67, + "learning_rate": 4.116833132682332e-05, + "loss": 2.3564, + "step": 3566500 + }, + { + "epoch": 17.67, + "learning_rate": 4.116709274039724e-05, + "loss": 2.3713, + "step": 3567000 + }, + { + "epoch": 17.67, + "learning_rate": 4.1165854153971155e-05, + "loss": 2.3543, + "step": 3567500 + }, + { + "epoch": 17.68, + "learning_rate": 4.116461556754507e-05, + "loss": 2.3739, + "step": 3568000 + }, + { + "epoch": 17.68, + "learning_rate": 4.116337945829185e-05, + "loss": 2.3615, + "step": 3568500 + }, + { + "epoch": 17.68, + "learning_rate": 4.116214087186576e-05, + "loss": 2.3605, + "step": 3569000 + }, + { + "epoch": 17.68, + "learning_rate": 4.1160902285439674e-05, + "loss": 2.3517, + "step": 3569500 + }, + { + "epoch": 17.69, + "learning_rate": 4.115966369901359e-05, + "loss": 2.3571, + "step": 3570000 + }, + { + "epoch": 17.69, + "learning_rate": 4.115842511258751e-05, + "loss": 2.3669, + "step": 3570500 + }, + { + "epoch": 17.69, + "learning_rate": 4.115718900333428e-05, + "loss": 2.377, + "step": 3571000 + }, + { + "epoch": 17.69, + "learning_rate": 4.1155950416908194e-05, + "loss": 2.3674, + "step": 3571500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1154711830482104e-05, + "loss": 2.3401, + "step": 3572000 + }, + { + "epoch": 17.7, + "learning_rate": 4.115347572122888e-05, + "loss": 2.3685, + "step": 3572500 + }, + { + "epoch": 17.7, + "learning_rate": 4.1152237134802797e-05, + "loss": 2.3623, + "step": 3573000 + }, + { + "epoch": 17.7, + "learning_rate": 4.1150998548376713e-05, + "loss": 2.3706, + "step": 3573500 + }, + { + "epoch": 17.71, + "learning_rate": 4.114975996195063e-05, + "loss": 2.3436, + "step": 3574000 + }, + { + "epoch": 17.71, + "learning_rate": 4.114852137552455e-05, + "loss": 2.386, + "step": 3574500 + }, + { + "epoch": 17.71, + "learning_rate": 4.1147282789098464e-05, + "loss": 2.3572, + "step": 3575000 + }, + { + "epoch": 17.71, + "learning_rate": 4.1146044202672374e-05, + "loss": 2.3548, + "step": 3575500 + }, + { + "epoch": 17.72, + "learning_rate": 4.114480561624629e-05, + "loss": 2.3364, + "step": 3576000 + }, + { + "epoch": 17.72, + "learning_rate": 4.114356702982021e-05, + "loss": 2.3401, + "step": 3576500 + }, + { + "epoch": 17.72, + "learning_rate": 4.1142328443394125e-05, + "loss": 2.3559, + "step": 3577000 + }, + { + "epoch": 17.72, + "learning_rate": 4.1141094811313746e-05, + "loss": 2.3307, + "step": 3577500 + }, + { + "epoch": 17.73, + "learning_rate": 4.113985622488766e-05, + "loss": 2.3549, + "step": 3578000 + }, + { + "epoch": 17.73, + "learning_rate": 4.113861763846158e-05, + "loss": 2.3539, + "step": 3578500 + }, + { + "epoch": 17.73, + "learning_rate": 4.11373790520355e-05, + "loss": 2.3734, + "step": 3579000 + }, + { + "epoch": 17.73, + "learning_rate": 4.1136140465609414e-05, + "loss": 2.3802, + "step": 3579500 + }, + { + "epoch": 17.74, + "learning_rate": 4.113490187918333e-05, + "loss": 2.3796, + "step": 3580000 + }, + { + "epoch": 17.74, + "learning_rate": 4.113366329275725e-05, + "loss": 2.3417, + "step": 3580500 + }, + { + "epoch": 17.74, + "learning_rate": 4.1132424706331164e-05, + "loss": 2.3862, + "step": 3581000 + }, + { + "epoch": 17.74, + "learning_rate": 4.1131186119905075e-05, + "loss": 2.3848, + "step": 3581500 + }, + { + "epoch": 17.75, + "learning_rate": 4.112994753347899e-05, + "loss": 2.3474, + "step": 3582000 + }, + { + "epoch": 17.75, + "learning_rate": 4.112870894705291e-05, + "loss": 2.3563, + "step": 3582500 + }, + { + "epoch": 17.75, + "learning_rate": 4.1127470360626825e-05, + "loss": 2.3501, + "step": 3583000 + }, + { + "epoch": 17.75, + "learning_rate": 4.112623177420074e-05, + "loss": 2.3553, + "step": 3583500 + }, + { + "epoch": 17.76, + "learning_rate": 4.112499566494751e-05, + "loss": 2.3721, + "step": 3584000 + }, + { + "epoch": 17.76, + "learning_rate": 4.112375707852142e-05, + "loss": 2.3584, + "step": 3584500 + }, + { + "epoch": 17.76, + "learning_rate": 4.112251849209534e-05, + "loss": 2.3551, + "step": 3585000 + }, + { + "epoch": 17.76, + "learning_rate": 4.1121279905669255e-05, + "loss": 2.3418, + "step": 3585500 + }, + { + "epoch": 17.77, + "learning_rate": 4.112004379641603e-05, + "loss": 2.3532, + "step": 3586000 + }, + { + "epoch": 17.77, + "learning_rate": 4.111880520998995e-05, + "loss": 2.3619, + "step": 3586500 + }, + { + "epoch": 17.77, + "learning_rate": 4.1117566623563864e-05, + "loss": 2.3691, + "step": 3587000 + }, + { + "epoch": 17.77, + "learning_rate": 4.111632803713778e-05, + "loss": 2.3613, + "step": 3587500 + }, + { + "epoch": 17.78, + "learning_rate": 4.111508945071169e-05, + "loss": 2.3628, + "step": 3588000 + }, + { + "epoch": 17.78, + "learning_rate": 4.111385581863131e-05, + "loss": 2.3733, + "step": 3588500 + }, + { + "epoch": 17.78, + "learning_rate": 4.111261723220523e-05, + "loss": 2.35, + "step": 3589000 + }, + { + "epoch": 17.78, + "learning_rate": 4.1111378645779146e-05, + "loss": 2.3595, + "step": 3589500 + }, + { + "epoch": 17.79, + "learning_rate": 4.111014005935306e-05, + "loss": 2.3707, + "step": 3590000 + }, + { + "epoch": 17.79, + "learning_rate": 4.110890147292698e-05, + "loss": 2.3446, + "step": 3590500 + }, + { + "epoch": 17.79, + "learning_rate": 4.11076628865009e-05, + "loss": 2.3831, + "step": 3591000 + }, + { + "epoch": 17.79, + "learning_rate": 4.1106424300074814e-05, + "loss": 2.3779, + "step": 3591500 + }, + { + "epoch": 17.8, + "learning_rate": 4.110518571364873e-05, + "loss": 2.3586, + "step": 3592000 + }, + { + "epoch": 17.8, + "learning_rate": 4.11039496043955e-05, + "loss": 2.3604, + "step": 3592500 + }, + { + "epoch": 17.8, + "learning_rate": 4.1102711017969416e-05, + "loss": 2.3887, + "step": 3593000 + }, + { + "epoch": 17.8, + "learning_rate": 4.1101472431543333e-05, + "loss": 2.3616, + "step": 3593500 + }, + { + "epoch": 17.81, + "learning_rate": 4.110023384511725e-05, + "loss": 2.3608, + "step": 3594000 + }, + { + "epoch": 17.81, + "learning_rate": 4.109899773586401e-05, + "loss": 2.3625, + "step": 3594500 + }, + { + "epoch": 17.81, + "learning_rate": 4.109775914943793e-05, + "loss": 2.3792, + "step": 3595000 + }, + { + "epoch": 17.81, + "learning_rate": 4.1096520563011846e-05, + "loss": 2.3811, + "step": 3595500 + }, + { + "epoch": 17.82, + "learning_rate": 4.109528197658576e-05, + "loss": 2.3449, + "step": 3596000 + }, + { + "epoch": 17.82, + "learning_rate": 4.109404339015968e-05, + "loss": 2.3525, + "step": 3596500 + }, + { + "epoch": 17.82, + "learning_rate": 4.10928048037336e-05, + "loss": 2.365, + "step": 3597000 + }, + { + "epoch": 17.82, + "learning_rate": 4.1091568694480366e-05, + "loss": 2.3526, + "step": 3597500 + }, + { + "epoch": 17.83, + "learning_rate": 4.109033010805428e-05, + "loss": 2.3738, + "step": 3598000 + }, + { + "epoch": 17.83, + "learning_rate": 4.10890915216282e-05, + "loss": 2.3432, + "step": 3598500 + }, + { + "epoch": 17.83, + "learning_rate": 4.1087852935202117e-05, + "loss": 2.3489, + "step": 3599000 + }, + { + "epoch": 17.83, + "learning_rate": 4.1086614348776034e-05, + "loss": 2.3774, + "step": 3599500 + }, + { + "epoch": 17.84, + "learning_rate": 4.108537576234995e-05, + "loss": 2.3676, + "step": 3600000 + }, + { + "epoch": 17.84, + "learning_rate": 4.108413717592387e-05, + "loss": 2.3776, + "step": 3600500 + }, + { + "epoch": 17.84, + "learning_rate": 4.1082898589497784e-05, + "loss": 2.3718, + "step": 3601000 + }, + { + "epoch": 17.84, + "learning_rate": 4.10816649574174e-05, + "loss": 2.3682, + "step": 3601500 + }, + { + "epoch": 17.85, + "learning_rate": 4.1080426370991315e-05, + "loss": 2.376, + "step": 3602000 + }, + { + "epoch": 17.85, + "learning_rate": 4.107919026173809e-05, + "loss": 2.3763, + "step": 3602500 + }, + { + "epoch": 17.85, + "learning_rate": 4.1077951675312e-05, + "loss": 2.3482, + "step": 3603000 + }, + { + "epoch": 17.85, + "learning_rate": 4.107671308888592e-05, + "loss": 2.3469, + "step": 3603500 + }, + { + "epoch": 17.86, + "learning_rate": 4.1075474502459835e-05, + "loss": 2.3561, + "step": 3604000 + }, + { + "epoch": 17.86, + "learning_rate": 4.107423591603375e-05, + "loss": 2.3922, + "step": 3604500 + }, + { + "epoch": 17.86, + "learning_rate": 4.107299732960767e-05, + "loss": 2.3394, + "step": 3605000 + }, + { + "epoch": 17.86, + "learning_rate": 4.107175874318158e-05, + "loss": 2.3683, + "step": 3605500 + }, + { + "epoch": 17.87, + "learning_rate": 4.1070520156755496e-05, + "loss": 2.3565, + "step": 3606000 + }, + { + "epoch": 17.87, + "learning_rate": 4.106928157032941e-05, + "loss": 2.3693, + "step": 3606500 + }, + { + "epoch": 17.87, + "learning_rate": 4.106804298390333e-05, + "loss": 2.3589, + "step": 3607000 + }, + { + "epoch": 17.87, + "learning_rate": 4.1066804397477246e-05, + "loss": 2.3629, + "step": 3607500 + }, + { + "epoch": 17.88, + "learning_rate": 4.1065565811051163e-05, + "loss": 2.3639, + "step": 3608000 + }, + { + "epoch": 17.88, + "learning_rate": 4.106432970179793e-05, + "loss": 2.3378, + "step": 3608500 + }, + { + "epoch": 17.88, + "learning_rate": 4.106309111537185e-05, + "loss": 2.3463, + "step": 3609000 + }, + { + "epoch": 17.88, + "learning_rate": 4.1061855006118625e-05, + "loss": 2.3587, + "step": 3609500 + }, + { + "epoch": 17.89, + "learning_rate": 4.1060616419692535e-05, + "loss": 2.3465, + "step": 3610000 + }, + { + "epoch": 17.89, + "learning_rate": 4.105937783326645e-05, + "loss": 2.3632, + "step": 3610500 + }, + { + "epoch": 17.89, + "learning_rate": 4.105813924684037e-05, + "loss": 2.3575, + "step": 3611000 + }, + { + "epoch": 17.89, + "learning_rate": 4.1056900660414286e-05, + "loss": 2.3853, + "step": 3611500 + }, + { + "epoch": 17.9, + "learning_rate": 4.1055662073988196e-05, + "loss": 2.3734, + "step": 3612000 + }, + { + "epoch": 17.9, + "learning_rate": 4.105442348756211e-05, + "loss": 2.3292, + "step": 3612500 + }, + { + "epoch": 17.9, + "learning_rate": 4.105318490113603e-05, + "loss": 2.3488, + "step": 3613000 + }, + { + "epoch": 17.9, + "learning_rate": 4.10519487918828e-05, + "loss": 2.3648, + "step": 3613500 + }, + { + "epoch": 17.91, + "learning_rate": 4.1050710205456715e-05, + "loss": 2.3435, + "step": 3614000 + }, + { + "epoch": 17.91, + "learning_rate": 4.104947161903063e-05, + "loss": 2.3709, + "step": 3614500 + }, + { + "epoch": 17.91, + "learning_rate": 4.104823303260455e-05, + "loss": 2.375, + "step": 3615000 + }, + { + "epoch": 17.91, + "learning_rate": 4.1046994446178466e-05, + "loss": 2.3707, + "step": 3615500 + }, + { + "epoch": 17.91, + "learning_rate": 4.104575585975238e-05, + "loss": 2.3493, + "step": 3616000 + }, + { + "epoch": 17.92, + "learning_rate": 4.10445172733263e-05, + "loss": 2.3454, + "step": 3616500 + }, + { + "epoch": 17.92, + "learning_rate": 4.104327868690022e-05, + "loss": 2.34, + "step": 3617000 + }, + { + "epoch": 17.92, + "learning_rate": 4.1042042577646986e-05, + "loss": 2.3497, + "step": 3617500 + }, + { + "epoch": 17.92, + "learning_rate": 4.1040806468393755e-05, + "loss": 2.3711, + "step": 3618000 + }, + { + "epoch": 17.93, + "learning_rate": 4.103956788196767e-05, + "loss": 2.3608, + "step": 3618500 + }, + { + "epoch": 17.93, + "learning_rate": 4.103832929554159e-05, + "loss": 2.3884, + "step": 3619000 + }, + { + "epoch": 17.93, + "learning_rate": 4.10370907091155e-05, + "loss": 2.3432, + "step": 3619500 + }, + { + "epoch": 17.93, + "learning_rate": 4.1035852122689416e-05, + "loss": 2.3545, + "step": 3620000 + }, + { + "epoch": 17.94, + "learning_rate": 4.103462096778189e-05, + "loss": 2.3426, + "step": 3620500 + }, + { + "epoch": 17.94, + "learning_rate": 4.1033382381355805e-05, + "loss": 2.3625, + "step": 3621000 + }, + { + "epoch": 17.94, + "learning_rate": 4.103214379492972e-05, + "loss": 2.3656, + "step": 3621500 + }, + { + "epoch": 17.94, + "learning_rate": 4.103090520850364e-05, + "loss": 2.3802, + "step": 3622000 + }, + { + "epoch": 17.95, + "learning_rate": 4.1029666622077556e-05, + "loss": 2.3537, + "step": 3622500 + }, + { + "epoch": 17.95, + "learning_rate": 4.102842803565147e-05, + "loss": 2.3691, + "step": 3623000 + }, + { + "epoch": 17.95, + "learning_rate": 4.102718944922539e-05, + "loss": 2.3754, + "step": 3623500 + }, + { + "epoch": 17.95, + "learning_rate": 4.1025950862799307e-05, + "loss": 2.3664, + "step": 3624000 + }, + { + "epoch": 17.96, + "learning_rate": 4.1024712276373224e-05, + "loss": 2.3663, + "step": 3624500 + }, + { + "epoch": 17.96, + "learning_rate": 4.102347368994714e-05, + "loss": 2.3661, + "step": 3625000 + }, + { + "epoch": 17.96, + "learning_rate": 4.102223510352106e-05, + "loss": 2.3567, + "step": 3625500 + }, + { + "epoch": 17.96, + "learning_rate": 4.1020998994267826e-05, + "loss": 2.3632, + "step": 3626000 + }, + { + "epoch": 17.97, + "learning_rate": 4.1019760407841736e-05, + "loss": 2.3703, + "step": 3626500 + }, + { + "epoch": 17.97, + "learning_rate": 4.101852182141565e-05, + "loss": 2.3692, + "step": 3627000 + }, + { + "epoch": 17.97, + "learning_rate": 4.101728323498957e-05, + "loss": 2.3708, + "step": 3627500 + }, + { + "epoch": 17.97, + "learning_rate": 4.101604464856349e-05, + "loss": 2.3665, + "step": 3628000 + }, + { + "epoch": 17.98, + "learning_rate": 4.1014806062137404e-05, + "loss": 2.3794, + "step": 3628500 + }, + { + "epoch": 17.98, + "learning_rate": 4.101356747571132e-05, + "loss": 2.3282, + "step": 3629000 + }, + { + "epoch": 17.98, + "learning_rate": 4.101232888928524e-05, + "loss": 2.3399, + "step": 3629500 + }, + { + "epoch": 17.98, + "learning_rate": 4.1011090302859155e-05, + "loss": 2.3481, + "step": 3630000 + }, + { + "epoch": 17.99, + "learning_rate": 4.1009854193605924e-05, + "loss": 2.3503, + "step": 3630500 + }, + { + "epoch": 17.99, + "learning_rate": 4.100861560717984e-05, + "loss": 2.3472, + "step": 3631000 + }, + { + "epoch": 17.99, + "learning_rate": 4.100737702075376e-05, + "loss": 2.3613, + "step": 3631500 + }, + { + "epoch": 17.99, + "learning_rate": 4.1006140911500526e-05, + "loss": 2.3675, + "step": 3632000 + }, + { + "epoch": 18.0, + "learning_rate": 4.100490232507444e-05, + "loss": 2.3705, + "step": 3632500 + }, + { + "epoch": 18.0, + "learning_rate": 4.100366373864836e-05, + "loss": 2.3473, + "step": 3633000 + }, + { + "epoch": 18.0, + "eval_accuracy": 0.650584751161539, + "eval_accuracy_mlm": 0.6049741663885951, + "eval_accuracy_nsp": 0.8655666205154554, + "eval_loss": 2.37389874458313, + "eval_runtime": 145.8488, + "eval_samples_per_second": 1748.105, + "eval_steps_per_second": 72.843, + "step": 3633174 + }, + { + "epoch": 18.0, + "learning_rate": 4.100242515222227e-05, + "loss": 2.3426, + "step": 3633500 + }, + { + "epoch": 18.0, + "learning_rate": 4.100118904296904e-05, + "loss": 2.3235, + "step": 3634000 + }, + { + "epoch": 18.01, + "learning_rate": 4.0999950456542956e-05, + "loss": 2.3318, + "step": 3634500 + }, + { + "epoch": 18.01, + "learning_rate": 4.099871187011687e-05, + "loss": 2.3237, + "step": 3635000 + }, + { + "epoch": 18.01, + "learning_rate": 4.099747328369079e-05, + "loss": 2.3424, + "step": 3635500 + }, + { + "epoch": 18.01, + "learning_rate": 4.099623469726471e-05, + "loss": 2.3453, + "step": 3636000 + }, + { + "epoch": 18.02, + "learning_rate": 4.0994996110838624e-05, + "loss": 2.3344, + "step": 3636500 + }, + { + "epoch": 18.02, + "learning_rate": 4.099375752441254e-05, + "loss": 2.3098, + "step": 3637000 + }, + { + "epoch": 18.02, + "learning_rate": 4.099251893798646e-05, + "loss": 2.3288, + "step": 3637500 + }, + { + "epoch": 18.02, + "learning_rate": 4.0991282828733226e-05, + "loss": 2.3565, + "step": 3638000 + }, + { + "epoch": 18.03, + "learning_rate": 4.099004424230714e-05, + "loss": 2.3382, + "step": 3638500 + }, + { + "epoch": 18.03, + "learning_rate": 4.098880565588106e-05, + "loss": 2.3385, + "step": 3639000 + }, + { + "epoch": 18.03, + "learning_rate": 4.098756954662782e-05, + "loss": 2.3369, + "step": 3639500 + }, + { + "epoch": 18.03, + "learning_rate": 4.098633096020174e-05, + "loss": 2.3324, + "step": 3640000 + }, + { + "epoch": 18.04, + "learning_rate": 4.0985092373775656e-05, + "loss": 2.3385, + "step": 3640500 + }, + { + "epoch": 18.04, + "learning_rate": 4.098385378734957e-05, + "loss": 2.341, + "step": 3641000 + }, + { + "epoch": 18.04, + "learning_rate": 4.098261520092349e-05, + "loss": 2.3253, + "step": 3641500 + }, + { + "epoch": 18.04, + "learning_rate": 4.098137661449741e-05, + "loss": 2.3427, + "step": 3642000 + }, + { + "epoch": 18.05, + "learning_rate": 4.0980138028071324e-05, + "loss": 2.3467, + "step": 3642500 + }, + { + "epoch": 18.05, + "learning_rate": 4.097889944164524e-05, + "loss": 2.3307, + "step": 3643000 + }, + { + "epoch": 18.05, + "learning_rate": 4.097766333239201e-05, + "loss": 2.3384, + "step": 3643500 + }, + { + "epoch": 18.05, + "learning_rate": 4.0976424745965927e-05, + "loss": 2.3376, + "step": 3644000 + }, + { + "epoch": 18.06, + "learning_rate": 4.0975186159539843e-05, + "loss": 2.3253, + "step": 3644500 + }, + { + "epoch": 18.06, + "learning_rate": 4.097394757311376e-05, + "loss": 2.347, + "step": 3645000 + }, + { + "epoch": 18.06, + "learning_rate": 4.097270898668768e-05, + "loss": 2.3095, + "step": 3645500 + }, + { + "epoch": 18.06, + "learning_rate": 4.0971470400261594e-05, + "loss": 2.3029, + "step": 3646000 + }, + { + "epoch": 18.07, + "learning_rate": 4.097023181383551e-05, + "loss": 2.3188, + "step": 3646500 + }, + { + "epoch": 18.07, + "learning_rate": 4.096899322740942e-05, + "loss": 2.3294, + "step": 3647000 + }, + { + "epoch": 18.07, + "learning_rate": 4.096775464098334e-05, + "loss": 2.3405, + "step": 3647500 + }, + { + "epoch": 18.07, + "learning_rate": 4.0966516054557255e-05, + "loss": 2.3273, + "step": 3648000 + }, + { + "epoch": 18.08, + "learning_rate": 4.096527746813117e-05, + "loss": 2.3155, + "step": 3648500 + }, + { + "epoch": 18.08, + "learning_rate": 4.096403888170509e-05, + "loss": 2.3269, + "step": 3649000 + }, + { + "epoch": 18.08, + "learning_rate": 4.0962800295279006e-05, + "loss": 2.3336, + "step": 3649500 + }, + { + "epoch": 18.08, + "learning_rate": 4.0961564186025775e-05, + "loss": 2.3234, + "step": 3650000 + }, + { + "epoch": 18.09, + "learning_rate": 4.096032559959969e-05, + "loss": 2.3282, + "step": 3650500 + }, + { + "epoch": 18.09, + "learning_rate": 4.095908701317361e-05, + "loss": 2.3405, + "step": 3651000 + }, + { + "epoch": 18.09, + "learning_rate": 4.0957848426747526e-05, + "loss": 2.317, + "step": 3651500 + }, + { + "epoch": 18.09, + "learning_rate": 4.0956612317494294e-05, + "loss": 2.3369, + "step": 3652000 + }, + { + "epoch": 18.1, + "learning_rate": 4.095537373106821e-05, + "loss": 2.3423, + "step": 3652500 + }, + { + "epoch": 18.1, + "learning_rate": 4.095413762181497e-05, + "loss": 2.3363, + "step": 3653000 + }, + { + "epoch": 18.1, + "learning_rate": 4.095289903538889e-05, + "loss": 2.3515, + "step": 3653500 + }, + { + "epoch": 18.1, + "learning_rate": 4.095166044896281e-05, + "loss": 2.3327, + "step": 3654000 + }, + { + "epoch": 18.11, + "learning_rate": 4.0950421862536724e-05, + "loss": 2.3208, + "step": 3654500 + }, + { + "epoch": 18.11, + "learning_rate": 4.094918327611064e-05, + "loss": 2.3527, + "step": 3655000 + }, + { + "epoch": 18.11, + "learning_rate": 4.094794468968456e-05, + "loss": 2.352, + "step": 3655500 + }, + { + "epoch": 18.11, + "learning_rate": 4.0946706103258475e-05, + "loss": 2.3502, + "step": 3656000 + }, + { + "epoch": 18.12, + "learning_rate": 4.094546751683239e-05, + "loss": 2.3427, + "step": 3656500 + }, + { + "epoch": 18.12, + "learning_rate": 4.094423140757916e-05, + "loss": 2.3514, + "step": 3657000 + }, + { + "epoch": 18.12, + "learning_rate": 4.094299282115308e-05, + "loss": 2.3399, + "step": 3657500 + }, + { + "epoch": 18.12, + "learning_rate": 4.0941754234726995e-05, + "loss": 2.3461, + "step": 3658000 + }, + { + "epoch": 18.13, + "learning_rate": 4.094051564830091e-05, + "loss": 2.3321, + "step": 3658500 + }, + { + "epoch": 18.13, + "learning_rate": 4.093927706187483e-05, + "loss": 2.3296, + "step": 3659000 + }, + { + "epoch": 18.13, + "learning_rate": 4.093804095262159e-05, + "loss": 2.338, + "step": 3659500 + }, + { + "epoch": 18.13, + "learning_rate": 4.093680236619551e-05, + "loss": 2.3325, + "step": 3660000 + }, + { + "epoch": 18.14, + "learning_rate": 4.093556625694228e-05, + "loss": 2.3297, + "step": 3660500 + }, + { + "epoch": 18.14, + "learning_rate": 4.093432767051619e-05, + "loss": 2.3328, + "step": 3661000 + }, + { + "epoch": 18.14, + "learning_rate": 4.093308908409011e-05, + "loss": 2.354, + "step": 3661500 + }, + { + "epoch": 18.14, + "learning_rate": 4.093185049766403e-05, + "loss": 2.3395, + "step": 3662000 + }, + { + "epoch": 18.15, + "learning_rate": 4.0930611911237944e-05, + "loss": 2.3472, + "step": 3662500 + }, + { + "epoch": 18.15, + "learning_rate": 4.092937332481186e-05, + "loss": 2.3441, + "step": 3663000 + }, + { + "epoch": 18.15, + "learning_rate": 4.092813473838578e-05, + "loss": 2.3015, + "step": 3663500 + }, + { + "epoch": 18.15, + "learning_rate": 4.092689862913254e-05, + "loss": 2.3355, + "step": 3664000 + }, + { + "epoch": 18.16, + "learning_rate": 4.092566004270646e-05, + "loss": 2.3456, + "step": 3664500 + }, + { + "epoch": 18.16, + "learning_rate": 4.0924421456280374e-05, + "loss": 2.3623, + "step": 3665000 + }, + { + "epoch": 18.16, + "learning_rate": 4.092318286985429e-05, + "loss": 2.3409, + "step": 3665500 + }, + { + "epoch": 18.16, + "learning_rate": 4.092194428342821e-05, + "loss": 2.3148, + "step": 3666000 + }, + { + "epoch": 18.17, + "learning_rate": 4.0920705697002124e-05, + "loss": 2.3218, + "step": 3666500 + }, + { + "epoch": 18.17, + "learning_rate": 4.091946711057604e-05, + "loss": 2.3479, + "step": 3667000 + }, + { + "epoch": 18.17, + "learning_rate": 4.091822852414996e-05, + "loss": 2.3304, + "step": 3667500 + }, + { + "epoch": 18.17, + "learning_rate": 4.0916989937723875e-05, + "loss": 2.3497, + "step": 3668000 + }, + { + "epoch": 18.18, + "learning_rate": 4.0915753828470644e-05, + "loss": 2.3362, + "step": 3668500 + }, + { + "epoch": 18.18, + "learning_rate": 4.091451771921741e-05, + "loss": 2.3458, + "step": 3669000 + }, + { + "epoch": 18.18, + "learning_rate": 4.091327913279133e-05, + "loss": 2.3631, + "step": 3669500 + }, + { + "epoch": 18.18, + "learning_rate": 4.091204054636525e-05, + "loss": 2.3432, + "step": 3670000 + }, + { + "epoch": 18.18, + "learning_rate": 4.091080195993916e-05, + "loss": 2.3421, + "step": 3670500 + }, + { + "epoch": 18.19, + "learning_rate": 4.0909563373513074e-05, + "loss": 2.3361, + "step": 3671000 + }, + { + "epoch": 18.19, + "learning_rate": 4.090832478708699e-05, + "loss": 2.3355, + "step": 3671500 + }, + { + "epoch": 18.19, + "learning_rate": 4.090708620066091e-05, + "loss": 2.3339, + "step": 3672000 + }, + { + "epoch": 18.19, + "learning_rate": 4.0905847614234825e-05, + "loss": 2.3367, + "step": 3672500 + }, + { + "epoch": 18.2, + "learning_rate": 4.090460902780874e-05, + "loss": 2.3447, + "step": 3673000 + }, + { + "epoch": 18.2, + "learning_rate": 4.090337291855551e-05, + "loss": 2.3549, + "step": 3673500 + }, + { + "epoch": 18.2, + "learning_rate": 4.090213433212943e-05, + "loss": 2.3378, + "step": 3674000 + }, + { + "epoch": 18.2, + "learning_rate": 4.0900895745703344e-05, + "loss": 2.3436, + "step": 3674500 + }, + { + "epoch": 18.21, + "learning_rate": 4.089965715927726e-05, + "loss": 2.3406, + "step": 3675000 + }, + { + "epoch": 18.21, + "learning_rate": 4.089841857285118e-05, + "loss": 2.3393, + "step": 3675500 + }, + { + "epoch": 18.21, + "learning_rate": 4.089718246359795e-05, + "loss": 2.3336, + "step": 3676000 + }, + { + "epoch": 18.21, + "learning_rate": 4.0895943877171864e-05, + "loss": 2.3367, + "step": 3676500 + }, + { + "epoch": 18.22, + "learning_rate": 4.089470529074578e-05, + "loss": 2.3672, + "step": 3677000 + }, + { + "epoch": 18.22, + "learning_rate": 4.089346670431969e-05, + "loss": 2.3312, + "step": 3677500 + }, + { + "epoch": 18.22, + "learning_rate": 4.089222811789361e-05, + "loss": 2.35, + "step": 3678000 + }, + { + "epoch": 18.22, + "learning_rate": 4.0890989531467525e-05, + "loss": 2.3514, + "step": 3678500 + }, + { + "epoch": 18.23, + "learning_rate": 4.08897534222143e-05, + "loss": 2.369, + "step": 3679000 + }, + { + "epoch": 18.23, + "learning_rate": 4.088851483578821e-05, + "loss": 2.3395, + "step": 3679500 + }, + { + "epoch": 18.23, + "learning_rate": 4.088727624936213e-05, + "loss": 2.3569, + "step": 3680000 + }, + { + "epoch": 18.23, + "learning_rate": 4.0886037662936044e-05, + "loss": 2.3456, + "step": 3680500 + }, + { + "epoch": 18.24, + "learning_rate": 4.088480155368282e-05, + "loss": 2.3367, + "step": 3681000 + }, + { + "epoch": 18.24, + "learning_rate": 4.088356544442958e-05, + "loss": 2.3175, + "step": 3681500 + }, + { + "epoch": 18.24, + "learning_rate": 4.08823268580035e-05, + "loss": 2.3266, + "step": 3682000 + }, + { + "epoch": 18.24, + "learning_rate": 4.0881088271577416e-05, + "loss": 2.3512, + "step": 3682500 + }, + { + "epoch": 18.25, + "learning_rate": 4.087984968515133e-05, + "loss": 2.3464, + "step": 3683000 + }, + { + "epoch": 18.25, + "learning_rate": 4.08786135758981e-05, + "loss": 2.3319, + "step": 3683500 + }, + { + "epoch": 18.25, + "learning_rate": 4.087737498947202e-05, + "loss": 2.3545, + "step": 3684000 + }, + { + "epoch": 18.25, + "learning_rate": 4.0876136403045935e-05, + "loss": 2.3483, + "step": 3684500 + }, + { + "epoch": 18.26, + "learning_rate": 4.087489781661985e-05, + "loss": 2.3413, + "step": 3685000 + }, + { + "epoch": 18.26, + "learning_rate": 4.087365923019377e-05, + "loss": 2.3222, + "step": 3685500 + }, + { + "epoch": 18.26, + "learning_rate": 4.0872420643767686e-05, + "loss": 2.3522, + "step": 3686000 + }, + { + "epoch": 18.26, + "learning_rate": 4.08711820573416e-05, + "loss": 2.3184, + "step": 3686500 + }, + { + "epoch": 18.27, + "learning_rate": 4.086994347091552e-05, + "loss": 2.3553, + "step": 3687000 + }, + { + "epoch": 18.27, + "learning_rate": 4.086870736166228e-05, + "loss": 2.3474, + "step": 3687500 + }, + { + "epoch": 18.27, + "learning_rate": 4.08674687752362e-05, + "loss": 2.3626, + "step": 3688000 + }, + { + "epoch": 18.27, + "learning_rate": 4.086623266598297e-05, + "loss": 2.3262, + "step": 3688500 + }, + { + "epoch": 18.28, + "learning_rate": 4.0864994079556885e-05, + "loss": 2.3548, + "step": 3689000 + }, + { + "epoch": 18.28, + "learning_rate": 4.08637554931308e-05, + "loss": 2.3467, + "step": 3689500 + }, + { + "epoch": 18.28, + "learning_rate": 4.086251690670472e-05, + "loss": 2.3482, + "step": 3690000 + }, + { + "epoch": 18.28, + "learning_rate": 4.086128079745149e-05, + "loss": 2.3704, + "step": 3690500 + }, + { + "epoch": 18.29, + "learning_rate": 4.0860042211025404e-05, + "loss": 2.3454, + "step": 3691000 + }, + { + "epoch": 18.29, + "learning_rate": 4.085880610177217e-05, + "loss": 2.3512, + "step": 3691500 + }, + { + "epoch": 18.29, + "learning_rate": 4.085756751534609e-05, + "loss": 2.3291, + "step": 3692000 + }, + { + "epoch": 18.29, + "learning_rate": 4.085632892892001e-05, + "loss": 2.3378, + "step": 3692500 + }, + { + "epoch": 18.3, + "learning_rate": 4.085509034249392e-05, + "loss": 2.3274, + "step": 3693000 + }, + { + "epoch": 18.3, + "learning_rate": 4.0853851756067834e-05, + "loss": 2.3324, + "step": 3693500 + }, + { + "epoch": 18.3, + "learning_rate": 4.085261316964175e-05, + "loss": 2.3558, + "step": 3694000 + }, + { + "epoch": 18.3, + "learning_rate": 4.085137458321567e-05, + "loss": 2.3529, + "step": 3694500 + }, + { + "epoch": 18.31, + "learning_rate": 4.0850135996789585e-05, + "loss": 2.3572, + "step": 3695000 + }, + { + "epoch": 18.31, + "learning_rate": 4.08488974103635e-05, + "loss": 2.3294, + "step": 3695500 + }, + { + "epoch": 18.31, + "learning_rate": 4.084765882393742e-05, + "loss": 2.3592, + "step": 3696000 + }, + { + "epoch": 18.31, + "learning_rate": 4.084642271468419e-05, + "loss": 2.3392, + "step": 3696500 + }, + { + "epoch": 18.32, + "learning_rate": 4.0845184128258104e-05, + "loss": 2.3154, + "step": 3697000 + }, + { + "epoch": 18.32, + "learning_rate": 4.084394801900487e-05, + "loss": 2.3509, + "step": 3697500 + }, + { + "epoch": 18.32, + "learning_rate": 4.084270943257879e-05, + "loss": 2.3477, + "step": 3698000 + }, + { + "epoch": 18.32, + "learning_rate": 4.084147084615271e-05, + "loss": 2.3218, + "step": 3698500 + }, + { + "epoch": 18.33, + "learning_rate": 4.084023225972662e-05, + "loss": 2.3387, + "step": 3699000 + }, + { + "epoch": 18.33, + "learning_rate": 4.0838993673300534e-05, + "loss": 2.3438, + "step": 3699500 + }, + { + "epoch": 18.33, + "learning_rate": 4.083775508687445e-05, + "loss": 2.341, + "step": 3700000 + }, + { + "epoch": 18.33, + "learning_rate": 4.083651650044837e-05, + "loss": 2.3482, + "step": 3700500 + }, + { + "epoch": 18.34, + "learning_rate": 4.0835277914022285e-05, + "loss": 2.3661, + "step": 3701000 + }, + { + "epoch": 18.34, + "learning_rate": 4.08340393275962e-05, + "loss": 2.3362, + "step": 3701500 + }, + { + "epoch": 18.34, + "learning_rate": 4.083280074117012e-05, + "loss": 2.3077, + "step": 3702000 + }, + { + "epoch": 18.34, + "learning_rate": 4.083156463191689e-05, + "loss": 2.362, + "step": 3702500 + }, + { + "epoch": 18.35, + "learning_rate": 4.0830326045490804e-05, + "loss": 2.3652, + "step": 3703000 + }, + { + "epoch": 18.35, + "learning_rate": 4.082908745906472e-05, + "loss": 2.3664, + "step": 3703500 + }, + { + "epoch": 18.35, + "learning_rate": 4.082784887263864e-05, + "loss": 2.3331, + "step": 3704000 + }, + { + "epoch": 18.35, + "learning_rate": 4.0826610286212555e-05, + "loss": 2.3267, + "step": 3704500 + }, + { + "epoch": 18.36, + "learning_rate": 4.0825371699786465e-05, + "loss": 2.3312, + "step": 3705000 + }, + { + "epoch": 18.36, + "learning_rate": 4.082413311336038e-05, + "loss": 2.362, + "step": 3705500 + }, + { + "epoch": 18.36, + "learning_rate": 4.082289700410715e-05, + "loss": 2.3326, + "step": 3706000 + }, + { + "epoch": 18.36, + "learning_rate": 4.082165841768107e-05, + "loss": 2.3602, + "step": 3706500 + }, + { + "epoch": 18.37, + "learning_rate": 4.0820419831254985e-05, + "loss": 2.3623, + "step": 3707000 + }, + { + "epoch": 18.37, + "learning_rate": 4.08191812448289e-05, + "loss": 2.3526, + "step": 3707500 + }, + { + "epoch": 18.37, + "learning_rate": 4.081794265840282e-05, + "loss": 2.3439, + "step": 3708000 + }, + { + "epoch": 18.37, + "learning_rate": 4.081670654914959e-05, + "loss": 2.3465, + "step": 3708500 + }, + { + "epoch": 18.38, + "learning_rate": 4.0815467962723505e-05, + "loss": 2.3447, + "step": 3709000 + }, + { + "epoch": 18.38, + "learning_rate": 4.081422937629742e-05, + "loss": 2.3459, + "step": 3709500 + }, + { + "epoch": 18.38, + "learning_rate": 4.081299078987134e-05, + "loss": 2.3666, + "step": 3710000 + }, + { + "epoch": 18.38, + "learning_rate": 4.0811752203445255e-05, + "loss": 2.3043, + "step": 3710500 + }, + { + "epoch": 18.39, + "learning_rate": 4.0810516094192024e-05, + "loss": 2.3246, + "step": 3711000 + }, + { + "epoch": 18.39, + "learning_rate": 4.0809277507765934e-05, + "loss": 2.3471, + "step": 3711500 + }, + { + "epoch": 18.39, + "learning_rate": 4.080803892133985e-05, + "loss": 2.3432, + "step": 3712000 + }, + { + "epoch": 18.39, + "learning_rate": 4.080680033491377e-05, + "loss": 2.3355, + "step": 3712500 + }, + { + "epoch": 18.4, + "learning_rate": 4.0805564225660544e-05, + "loss": 2.3684, + "step": 3713000 + }, + { + "epoch": 18.4, + "learning_rate": 4.080432563923446e-05, + "loss": 2.366, + "step": 3713500 + }, + { + "epoch": 18.4, + "learning_rate": 4.080308705280838e-05, + "loss": 2.3494, + "step": 3714000 + }, + { + "epoch": 18.4, + "learning_rate": 4.080184846638229e-05, + "loss": 2.3516, + "step": 3714500 + }, + { + "epoch": 18.41, + "learning_rate": 4.0800609879956205e-05, + "loss": 2.3581, + "step": 3715000 + }, + { + "epoch": 18.41, + "learning_rate": 4.079937129353012e-05, + "loss": 2.3587, + "step": 3715500 + }, + { + "epoch": 18.41, + "learning_rate": 4.079813270710404e-05, + "loss": 2.3318, + "step": 3716000 + }, + { + "epoch": 18.41, + "learning_rate": 4.0796894120677956e-05, + "loss": 2.3423, + "step": 3716500 + }, + { + "epoch": 18.42, + "learning_rate": 4.079565553425187e-05, + "loss": 2.338, + "step": 3717000 + }, + { + "epoch": 18.42, + "learning_rate": 4.079441942499864e-05, + "loss": 2.3467, + "step": 3717500 + }, + { + "epoch": 18.42, + "learning_rate": 4.079318331574541e-05, + "loss": 2.3822, + "step": 3718000 + }, + { + "epoch": 18.42, + "learning_rate": 4.079194472931933e-05, + "loss": 2.3626, + "step": 3718500 + }, + { + "epoch": 18.43, + "learning_rate": 4.0790706142893244e-05, + "loss": 2.3683, + "step": 3719000 + }, + { + "epoch": 18.43, + "learning_rate": 4.078946755646716e-05, + "loss": 2.3831, + "step": 3719500 + }, + { + "epoch": 18.43, + "learning_rate": 4.078822897004108e-05, + "loss": 2.3206, + "step": 3720000 + }, + { + "epoch": 18.43, + "learning_rate": 4.0786990383614995e-05, + "loss": 2.3446, + "step": 3720500 + }, + { + "epoch": 18.44, + "learning_rate": 4.0785751797188905e-05, + "loss": 2.3532, + "step": 3721000 + }, + { + "epoch": 18.44, + "learning_rate": 4.078451321076282e-05, + "loss": 2.3127, + "step": 3721500 + }, + { + "epoch": 18.44, + "learning_rate": 4.078327462433674e-05, + "loss": 2.3387, + "step": 3722000 + }, + { + "epoch": 18.44, + "learning_rate": 4.0782036037910656e-05, + "loss": 2.3392, + "step": 3722500 + }, + { + "epoch": 18.45, + "learning_rate": 4.0780799928657424e-05, + "loss": 2.3358, + "step": 3723000 + }, + { + "epoch": 18.45, + "learning_rate": 4.077956134223134e-05, + "loss": 2.3367, + "step": 3723500 + }, + { + "epoch": 18.45, + "learning_rate": 4.077832275580525e-05, + "loss": 2.375, + "step": 3724000 + }, + { + "epoch": 18.45, + "learning_rate": 4.077708416937917e-05, + "loss": 2.3369, + "step": 3724500 + }, + { + "epoch": 18.45, + "learning_rate": 4.0775845582953085e-05, + "loss": 2.3509, + "step": 3725000 + }, + { + "epoch": 18.46, + "learning_rate": 4.0774606996527e-05, + "loss": 2.3558, + "step": 3725500 + }, + { + "epoch": 18.46, + "learning_rate": 4.077336841010092e-05, + "loss": 2.338, + "step": 3726000 + }, + { + "epoch": 18.46, + "learning_rate": 4.0772129823674836e-05, + "loss": 2.351, + "step": 3726500 + }, + { + "epoch": 18.46, + "learning_rate": 4.0770893714421605e-05, + "loss": 2.3565, + "step": 3727000 + }, + { + "epoch": 18.47, + "learning_rate": 4.076965512799552e-05, + "loss": 2.3638, + "step": 3727500 + }, + { + "epoch": 18.47, + "learning_rate": 4.076841654156944e-05, + "loss": 2.3696, + "step": 3728000 + }, + { + "epoch": 18.47, + "learning_rate": 4.0767177955143356e-05, + "loss": 2.3566, + "step": 3728500 + }, + { + "epoch": 18.47, + "learning_rate": 4.076593936871727e-05, + "loss": 2.3472, + "step": 3729000 + }, + { + "epoch": 18.48, + "learning_rate": 4.076470325946404e-05, + "loss": 2.3396, + "step": 3729500 + }, + { + "epoch": 18.48, + "learning_rate": 4.076346467303796e-05, + "loss": 2.3794, + "step": 3730000 + }, + { + "epoch": 18.48, + "learning_rate": 4.076222608661187e-05, + "loss": 2.3788, + "step": 3730500 + }, + { + "epoch": 18.48, + "learning_rate": 4.0760987500185786e-05, + "loss": 2.3517, + "step": 3731000 + }, + { + "epoch": 18.49, + "learning_rate": 4.075975139093256e-05, + "loss": 2.3382, + "step": 3731500 + }, + { + "epoch": 18.49, + "learning_rate": 4.075851280450648e-05, + "loss": 2.3403, + "step": 3732000 + }, + { + "epoch": 18.49, + "learning_rate": 4.0757274218080395e-05, + "loss": 2.3588, + "step": 3732500 + }, + { + "epoch": 18.49, + "learning_rate": 4.075603563165431e-05, + "loss": 2.3352, + "step": 3733000 + }, + { + "epoch": 18.5, + "learning_rate": 4.075479704522822e-05, + "loss": 2.3494, + "step": 3733500 + }, + { + "epoch": 18.5, + "learning_rate": 4.075355845880214e-05, + "loss": 2.3463, + "step": 3734000 + }, + { + "epoch": 18.5, + "learning_rate": 4.0752319872376056e-05, + "loss": 2.3804, + "step": 3734500 + }, + { + "epoch": 18.5, + "learning_rate": 4.075108128594997e-05, + "loss": 2.3203, + "step": 3735000 + }, + { + "epoch": 18.51, + "learning_rate": 4.074984269952389e-05, + "loss": 2.3402, + "step": 3735500 + }, + { + "epoch": 18.51, + "learning_rate": 4.074860659027066e-05, + "loss": 2.3319, + "step": 3736000 + }, + { + "epoch": 18.51, + "learning_rate": 4.074737048101743e-05, + "loss": 2.3261, + "step": 3736500 + }, + { + "epoch": 18.51, + "learning_rate": 4.0746131894591344e-05, + "loss": 2.3539, + "step": 3737000 + }, + { + "epoch": 18.52, + "learning_rate": 4.074489330816526e-05, + "loss": 2.3372, + "step": 3737500 + }, + { + "epoch": 18.52, + "learning_rate": 4.074365472173918e-05, + "loss": 2.3349, + "step": 3738000 + }, + { + "epoch": 18.52, + "learning_rate": 4.0742416135313095e-05, + "loss": 2.3208, + "step": 3738500 + }, + { + "epoch": 18.52, + "learning_rate": 4.0741180026059864e-05, + "loss": 2.3495, + "step": 3739000 + }, + { + "epoch": 18.53, + "learning_rate": 4.073994143963378e-05, + "loss": 2.3535, + "step": 3739500 + }, + { + "epoch": 18.53, + "learning_rate": 4.073870285320769e-05, + "loss": 2.3686, + "step": 3740000 + }, + { + "epoch": 18.53, + "learning_rate": 4.073746426678161e-05, + "loss": 2.3508, + "step": 3740500 + }, + { + "epoch": 18.53, + "learning_rate": 4.0736225680355525e-05, + "loss": 2.3398, + "step": 3741000 + }, + { + "epoch": 18.54, + "learning_rate": 4.073498709392944e-05, + "loss": 2.3602, + "step": 3741500 + }, + { + "epoch": 18.54, + "learning_rate": 4.073375098467621e-05, + "loss": 2.344, + "step": 3742000 + }, + { + "epoch": 18.54, + "learning_rate": 4.073251239825013e-05, + "loss": 2.3464, + "step": 3742500 + }, + { + "epoch": 18.54, + "learning_rate": 4.0731273811824044e-05, + "loss": 2.3433, + "step": 3743000 + }, + { + "epoch": 18.55, + "learning_rate": 4.073003522539796e-05, + "loss": 2.3592, + "step": 3743500 + }, + { + "epoch": 18.55, + "learning_rate": 4.072879663897188e-05, + "loss": 2.318, + "step": 3744000 + }, + { + "epoch": 18.55, + "learning_rate": 4.072756052971865e-05, + "loss": 2.3456, + "step": 3744500 + }, + { + "epoch": 18.55, + "learning_rate": 4.0726321943292564e-05, + "loss": 2.3568, + "step": 3745000 + }, + { + "epoch": 18.56, + "learning_rate": 4.072508335686648e-05, + "loss": 2.3698, + "step": 3745500 + }, + { + "epoch": 18.56, + "learning_rate": 4.07238447704404e-05, + "loss": 2.3393, + "step": 3746000 + }, + { + "epoch": 18.56, + "learning_rate": 4.072260618401431e-05, + "loss": 2.3585, + "step": 3746500 + }, + { + "epoch": 18.56, + "learning_rate": 4.0721367597588225e-05, + "loss": 2.3521, + "step": 3747000 + }, + { + "epoch": 18.57, + "learning_rate": 4.072012901116214e-05, + "loss": 2.3465, + "step": 3747500 + }, + { + "epoch": 18.57, + "learning_rate": 4.071889042473606e-05, + "loss": 2.3389, + "step": 3748000 + }, + { + "epoch": 18.57, + "learning_rate": 4.071765431548283e-05, + "loss": 2.3552, + "step": 3748500 + }, + { + "epoch": 18.57, + "learning_rate": 4.0716415729056745e-05, + "loss": 2.3512, + "step": 3749000 + }, + { + "epoch": 18.58, + "learning_rate": 4.071517714263066e-05, + "loss": 2.3437, + "step": 3749500 + }, + { + "epoch": 18.58, + "learning_rate": 4.071393855620458e-05, + "loss": 2.3615, + "step": 3750000 + }, + { + "epoch": 18.58, + "learning_rate": 4.0712699969778495e-05, + "loss": 2.3265, + "step": 3750500 + }, + { + "epoch": 18.58, + "learning_rate": 4.071146138335241e-05, + "loss": 2.3665, + "step": 3751000 + }, + { + "epoch": 18.59, + "learning_rate": 4.071022527409918e-05, + "loss": 2.345, + "step": 3751500 + }, + { + "epoch": 18.59, + "learning_rate": 4.07089866876731e-05, + "loss": 2.3294, + "step": 3752000 + }, + { + "epoch": 18.59, + "learning_rate": 4.0707748101247015e-05, + "loss": 2.3589, + "step": 3752500 + }, + { + "epoch": 18.59, + "learning_rate": 4.070650951482093e-05, + "loss": 2.3329, + "step": 3753000 + }, + { + "epoch": 18.6, + "learning_rate": 4.070527092839484e-05, + "loss": 2.3413, + "step": 3753500 + }, + { + "epoch": 18.6, + "learning_rate": 4.070403729631446e-05, + "loss": 2.3506, + "step": 3754000 + }, + { + "epoch": 18.6, + "learning_rate": 4.070279870988838e-05, + "loss": 2.3563, + "step": 3754500 + }, + { + "epoch": 18.6, + "learning_rate": 4.0701560123462297e-05, + "loss": 2.3641, + "step": 3755000 + }, + { + "epoch": 18.61, + "learning_rate": 4.0700321537036213e-05, + "loss": 2.3566, + "step": 3755500 + }, + { + "epoch": 18.61, + "learning_rate": 4.069908295061013e-05, + "loss": 2.3409, + "step": 3756000 + }, + { + "epoch": 18.61, + "learning_rate": 4.06978468413569e-05, + "loss": 2.3472, + "step": 3756500 + }, + { + "epoch": 18.61, + "learning_rate": 4.069660825493081e-05, + "loss": 2.3627, + "step": 3757000 + }, + { + "epoch": 18.62, + "learning_rate": 4.0695369668504726e-05, + "loss": 2.3429, + "step": 3757500 + }, + { + "epoch": 18.62, + "learning_rate": 4.069413108207864e-05, + "loss": 2.3491, + "step": 3758000 + }, + { + "epoch": 18.62, + "learning_rate": 4.069289497282542e-05, + "loss": 2.3293, + "step": 3758500 + }, + { + "epoch": 18.62, + "learning_rate": 4.069165886357219e-05, + "loss": 2.365, + "step": 3759000 + }, + { + "epoch": 18.63, + "learning_rate": 4.0690420277146105e-05, + "loss": 2.3383, + "step": 3759500 + }, + { + "epoch": 18.63, + "learning_rate": 4.068918169072002e-05, + "loss": 2.3502, + "step": 3760000 + }, + { + "epoch": 18.63, + "learning_rate": 4.068794310429394e-05, + "loss": 2.3331, + "step": 3760500 + }, + { + "epoch": 18.63, + "learning_rate": 4.068670451786785e-05, + "loss": 2.3463, + "step": 3761000 + }, + { + "epoch": 18.64, + "learning_rate": 4.0685465931441765e-05, + "loss": 2.3493, + "step": 3761500 + }, + { + "epoch": 18.64, + "learning_rate": 4.068422734501568e-05, + "loss": 2.3455, + "step": 3762000 + }, + { + "epoch": 18.64, + "learning_rate": 4.06829887585896e-05, + "loss": 2.3398, + "step": 3762500 + }, + { + "epoch": 18.64, + "learning_rate": 4.0681750172163516e-05, + "loss": 2.3684, + "step": 3763000 + }, + { + "epoch": 18.65, + "learning_rate": 4.0680514062910285e-05, + "loss": 2.3276, + "step": 3763500 + }, + { + "epoch": 18.65, + "learning_rate": 4.06792754764842e-05, + "loss": 2.3632, + "step": 3764000 + }, + { + "epoch": 18.65, + "learning_rate": 4.067803689005812e-05, + "loss": 2.3577, + "step": 3764500 + }, + { + "epoch": 18.65, + "learning_rate": 4.0676798303632036e-05, + "loss": 2.3654, + "step": 3765000 + }, + { + "epoch": 18.66, + "learning_rate": 4.0675562194378805e-05, + "loss": 2.3418, + "step": 3765500 + }, + { + "epoch": 18.66, + "learning_rate": 4.067432360795272e-05, + "loss": 2.3504, + "step": 3766000 + }, + { + "epoch": 18.66, + "learning_rate": 4.067308502152664e-05, + "loss": 2.3258, + "step": 3766500 + }, + { + "epoch": 18.66, + "learning_rate": 4.0671846435100555e-05, + "loss": 2.3525, + "step": 3767000 + }, + { + "epoch": 18.67, + "learning_rate": 4.067060784867447e-05, + "loss": 2.3618, + "step": 3767500 + }, + { + "epoch": 18.67, + "learning_rate": 4.066936926224838e-05, + "loss": 2.3604, + "step": 3768000 + }, + { + "epoch": 18.67, + "learning_rate": 4.06681306758223e-05, + "loss": 2.365, + "step": 3768500 + }, + { + "epoch": 18.67, + "learning_rate": 4.0666892089396216e-05, + "loss": 2.3799, + "step": 3769000 + }, + { + "epoch": 18.68, + "learning_rate": 4.066565350297013e-05, + "loss": 2.3418, + "step": 3769500 + }, + { + "epoch": 18.68, + "learning_rate": 4.066441491654405e-05, + "loss": 2.3537, + "step": 3770000 + }, + { + "epoch": 18.68, + "learning_rate": 4.066317633011796e-05, + "loss": 2.363, + "step": 3770500 + }, + { + "epoch": 18.68, + "learning_rate": 4.066193774369188e-05, + "loss": 2.3617, + "step": 3771000 + }, + { + "epoch": 18.69, + "learning_rate": 4.0660699157265794e-05, + "loss": 2.3339, + "step": 3771500 + }, + { + "epoch": 18.69, + "learning_rate": 4.065946057083971e-05, + "loss": 2.3676, + "step": 3772000 + }, + { + "epoch": 18.69, + "learning_rate": 4.065822446158648e-05, + "loss": 2.3482, + "step": 3772500 + }, + { + "epoch": 18.69, + "learning_rate": 4.06569858751604e-05, + "loss": 2.333, + "step": 3773000 + }, + { + "epoch": 18.7, + "learning_rate": 4.0655747288734314e-05, + "loss": 2.3455, + "step": 3773500 + }, + { + "epoch": 18.7, + "learning_rate": 4.065450870230823e-05, + "loss": 2.3702, + "step": 3774000 + }, + { + "epoch": 18.7, + "learning_rate": 4.065327011588215e-05, + "loss": 2.3753, + "step": 3774500 + }, + { + "epoch": 18.7, + "learning_rate": 4.0652034006628917e-05, + "loss": 2.3434, + "step": 3775000 + }, + { + "epoch": 18.71, + "learning_rate": 4.0650795420202833e-05, + "loss": 2.3595, + "step": 3775500 + }, + { + "epoch": 18.71, + "learning_rate": 4.064955683377675e-05, + "loss": 2.3349, + "step": 3776000 + }, + { + "epoch": 18.71, + "learning_rate": 4.064831824735067e-05, + "loss": 2.3468, + "step": 3776500 + }, + { + "epoch": 18.71, + "learning_rate": 4.064707966092458e-05, + "loss": 2.35, + "step": 3777000 + }, + { + "epoch": 18.72, + "learning_rate": 4.0645841074498494e-05, + "loss": 2.3771, + "step": 3777500 + }, + { + "epoch": 18.72, + "learning_rate": 4.064460496524526e-05, + "loss": 2.3921, + "step": 3778000 + }, + { + "epoch": 18.72, + "learning_rate": 4.064336637881918e-05, + "loss": 2.3522, + "step": 3778500 + }, + { + "epoch": 18.72, + "learning_rate": 4.06421277923931e-05, + "loss": 2.3284, + "step": 3779000 + }, + { + "epoch": 18.72, + "learning_rate": 4.0640889205967014e-05, + "loss": 2.3193, + "step": 3779500 + }, + { + "epoch": 18.73, + "learning_rate": 4.063965061954093e-05, + "loss": 2.3292, + "step": 3780000 + }, + { + "epoch": 18.73, + "learning_rate": 4.063841203311485e-05, + "loss": 2.3412, + "step": 3780500 + }, + { + "epoch": 18.73, + "learning_rate": 4.0637173446688765e-05, + "loss": 2.3477, + "step": 3781000 + }, + { + "epoch": 18.73, + "learning_rate": 4.063593486026268e-05, + "loss": 2.3427, + "step": 3781500 + }, + { + "epoch": 18.74, + "learning_rate": 4.063469875100945e-05, + "loss": 2.359, + "step": 3782000 + }, + { + "epoch": 18.74, + "learning_rate": 4.063346264175622e-05, + "loss": 2.3708, + "step": 3782500 + }, + { + "epoch": 18.74, + "learning_rate": 4.0632224055330136e-05, + "loss": 2.3456, + "step": 3783000 + }, + { + "epoch": 18.74, + "learning_rate": 4.063098546890405e-05, + "loss": 2.3478, + "step": 3783500 + }, + { + "epoch": 18.75, + "learning_rate": 4.062974935965082e-05, + "loss": 2.3417, + "step": 3784000 + }, + { + "epoch": 18.75, + "learning_rate": 4.062851077322474e-05, + "loss": 2.3459, + "step": 3784500 + }, + { + "epoch": 18.75, + "learning_rate": 4.0627272186798656e-05, + "loss": 2.3443, + "step": 3785000 + }, + { + "epoch": 18.75, + "learning_rate": 4.062603360037257e-05, + "loss": 2.3511, + "step": 3785500 + }, + { + "epoch": 18.76, + "learning_rate": 4.062479501394649e-05, + "loss": 2.3571, + "step": 3786000 + }, + { + "epoch": 18.76, + "learning_rate": 4.062355890469325e-05, + "loss": 2.3711, + "step": 3786500 + }, + { + "epoch": 18.76, + "learning_rate": 4.062232031826717e-05, + "loss": 2.3704, + "step": 3787000 + }, + { + "epoch": 18.76, + "learning_rate": 4.0621081731841086e-05, + "loss": 2.3424, + "step": 3787500 + }, + { + "epoch": 18.77, + "learning_rate": 4.0619843145415e-05, + "loss": 2.3654, + "step": 3788000 + }, + { + "epoch": 18.77, + "learning_rate": 4.061860455898892e-05, + "loss": 2.3397, + "step": 3788500 + }, + { + "epoch": 18.77, + "learning_rate": 4.0617365972562836e-05, + "loss": 2.3416, + "step": 3789000 + }, + { + "epoch": 18.77, + "learning_rate": 4.061612738613675e-05, + "loss": 2.3722, + "step": 3789500 + }, + { + "epoch": 18.78, + "learning_rate": 4.061488879971067e-05, + "loss": 2.3566, + "step": 3790000 + }, + { + "epoch": 18.78, + "learning_rate": 4.061365269045744e-05, + "loss": 2.3691, + "step": 3790500 + }, + { + "epoch": 18.78, + "learning_rate": 4.0612414104031356e-05, + "loss": 2.3407, + "step": 3791000 + }, + { + "epoch": 18.78, + "learning_rate": 4.061117551760527e-05, + "loss": 2.3298, + "step": 3791500 + }, + { + "epoch": 18.79, + "learning_rate": 4.060993693117919e-05, + "loss": 2.3593, + "step": 3792000 + }, + { + "epoch": 18.79, + "learning_rate": 4.060869834475311e-05, + "loss": 2.3477, + "step": 3792500 + }, + { + "epoch": 18.79, + "learning_rate": 4.0607459758327024e-05, + "loss": 2.3582, + "step": 3793000 + }, + { + "epoch": 18.79, + "learning_rate": 4.0606223649073786e-05, + "loss": 2.3396, + "step": 3793500 + }, + { + "epoch": 18.8, + "learning_rate": 4.06049850626477e-05, + "loss": 2.3546, + "step": 3794000 + }, + { + "epoch": 18.8, + "learning_rate": 4.060374647622162e-05, + "loss": 2.3364, + "step": 3794500 + }, + { + "epoch": 18.8, + "learning_rate": 4.0602507889795536e-05, + "loss": 2.3689, + "step": 3795000 + }, + { + "epoch": 18.8, + "learning_rate": 4.0601269303369453e-05, + "loss": 2.3572, + "step": 3795500 + }, + { + "epoch": 18.81, + "learning_rate": 4.060003071694337e-05, + "loss": 2.3567, + "step": 3796000 + }, + { + "epoch": 18.81, + "learning_rate": 4.059879213051728e-05, + "loss": 2.3234, + "step": 3796500 + }, + { + "epoch": 18.81, + "learning_rate": 4.05975535440912e-05, + "loss": 2.3548, + "step": 3797000 + }, + { + "epoch": 18.81, + "learning_rate": 4.0596314957665114e-05, + "loss": 2.3628, + "step": 3797500 + }, + { + "epoch": 18.82, + "learning_rate": 4.059507884841189e-05, + "loss": 2.3432, + "step": 3798000 + }, + { + "epoch": 18.82, + "learning_rate": 4.059384026198581e-05, + "loss": 2.3482, + "step": 3798500 + }, + { + "epoch": 18.82, + "learning_rate": 4.0592601675559724e-05, + "loss": 2.3262, + "step": 3799000 + }, + { + "epoch": 18.82, + "learning_rate": 4.059136308913364e-05, + "loss": 2.354, + "step": 3799500 + }, + { + "epoch": 18.83, + "learning_rate": 4.05901269798804e-05, + "loss": 2.3366, + "step": 3800000 + }, + { + "epoch": 18.83, + "learning_rate": 4.058889087062717e-05, + "loss": 2.3351, + "step": 3800500 + }, + { + "epoch": 18.83, + "learning_rate": 4.058765228420109e-05, + "loss": 2.3698, + "step": 3801000 + }, + { + "epoch": 18.83, + "learning_rate": 4.0586413697775005e-05, + "loss": 2.3652, + "step": 3801500 + }, + { + "epoch": 18.84, + "learning_rate": 4.058517511134892e-05, + "loss": 2.3572, + "step": 3802000 + }, + { + "epoch": 18.84, + "learning_rate": 4.058393900209569e-05, + "loss": 2.3613, + "step": 3802500 + }, + { + "epoch": 18.84, + "learning_rate": 4.058270041566961e-05, + "loss": 2.3577, + "step": 3803000 + }, + { + "epoch": 18.84, + "learning_rate": 4.0581461829243525e-05, + "loss": 2.3717, + "step": 3803500 + }, + { + "epoch": 18.85, + "learning_rate": 4.058022324281744e-05, + "loss": 2.3315, + "step": 3804000 + }, + { + "epoch": 18.85, + "learning_rate": 4.057898465639136e-05, + "loss": 2.375, + "step": 3804500 + }, + { + "epoch": 18.85, + "learning_rate": 4.057774606996527e-05, + "loss": 2.3772, + "step": 3805000 + }, + { + "epoch": 18.85, + "learning_rate": 4.0576507483539186e-05, + "loss": 2.3574, + "step": 3805500 + }, + { + "epoch": 18.86, + "learning_rate": 4.05752688971131e-05, + "loss": 2.3731, + "step": 3806000 + }, + { + "epoch": 18.86, + "learning_rate": 4.057403031068702e-05, + "loss": 2.3567, + "step": 3806500 + }, + { + "epoch": 18.86, + "learning_rate": 4.057279172426094e-05, + "loss": 2.3155, + "step": 3807000 + }, + { + "epoch": 18.86, + "learning_rate": 4.0571553137834854e-05, + "loss": 2.362, + "step": 3807500 + }, + { + "epoch": 18.87, + "learning_rate": 4.057031455140877e-05, + "loss": 2.3639, + "step": 3808000 + }, + { + "epoch": 18.87, + "learning_rate": 4.056907596498269e-05, + "loss": 2.3458, + "step": 3808500 + }, + { + "epoch": 18.87, + "learning_rate": 4.05678373785566e-05, + "loss": 2.3485, + "step": 3809000 + }, + { + "epoch": 18.87, + "learning_rate": 4.0566598792130515e-05, + "loss": 2.3583, + "step": 3809500 + }, + { + "epoch": 18.88, + "learning_rate": 4.056536516005014e-05, + "loss": 2.3601, + "step": 3810000 + }, + { + "epoch": 18.88, + "learning_rate": 4.056412657362406e-05, + "loss": 2.3376, + "step": 3810500 + }, + { + "epoch": 18.88, + "learning_rate": 4.0562887987197976e-05, + "loss": 2.3372, + "step": 3811000 + }, + { + "epoch": 18.88, + "learning_rate": 4.056165187794474e-05, + "loss": 2.3443, + "step": 3811500 + }, + { + "epoch": 18.89, + "learning_rate": 4.0560413291518655e-05, + "loss": 2.349, + "step": 3812000 + }, + { + "epoch": 18.89, + "learning_rate": 4.055917470509257e-05, + "loss": 2.3483, + "step": 3812500 + }, + { + "epoch": 18.89, + "learning_rate": 4.055793611866649e-05, + "loss": 2.3419, + "step": 3813000 + }, + { + "epoch": 18.89, + "learning_rate": 4.0556697532240406e-05, + "loss": 2.3571, + "step": 3813500 + }, + { + "epoch": 18.9, + "learning_rate": 4.055545894581432e-05, + "loss": 2.3455, + "step": 3814000 + }, + { + "epoch": 18.9, + "learning_rate": 4.055422035938824e-05, + "loss": 2.3596, + "step": 3814500 + }, + { + "epoch": 18.9, + "learning_rate": 4.0552981772962156e-05, + "loss": 2.3755, + "step": 3815000 + }, + { + "epoch": 18.9, + "learning_rate": 4.055174318653607e-05, + "loss": 2.3312, + "step": 3815500 + }, + { + "epoch": 18.91, + "learning_rate": 4.055050460010999e-05, + "loss": 2.356, + "step": 3816000 + }, + { + "epoch": 18.91, + "learning_rate": 4.054926601368391e-05, + "loss": 2.3591, + "step": 3816500 + }, + { + "epoch": 18.91, + "learning_rate": 4.0548027427257824e-05, + "loss": 2.3418, + "step": 3817000 + }, + { + "epoch": 18.91, + "learning_rate": 4.054678884083174e-05, + "loss": 2.3178, + "step": 3817500 + }, + { + "epoch": 18.92, + "learning_rate": 4.054555025440566e-05, + "loss": 2.3461, + "step": 3818000 + }, + { + "epoch": 18.92, + "learning_rate": 4.054431166797957e-05, + "loss": 2.3436, + "step": 3818500 + }, + { + "epoch": 18.92, + "learning_rate": 4.0543073081553485e-05, + "loss": 2.3313, + "step": 3819000 + }, + { + "epoch": 18.92, + "learning_rate": 4.05418344951274e-05, + "loss": 2.3429, + "step": 3819500 + }, + { + "epoch": 18.93, + "learning_rate": 4.054059838587417e-05, + "loss": 2.3593, + "step": 3820000 + }, + { + "epoch": 18.93, + "learning_rate": 4.053936227662094e-05, + "loss": 2.3424, + "step": 3820500 + }, + { + "epoch": 18.93, + "learning_rate": 4.0538123690194857e-05, + "loss": 2.3253, + "step": 3821000 + }, + { + "epoch": 18.93, + "learning_rate": 4.0536885103768774e-05, + "loss": 2.3515, + "step": 3821500 + }, + { + "epoch": 18.94, + "learning_rate": 4.053564651734269e-05, + "loss": 2.3587, + "step": 3822000 + }, + { + "epoch": 18.94, + "learning_rate": 4.053440793091661e-05, + "loss": 2.3128, + "step": 3822500 + }, + { + "epoch": 18.94, + "learning_rate": 4.0533169344490524e-05, + "loss": 2.3619, + "step": 3823000 + }, + { + "epoch": 18.94, + "learning_rate": 4.053193075806444e-05, + "loss": 2.3424, + "step": 3823500 + }, + { + "epoch": 18.95, + "learning_rate": 4.053069217163836e-05, + "loss": 2.3527, + "step": 3824000 + }, + { + "epoch": 18.95, + "learning_rate": 4.0529453585212275e-05, + "loss": 2.3656, + "step": 3824500 + }, + { + "epoch": 18.95, + "learning_rate": 4.0528217475959044e-05, + "loss": 2.3551, + "step": 3825000 + }, + { + "epoch": 18.95, + "learning_rate": 4.0526978889532954e-05, + "loss": 2.3346, + "step": 3825500 + }, + { + "epoch": 18.96, + "learning_rate": 4.052574278027972e-05, + "loss": 2.3258, + "step": 3826000 + }, + { + "epoch": 18.96, + "learning_rate": 4.052450419385364e-05, + "loss": 2.3451, + "step": 3826500 + }, + { + "epoch": 18.96, + "learning_rate": 4.052326560742756e-05, + "loss": 2.3521, + "step": 3827000 + }, + { + "epoch": 18.96, + "learning_rate": 4.0522027021001474e-05, + "loss": 2.3527, + "step": 3827500 + }, + { + "epoch": 18.97, + "learning_rate": 4.052078843457539e-05, + "loss": 2.3459, + "step": 3828000 + }, + { + "epoch": 18.97, + "learning_rate": 4.051954984814931e-05, + "loss": 2.3684, + "step": 3828500 + }, + { + "epoch": 18.97, + "learning_rate": 4.0518311261723224e-05, + "loss": 2.3569, + "step": 3829000 + }, + { + "epoch": 18.97, + "learning_rate": 4.051707267529714e-05, + "loss": 2.368, + "step": 3829500 + }, + { + "epoch": 18.98, + "learning_rate": 4.051583408887106e-05, + "loss": 2.3412, + "step": 3830000 + }, + { + "epoch": 18.98, + "learning_rate": 4.0514595502444975e-05, + "loss": 2.3685, + "step": 3830500 + }, + { + "epoch": 18.98, + "learning_rate": 4.0513356916018885e-05, + "loss": 2.3667, + "step": 3831000 + }, + { + "epoch": 18.98, + "learning_rate": 4.05121183295928e-05, + "loss": 2.333, + "step": 3831500 + }, + { + "epoch": 18.99, + "learning_rate": 4.051088469751242e-05, + "loss": 2.3416, + "step": 3832000 + }, + { + "epoch": 18.99, + "learning_rate": 4.050964611108634e-05, + "loss": 2.344, + "step": 3832500 + }, + { + "epoch": 18.99, + "learning_rate": 4.050840752466026e-05, + "loss": 2.3712, + "step": 3833000 + }, + { + "epoch": 18.99, + "learning_rate": 4.0507168938234174e-05, + "loss": 2.3576, + "step": 3833500 + }, + { + "epoch": 18.99, + "learning_rate": 4.050593282898094e-05, + "loss": 2.3196, + "step": 3834000 + }, + { + "epoch": 19.0, + "learning_rate": 4.050469671972771e-05, + "loss": 2.3713, + "step": 3834500 + }, + { + "epoch": 19.0, + "learning_rate": 4.050345813330163e-05, + "loss": 2.3427, + "step": 3835000 + }, + { + "epoch": 19.0, + "eval_accuracy": 0.6510998864965143, + "eval_accuracy_mlm": 0.6054676656033432, + "eval_accuracy_nsp": 0.8664373487501912, + "eval_loss": 2.3667972087860107, + "eval_runtime": 145.68, + "eval_samples_per_second": 1750.131, + "eval_steps_per_second": 72.927, + "step": 3835017 + }, + { + "epoch": 19.0, + "learning_rate": 4.050221954687554e-05, + "loss": 2.3276, + "step": 3835500 + }, + { + "epoch": 19.0, + "learning_rate": 4.0500983437622314e-05, + "loss": 2.3316, + "step": 3836000 + }, + { + "epoch": 19.01, + "learning_rate": 4.049974485119623e-05, + "loss": 2.3063, + "step": 3836500 + }, + { + "epoch": 19.01, + "learning_rate": 4.049850626477015e-05, + "loss": 2.3161, + "step": 3837000 + }, + { + "epoch": 19.01, + "learning_rate": 4.0497267678344065e-05, + "loss": 2.3319, + "step": 3837500 + }, + { + "epoch": 19.01, + "learning_rate": 4.0496031569090834e-05, + "loss": 2.3158, + "step": 3838000 + }, + { + "epoch": 19.02, + "learning_rate": 4.049479298266475e-05, + "loss": 2.3323, + "step": 3838500 + }, + { + "epoch": 19.02, + "learning_rate": 4.049355439623867e-05, + "loss": 2.3132, + "step": 3839000 + }, + { + "epoch": 19.02, + "learning_rate": 4.049231580981258e-05, + "loss": 2.3162, + "step": 3839500 + }, + { + "epoch": 19.02, + "learning_rate": 4.0491077223386495e-05, + "loss": 2.3272, + "step": 3840000 + }, + { + "epoch": 19.03, + "learning_rate": 4.048983863696041e-05, + "loss": 2.3237, + "step": 3840500 + }, + { + "epoch": 19.03, + "learning_rate": 4.048860005053433e-05, + "loss": 2.3282, + "step": 3841000 + }, + { + "epoch": 19.03, + "learning_rate": 4.0487361464108245e-05, + "loss": 2.3251, + "step": 3841500 + }, + { + "epoch": 19.03, + "learning_rate": 4.0486122877682156e-05, + "loss": 2.3182, + "step": 3842000 + }, + { + "epoch": 19.04, + "learning_rate": 4.048488429125607e-05, + "loss": 2.3366, + "step": 3842500 + }, + { + "epoch": 19.04, + "learning_rate": 4.048364570482999e-05, + "loss": 2.312, + "step": 3843000 + }, + { + "epoch": 19.04, + "learning_rate": 4.0482407118403906e-05, + "loss": 2.3254, + "step": 3843500 + }, + { + "epoch": 19.04, + "learning_rate": 4.048116853197782e-05, + "loss": 2.3187, + "step": 3844000 + }, + { + "epoch": 19.05, + "learning_rate": 4.047992994555174e-05, + "loss": 2.3117, + "step": 3844500 + }, + { + "epoch": 19.05, + "learning_rate": 4.047869135912566e-05, + "loss": 2.3228, + "step": 3845000 + }, + { + "epoch": 19.05, + "learning_rate": 4.0477452772699574e-05, + "loss": 2.301, + "step": 3845500 + }, + { + "epoch": 19.05, + "learning_rate": 4.047621666344634e-05, + "loss": 2.3024, + "step": 3846000 + }, + { + "epoch": 19.06, + "learning_rate": 4.047497807702026e-05, + "loss": 2.3125, + "step": 3846500 + }, + { + "epoch": 19.06, + "learning_rate": 4.047373949059418e-05, + "loss": 2.3355, + "step": 3847000 + }, + { + "epoch": 19.06, + "learning_rate": 4.0472500904168094e-05, + "loss": 2.3184, + "step": 3847500 + }, + { + "epoch": 19.06, + "learning_rate": 4.047126231774201e-05, + "loss": 2.2946, + "step": 3848000 + }, + { + "epoch": 19.07, + "learning_rate": 4.047002620848878e-05, + "loss": 2.3503, + "step": 3848500 + }, + { + "epoch": 19.07, + "learning_rate": 4.046878762206269e-05, + "loss": 2.3138, + "step": 3849000 + }, + { + "epoch": 19.07, + "learning_rate": 4.0467549035636606e-05, + "loss": 2.3092, + "step": 3849500 + }, + { + "epoch": 19.07, + "learning_rate": 4.046631044921052e-05, + "loss": 2.3133, + "step": 3850000 + }, + { + "epoch": 19.08, + "learning_rate": 4.046507186278444e-05, + "loss": 2.337, + "step": 3850500 + }, + { + "epoch": 19.08, + "learning_rate": 4.046383327635836e-05, + "loss": 2.3352, + "step": 3851000 + }, + { + "epoch": 19.08, + "learning_rate": 4.0462594689932274e-05, + "loss": 2.3084, + "step": 3851500 + }, + { + "epoch": 19.08, + "learning_rate": 4.046135858067904e-05, + "loss": 2.3219, + "step": 3852000 + }, + { + "epoch": 19.09, + "learning_rate": 4.046011999425296e-05, + "loss": 2.3261, + "step": 3852500 + }, + { + "epoch": 19.09, + "learning_rate": 4.045888140782688e-05, + "loss": 2.3286, + "step": 3853000 + }, + { + "epoch": 19.09, + "learning_rate": 4.0457642821400794e-05, + "loss": 2.2986, + "step": 3853500 + }, + { + "epoch": 19.09, + "learning_rate": 4.045640423497471e-05, + "loss": 2.3174, + "step": 3854000 + }, + { + "epoch": 19.1, + "learning_rate": 4.045516564854863e-05, + "loss": 2.3252, + "step": 3854500 + }, + { + "epoch": 19.1, + "learning_rate": 4.0453927062122545e-05, + "loss": 2.3386, + "step": 3855000 + }, + { + "epoch": 19.1, + "learning_rate": 4.0452690952869307e-05, + "loss": 2.3323, + "step": 3855500 + }, + { + "epoch": 19.1, + "learning_rate": 4.0451452366443223e-05, + "loss": 2.3168, + "step": 3856000 + }, + { + "epoch": 19.11, + "learning_rate": 4.045021378001714e-05, + "loss": 2.3312, + "step": 3856500 + }, + { + "epoch": 19.11, + "learning_rate": 4.044897519359106e-05, + "loss": 2.3278, + "step": 3857000 + }, + { + "epoch": 19.11, + "learning_rate": 4.0447736607164974e-05, + "loss": 2.3539, + "step": 3857500 + }, + { + "epoch": 19.11, + "learning_rate": 4.044649802073889e-05, + "loss": 2.3532, + "step": 3858000 + }, + { + "epoch": 19.12, + "learning_rate": 4.044525943431281e-05, + "loss": 2.3201, + "step": 3858500 + }, + { + "epoch": 19.12, + "learning_rate": 4.0444020847886725e-05, + "loss": 2.2934, + "step": 3859000 + }, + { + "epoch": 19.12, + "learning_rate": 4.044278226146064e-05, + "loss": 2.3019, + "step": 3859500 + }, + { + "epoch": 19.12, + "learning_rate": 4.044154367503456e-05, + "loss": 2.3039, + "step": 3860000 + }, + { + "epoch": 19.13, + "learning_rate": 4.0440305088608476e-05, + "loss": 2.3172, + "step": 3860500 + }, + { + "epoch": 19.13, + "learning_rate": 4.0439071456528097e-05, + "loss": 2.3581, + "step": 3861000 + }, + { + "epoch": 19.13, + "learning_rate": 4.0437832870102013e-05, + "loss": 2.3188, + "step": 3861500 + }, + { + "epoch": 19.13, + "learning_rate": 4.043659428367593e-05, + "loss": 2.3124, + "step": 3862000 + }, + { + "epoch": 19.14, + "learning_rate": 4.043535569724984e-05, + "loss": 2.3261, + "step": 3862500 + }, + { + "epoch": 19.14, + "learning_rate": 4.043411958799661e-05, + "loss": 2.3299, + "step": 3863000 + }, + { + "epoch": 19.14, + "learning_rate": 4.0432881001570526e-05, + "loss": 2.3548, + "step": 3863500 + }, + { + "epoch": 19.14, + "learning_rate": 4.043164241514444e-05, + "loss": 2.3506, + "step": 3864000 + }, + { + "epoch": 19.15, + "learning_rate": 4.043040382871836e-05, + "loss": 2.3457, + "step": 3864500 + }, + { + "epoch": 19.15, + "learning_rate": 4.0429167719465136e-05, + "loss": 2.3462, + "step": 3865000 + }, + { + "epoch": 19.15, + "learning_rate": 4.042792913303905e-05, + "loss": 2.3082, + "step": 3865500 + }, + { + "epoch": 19.15, + "learning_rate": 4.042669054661296e-05, + "loss": 2.3479, + "step": 3866000 + }, + { + "epoch": 19.16, + "learning_rate": 4.042545196018688e-05, + "loss": 2.3207, + "step": 3866500 + }, + { + "epoch": 19.16, + "learning_rate": 4.042421585093365e-05, + "loss": 2.332, + "step": 3867000 + }, + { + "epoch": 19.16, + "learning_rate": 4.0422977264507565e-05, + "loss": 2.3244, + "step": 3867500 + }, + { + "epoch": 19.16, + "learning_rate": 4.042173867808148e-05, + "loss": 2.3418, + "step": 3868000 + }, + { + "epoch": 19.17, + "learning_rate": 4.04205000916554e-05, + "loss": 2.3337, + "step": 3868500 + }, + { + "epoch": 19.17, + "learning_rate": 4.041926150522931e-05, + "loss": 2.3363, + "step": 3869000 + }, + { + "epoch": 19.17, + "learning_rate": 4.0418022918803226e-05, + "loss": 2.3161, + "step": 3869500 + }, + { + "epoch": 19.17, + "learning_rate": 4.041678433237714e-05, + "loss": 2.3293, + "step": 3870000 + }, + { + "epoch": 19.18, + "learning_rate": 4.041554574595106e-05, + "loss": 2.3562, + "step": 3870500 + }, + { + "epoch": 19.18, + "learning_rate": 4.041430715952498e-05, + "loss": 2.3249, + "step": 3871000 + }, + { + "epoch": 19.18, + "learning_rate": 4.0413068573098894e-05, + "loss": 2.3386, + "step": 3871500 + }, + { + "epoch": 19.18, + "learning_rate": 4.041182998667281e-05, + "loss": 2.3363, + "step": 3872000 + }, + { + "epoch": 19.19, + "learning_rate": 4.041059140024673e-05, + "loss": 2.3261, + "step": 3872500 + }, + { + "epoch": 19.19, + "learning_rate": 4.04093552909935e-05, + "loss": 2.3164, + "step": 3873000 + }, + { + "epoch": 19.19, + "learning_rate": 4.0408119181740266e-05, + "loss": 2.3363, + "step": 3873500 + }, + { + "epoch": 19.19, + "learning_rate": 4.040688059531418e-05, + "loss": 2.3191, + "step": 3874000 + }, + { + "epoch": 19.2, + "learning_rate": 4.04056420088881e-05, + "loss": 2.3325, + "step": 3874500 + }, + { + "epoch": 19.2, + "learning_rate": 4.0404403422462016e-05, + "loss": 2.3294, + "step": 3875000 + }, + { + "epoch": 19.2, + "learning_rate": 4.0403164836035927e-05, + "loss": 2.348, + "step": 3875500 + }, + { + "epoch": 19.2, + "learning_rate": 4.0401926249609843e-05, + "loss": 2.3279, + "step": 3876000 + }, + { + "epoch": 19.21, + "learning_rate": 4.040068766318376e-05, + "loss": 2.3494, + "step": 3876500 + }, + { + "epoch": 19.21, + "learning_rate": 4.0399451553930536e-05, + "loss": 2.3361, + "step": 3877000 + }, + { + "epoch": 19.21, + "learning_rate": 4.039821296750445e-05, + "loss": 2.329, + "step": 3877500 + }, + { + "epoch": 19.21, + "learning_rate": 4.039697438107837e-05, + "loss": 2.3137, + "step": 3878000 + }, + { + "epoch": 19.22, + "learning_rate": 4.039573579465228e-05, + "loss": 2.3535, + "step": 3878500 + }, + { + "epoch": 19.22, + "learning_rate": 4.03944972082262e-05, + "loss": 2.3296, + "step": 3879000 + }, + { + "epoch": 19.22, + "learning_rate": 4.0393258621800114e-05, + "loss": 2.3558, + "step": 3879500 + }, + { + "epoch": 19.22, + "learning_rate": 4.039202251254688e-05, + "loss": 2.3215, + "step": 3880000 + }, + { + "epoch": 19.23, + "learning_rate": 4.03907839261208e-05, + "loss": 2.3023, + "step": 3880500 + }, + { + "epoch": 19.23, + "learning_rate": 4.038954781686757e-05, + "loss": 2.3208, + "step": 3881000 + }, + { + "epoch": 19.23, + "learning_rate": 4.0388309230441485e-05, + "loss": 2.3491, + "step": 3881500 + }, + { + "epoch": 19.23, + "learning_rate": 4.0387073121188254e-05, + "loss": 2.3284, + "step": 3882000 + }, + { + "epoch": 19.24, + "learning_rate": 4.038583453476217e-05, + "loss": 2.3285, + "step": 3882500 + }, + { + "epoch": 19.24, + "learning_rate": 4.038459594833609e-05, + "loss": 2.3254, + "step": 3883000 + }, + { + "epoch": 19.24, + "learning_rate": 4.038335736191e-05, + "loss": 2.3343, + "step": 3883500 + }, + { + "epoch": 19.24, + "learning_rate": 4.0382118775483915e-05, + "loss": 2.3346, + "step": 3884000 + }, + { + "epoch": 19.25, + "learning_rate": 4.038088018905783e-05, + "loss": 2.3331, + "step": 3884500 + }, + { + "epoch": 19.25, + "learning_rate": 4.037964160263175e-05, + "loss": 2.3303, + "step": 3885000 + }, + { + "epoch": 19.25, + "learning_rate": 4.0378403016205666e-05, + "loss": 2.3329, + "step": 3885500 + }, + { + "epoch": 19.25, + "learning_rate": 4.037716442977958e-05, + "loss": 2.3502, + "step": 3886000 + }, + { + "epoch": 19.26, + "learning_rate": 4.03759258433535e-05, + "loss": 2.3128, + "step": 3886500 + }, + { + "epoch": 19.26, + "learning_rate": 4.0374687256927417e-05, + "loss": 2.3384, + "step": 3887000 + }, + { + "epoch": 19.26, + "learning_rate": 4.0373448670501334e-05, + "loss": 2.3581, + "step": 3887500 + }, + { + "epoch": 19.26, + "learning_rate": 4.0372210084075244e-05, + "loss": 2.3329, + "step": 3888000 + }, + { + "epoch": 19.26, + "learning_rate": 4.037097397482202e-05, + "loss": 2.3289, + "step": 3888500 + }, + { + "epoch": 19.27, + "learning_rate": 4.0369735388395936e-05, + "loss": 2.3199, + "step": 3889000 + }, + { + "epoch": 19.27, + "learning_rate": 4.036849680196985e-05, + "loss": 2.3655, + "step": 3889500 + }, + { + "epoch": 19.27, + "learning_rate": 4.036725821554377e-05, + "loss": 2.3308, + "step": 3890000 + }, + { + "epoch": 19.27, + "learning_rate": 4.036601962911769e-05, + "loss": 2.3335, + "step": 3890500 + }, + { + "epoch": 19.28, + "learning_rate": 4.03647810426916e-05, + "loss": 2.3548, + "step": 3891000 + }, + { + "epoch": 19.28, + "learning_rate": 4.0363542456265514e-05, + "loss": 2.3258, + "step": 3891500 + }, + { + "epoch": 19.28, + "learning_rate": 4.036230386983943e-05, + "loss": 2.3499, + "step": 3892000 + }, + { + "epoch": 19.28, + "learning_rate": 4.036106528341335e-05, + "loss": 2.3421, + "step": 3892500 + }, + { + "epoch": 19.29, + "learning_rate": 4.035982917416012e-05, + "loss": 2.3172, + "step": 3893000 + }, + { + "epoch": 19.29, + "learning_rate": 4.0358593064906886e-05, + "loss": 2.3416, + "step": 3893500 + }, + { + "epoch": 19.29, + "learning_rate": 4.03573544784808e-05, + "loss": 2.3245, + "step": 3894000 + }, + { + "epoch": 19.29, + "learning_rate": 4.035611589205472e-05, + "loss": 2.3291, + "step": 3894500 + }, + { + "epoch": 19.3, + "learning_rate": 4.035487978280149e-05, + "loss": 2.3485, + "step": 3895000 + }, + { + "epoch": 19.3, + "learning_rate": 4.0353641196375405e-05, + "loss": 2.3216, + "step": 3895500 + }, + { + "epoch": 19.3, + "learning_rate": 4.035240260994932e-05, + "loss": 2.352, + "step": 3896000 + }, + { + "epoch": 19.3, + "learning_rate": 4.035116402352324e-05, + "loss": 2.3244, + "step": 3896500 + }, + { + "epoch": 19.31, + "learning_rate": 4.034992543709715e-05, + "loss": 2.347, + "step": 3897000 + }, + { + "epoch": 19.31, + "learning_rate": 4.0348686850671066e-05, + "loss": 2.3369, + "step": 3897500 + }, + { + "epoch": 19.31, + "learning_rate": 4.034744826424498e-05, + "loss": 2.31, + "step": 3898000 + }, + { + "epoch": 19.31, + "learning_rate": 4.03462096778189e-05, + "loss": 2.3318, + "step": 3898500 + }, + { + "epoch": 19.32, + "learning_rate": 4.034497109139282e-05, + "loss": 2.3256, + "step": 3899000 + }, + { + "epoch": 19.32, + "learning_rate": 4.0343732504966734e-05, + "loss": 2.3221, + "step": 3899500 + }, + { + "epoch": 19.32, + "learning_rate": 4.03424963957135e-05, + "loss": 2.3195, + "step": 3900000 + }, + { + "epoch": 19.32, + "learning_rate": 4.034125780928742e-05, + "loss": 2.3262, + "step": 3900500 + }, + { + "epoch": 19.33, + "learning_rate": 4.0340019222861336e-05, + "loss": 2.3251, + "step": 3901000 + }, + { + "epoch": 19.33, + "learning_rate": 4.033878063643525e-05, + "loss": 2.3431, + "step": 3901500 + }, + { + "epoch": 19.33, + "learning_rate": 4.033754452718202e-05, + "loss": 2.3286, + "step": 3902000 + }, + { + "epoch": 19.33, + "learning_rate": 4.033630594075594e-05, + "loss": 2.3164, + "step": 3902500 + }, + { + "epoch": 19.34, + "learning_rate": 4.03350698315027e-05, + "loss": 2.3303, + "step": 3903000 + }, + { + "epoch": 19.34, + "learning_rate": 4.033383124507662e-05, + "loss": 2.3216, + "step": 3903500 + }, + { + "epoch": 19.34, + "learning_rate": 4.033259513582339e-05, + "loss": 2.3386, + "step": 3904000 + }, + { + "epoch": 19.34, + "learning_rate": 4.0331356549397304e-05, + "loss": 2.3393, + "step": 3904500 + }, + { + "epoch": 19.35, + "learning_rate": 4.033011796297122e-05, + "loss": 2.342, + "step": 3905000 + }, + { + "epoch": 19.35, + "learning_rate": 4.032887937654514e-05, + "loss": 2.3289, + "step": 3905500 + }, + { + "epoch": 19.35, + "learning_rate": 4.0327640790119055e-05, + "loss": 2.3325, + "step": 3906000 + }, + { + "epoch": 19.35, + "learning_rate": 4.032640220369297e-05, + "loss": 2.3331, + "step": 3906500 + }, + { + "epoch": 19.36, + "learning_rate": 4.032516361726689e-05, + "loss": 2.3308, + "step": 3907000 + }, + { + "epoch": 19.36, + "learning_rate": 4.0323925030840805e-05, + "loss": 2.3238, + "step": 3907500 + }, + { + "epoch": 19.36, + "learning_rate": 4.032268644441472e-05, + "loss": 2.3196, + "step": 3908000 + }, + { + "epoch": 19.36, + "learning_rate": 4.032144785798864e-05, + "loss": 2.3406, + "step": 3908500 + }, + { + "epoch": 19.37, + "learning_rate": 4.0320209271562556e-05, + "loss": 2.3241, + "step": 3909000 + }, + { + "epoch": 19.37, + "learning_rate": 4.031897068513647e-05, + "loss": 2.3429, + "step": 3909500 + }, + { + "epoch": 19.37, + "learning_rate": 4.031773209871039e-05, + "loss": 2.3428, + "step": 3910000 + }, + { + "epoch": 19.37, + "learning_rate": 4.03164935122843e-05, + "loss": 2.3308, + "step": 3910500 + }, + { + "epoch": 19.38, + "learning_rate": 4.031525492585822e-05, + "loss": 2.3214, + "step": 3911000 + }, + { + "epoch": 19.38, + "learning_rate": 4.0314016339432134e-05, + "loss": 2.3418, + "step": 3911500 + }, + { + "epoch": 19.38, + "learning_rate": 4.031277775300605e-05, + "loss": 2.3363, + "step": 3912000 + }, + { + "epoch": 19.38, + "learning_rate": 4.031153916657997e-05, + "loss": 2.3595, + "step": 3912500 + }, + { + "epoch": 19.39, + "learning_rate": 4.031030058015388e-05, + "loss": 2.3229, + "step": 3913000 + }, + { + "epoch": 19.39, + "learning_rate": 4.0309064470900654e-05, + "loss": 2.323, + "step": 3913500 + }, + { + "epoch": 19.39, + "learning_rate": 4.030782836164742e-05, + "loss": 2.3263, + "step": 3914000 + }, + { + "epoch": 19.39, + "learning_rate": 4.030658977522134e-05, + "loss": 2.3366, + "step": 3914500 + }, + { + "epoch": 19.4, + "learning_rate": 4.0305351188795256e-05, + "loss": 2.3512, + "step": 3915000 + }, + { + "epoch": 19.4, + "learning_rate": 4.030411260236917e-05, + "loss": 2.3201, + "step": 3915500 + }, + { + "epoch": 19.4, + "learning_rate": 4.030287401594309e-05, + "loss": 2.338, + "step": 3916000 + }, + { + "epoch": 19.4, + "learning_rate": 4.030163542951701e-05, + "loss": 2.3361, + "step": 3916500 + }, + { + "epoch": 19.41, + "learning_rate": 4.0300396843090924e-05, + "loss": 2.3406, + "step": 3917000 + }, + { + "epoch": 19.41, + "learning_rate": 4.0299158256664834e-05, + "loss": 2.3373, + "step": 3917500 + }, + { + "epoch": 19.41, + "learning_rate": 4.029791967023875e-05, + "loss": 2.3003, + "step": 3918000 + }, + { + "epoch": 19.41, + "learning_rate": 4.029668108381267e-05, + "loss": 2.3411, + "step": 3918500 + }, + { + "epoch": 19.42, + "learning_rate": 4.029544497455944e-05, + "loss": 2.3372, + "step": 3919000 + }, + { + "epoch": 19.42, + "learning_rate": 4.0294206388133354e-05, + "loss": 2.3438, + "step": 3919500 + }, + { + "epoch": 19.42, + "learning_rate": 4.029296780170727e-05, + "loss": 2.3411, + "step": 3920000 + }, + { + "epoch": 19.42, + "learning_rate": 4.029172921528119e-05, + "loss": 2.3428, + "step": 3920500 + }, + { + "epoch": 19.43, + "learning_rate": 4.0290493106027956e-05, + "loss": 2.3244, + "step": 3921000 + }, + { + "epoch": 19.43, + "learning_rate": 4.028925451960187e-05, + "loss": 2.339, + "step": 3921500 + }, + { + "epoch": 19.43, + "learning_rate": 4.028801593317579e-05, + "loss": 2.3394, + "step": 3922000 + }, + { + "epoch": 19.43, + "learning_rate": 4.028677734674971e-05, + "loss": 2.3725, + "step": 3922500 + }, + { + "epoch": 19.44, + "learning_rate": 4.028554123749647e-05, + "loss": 2.3452, + "step": 3923000 + }, + { + "epoch": 19.44, + "learning_rate": 4.0284302651070386e-05, + "loss": 2.307, + "step": 3923500 + }, + { + "epoch": 19.44, + "learning_rate": 4.0283066541817155e-05, + "loss": 2.3612, + "step": 3924000 + }, + { + "epoch": 19.44, + "learning_rate": 4.028182795539107e-05, + "loss": 2.3514, + "step": 3924500 + }, + { + "epoch": 19.45, + "learning_rate": 4.028058936896499e-05, + "loss": 2.3375, + "step": 3925000 + }, + { + "epoch": 19.45, + "learning_rate": 4.0279350782538906e-05, + "loss": 2.342, + "step": 3925500 + }, + { + "epoch": 19.45, + "learning_rate": 4.027811219611282e-05, + "loss": 2.3437, + "step": 3926000 + }, + { + "epoch": 19.45, + "learning_rate": 4.027687608685959e-05, + "loss": 2.3342, + "step": 3926500 + }, + { + "epoch": 19.46, + "learning_rate": 4.027563750043351e-05, + "loss": 2.3393, + "step": 3927000 + }, + { + "epoch": 19.46, + "learning_rate": 4.027439891400742e-05, + "loss": 2.3505, + "step": 3927500 + }, + { + "epoch": 19.46, + "learning_rate": 4.0273160327581335e-05, + "loss": 2.3358, + "step": 3928000 + }, + { + "epoch": 19.46, + "learning_rate": 4.027192174115525e-05, + "loss": 2.3349, + "step": 3928500 + }, + { + "epoch": 19.47, + "learning_rate": 4.027068315472917e-05, + "loss": 2.3537, + "step": 3929000 + }, + { + "epoch": 19.47, + "learning_rate": 4.026944704547594e-05, + "loss": 2.323, + "step": 3929500 + }, + { + "epoch": 19.47, + "learning_rate": 4.0268208459049855e-05, + "loss": 2.3303, + "step": 3930000 + }, + { + "epoch": 19.47, + "learning_rate": 4.026696987262377e-05, + "loss": 2.3301, + "step": 3930500 + }, + { + "epoch": 19.48, + "learning_rate": 4.026573128619769e-05, + "loss": 2.3492, + "step": 3931000 + }, + { + "epoch": 19.48, + "learning_rate": 4.0264492699771606e-05, + "loss": 2.3522, + "step": 3931500 + }, + { + "epoch": 19.48, + "learning_rate": 4.026325411334552e-05, + "loss": 2.3272, + "step": 3932000 + }, + { + "epoch": 19.48, + "learning_rate": 4.026201552691944e-05, + "loss": 2.3068, + "step": 3932500 + }, + { + "epoch": 19.49, + "learning_rate": 4.026077694049336e-05, + "loss": 2.3614, + "step": 3933000 + }, + { + "epoch": 19.49, + "learning_rate": 4.0259540831240125e-05, + "loss": 2.3525, + "step": 3933500 + }, + { + "epoch": 19.49, + "learning_rate": 4.0258304721986894e-05, + "loss": 2.3418, + "step": 3934000 + }, + { + "epoch": 19.49, + "learning_rate": 4.025706613556081e-05, + "loss": 2.3367, + "step": 3934500 + }, + { + "epoch": 19.5, + "learning_rate": 4.025582754913473e-05, + "loss": 2.3377, + "step": 3935000 + }, + { + "epoch": 19.5, + "learning_rate": 4.025458896270864e-05, + "loss": 2.3424, + "step": 3935500 + }, + { + "epoch": 19.5, + "learning_rate": 4.0253350376282555e-05, + "loss": 2.3364, + "step": 3936000 + }, + { + "epoch": 19.5, + "learning_rate": 4.025211178985647e-05, + "loss": 2.3356, + "step": 3936500 + }, + { + "epoch": 19.51, + "learning_rate": 4.025087320343039e-05, + "loss": 2.3515, + "step": 3937000 + }, + { + "epoch": 19.51, + "learning_rate": 4.0249637094177165e-05, + "loss": 2.3304, + "step": 3937500 + }, + { + "epoch": 19.51, + "learning_rate": 4.024839850775108e-05, + "loss": 2.3372, + "step": 3938000 + }, + { + "epoch": 19.51, + "learning_rate": 4.024715992132499e-05, + "loss": 2.3153, + "step": 3938500 + }, + { + "epoch": 19.52, + "learning_rate": 4.024592133489891e-05, + "loss": 2.3227, + "step": 3939000 + }, + { + "epoch": 19.52, + "learning_rate": 4.024468522564568e-05, + "loss": 2.3313, + "step": 3939500 + }, + { + "epoch": 19.52, + "learning_rate": 4.0243446639219594e-05, + "loss": 2.3482, + "step": 3940000 + }, + { + "epoch": 19.52, + "learning_rate": 4.024221052996636e-05, + "loss": 2.305, + "step": 3940500 + }, + { + "epoch": 19.53, + "learning_rate": 4.024097194354028e-05, + "loss": 2.3352, + "step": 3941000 + }, + { + "epoch": 19.53, + "learning_rate": 4.02397333571142e-05, + "loss": 2.3291, + "step": 3941500 + }, + { + "epoch": 19.53, + "learning_rate": 4.0238494770688114e-05, + "loss": 2.333, + "step": 3942000 + }, + { + "epoch": 19.53, + "learning_rate": 4.0237258661434876e-05, + "loss": 2.3466, + "step": 3942500 + }, + { + "epoch": 19.53, + "learning_rate": 4.023602007500879e-05, + "loss": 2.3585, + "step": 3943000 + }, + { + "epoch": 19.54, + "learning_rate": 4.023478148858271e-05, + "loss": 2.3453, + "step": 3943500 + }, + { + "epoch": 19.54, + "learning_rate": 4.023354290215663e-05, + "loss": 2.3602, + "step": 3944000 + }, + { + "epoch": 19.54, + "learning_rate": 4.0232304315730544e-05, + "loss": 2.3127, + "step": 3944500 + }, + { + "epoch": 19.54, + "learning_rate": 4.023106572930446e-05, + "loss": 2.331, + "step": 3945000 + }, + { + "epoch": 19.55, + "learning_rate": 4.022982714287838e-05, + "loss": 2.3243, + "step": 3945500 + }, + { + "epoch": 19.55, + "learning_rate": 4.0228588556452295e-05, + "loss": 2.3414, + "step": 3946000 + }, + { + "epoch": 19.55, + "learning_rate": 4.022734997002621e-05, + "loss": 2.3321, + "step": 3946500 + }, + { + "epoch": 19.55, + "learning_rate": 4.022611138360013e-05, + "loss": 2.3295, + "step": 3947000 + }, + { + "epoch": 19.56, + "learning_rate": 4.0224872797174045e-05, + "loss": 2.3272, + "step": 3947500 + }, + { + "epoch": 19.56, + "learning_rate": 4.0223634210747955e-05, + "loss": 2.3229, + "step": 3948000 + }, + { + "epoch": 19.56, + "learning_rate": 4.022239810149473e-05, + "loss": 2.3459, + "step": 3948500 + }, + { + "epoch": 19.56, + "learning_rate": 4.022115951506865e-05, + "loss": 2.3256, + "step": 3949000 + }, + { + "epoch": 19.57, + "learning_rate": 4.0219920928642565e-05, + "loss": 2.3478, + "step": 3949500 + }, + { + "epoch": 19.57, + "learning_rate": 4.021868234221648e-05, + "loss": 2.3261, + "step": 3950000 + }, + { + "epoch": 19.57, + "learning_rate": 4.02174437557904e-05, + "loss": 2.3297, + "step": 3950500 + }, + { + "epoch": 19.57, + "learning_rate": 4.021620764653716e-05, + "loss": 2.326, + "step": 3951000 + }, + { + "epoch": 19.58, + "learning_rate": 4.021496906011108e-05, + "loss": 2.33, + "step": 3951500 + }, + { + "epoch": 19.58, + "learning_rate": 4.0213730473684995e-05, + "loss": 2.3451, + "step": 3952000 + }, + { + "epoch": 19.58, + "learning_rate": 4.021249188725891e-05, + "loss": 2.3306, + "step": 3952500 + }, + { + "epoch": 19.58, + "learning_rate": 4.021125330083283e-05, + "loss": 2.3403, + "step": 3953000 + }, + { + "epoch": 19.59, + "learning_rate": 4.0210014714406745e-05, + "loss": 2.3302, + "step": 3953500 + }, + { + "epoch": 19.59, + "learning_rate": 4.0208776127980656e-05, + "loss": 2.3446, + "step": 3954000 + }, + { + "epoch": 19.59, + "learning_rate": 4.020754001872743e-05, + "loss": 2.3467, + "step": 3954500 + }, + { + "epoch": 19.59, + "learning_rate": 4.020630143230135e-05, + "loss": 2.3475, + "step": 3955000 + }, + { + "epoch": 19.6, + "learning_rate": 4.0205062845875265e-05, + "loss": 2.3296, + "step": 3955500 + }, + { + "epoch": 19.6, + "learning_rate": 4.020382425944918e-05, + "loss": 2.3456, + "step": 3956000 + }, + { + "epoch": 19.6, + "learning_rate": 4.02025856730231e-05, + "loss": 2.3194, + "step": 3956500 + }, + { + "epoch": 19.6, + "learning_rate": 4.020134956376986e-05, + "loss": 2.3377, + "step": 3957000 + }, + { + "epoch": 19.61, + "learning_rate": 4.020011097734378e-05, + "loss": 2.3476, + "step": 3957500 + }, + { + "epoch": 19.61, + "learning_rate": 4.0198872390917695e-05, + "loss": 2.3331, + "step": 3958000 + }, + { + "epoch": 19.61, + "learning_rate": 4.0197636281664464e-05, + "loss": 2.3387, + "step": 3958500 + }, + { + "epoch": 19.61, + "learning_rate": 4.019639769523838e-05, + "loss": 2.3505, + "step": 3959000 + }, + { + "epoch": 19.62, + "learning_rate": 4.01951591088123e-05, + "loss": 2.3324, + "step": 3959500 + }, + { + "epoch": 19.62, + "learning_rate": 4.0193920522386214e-05, + "loss": 2.32, + "step": 3960000 + }, + { + "epoch": 19.62, + "learning_rate": 4.019268193596013e-05, + "loss": 2.3419, + "step": 3960500 + }, + { + "epoch": 19.62, + "learning_rate": 4.019144334953405e-05, + "loss": 2.3614, + "step": 3961000 + }, + { + "epoch": 19.63, + "learning_rate": 4.0190204763107965e-05, + "loss": 2.338, + "step": 3961500 + }, + { + "epoch": 19.63, + "learning_rate": 4.018896865385473e-05, + "loss": 2.3367, + "step": 3962000 + }, + { + "epoch": 19.63, + "learning_rate": 4.0187730067428644e-05, + "loss": 2.3342, + "step": 3962500 + }, + { + "epoch": 19.63, + "learning_rate": 4.018649148100256e-05, + "loss": 2.3435, + "step": 3963000 + }, + { + "epoch": 19.64, + "learning_rate": 4.018525537174933e-05, + "loss": 2.3708, + "step": 3963500 + }, + { + "epoch": 19.64, + "learning_rate": 4.018401678532325e-05, + "loss": 2.3477, + "step": 3964000 + }, + { + "epoch": 19.64, + "learning_rate": 4.0182780676070016e-05, + "loss": 2.3401, + "step": 3964500 + }, + { + "epoch": 19.64, + "learning_rate": 4.018154208964393e-05, + "loss": 2.2965, + "step": 3965000 + }, + { + "epoch": 19.65, + "learning_rate": 4.018030350321785e-05, + "loss": 2.3713, + "step": 3965500 + }, + { + "epoch": 19.65, + "learning_rate": 4.0179064916791766e-05, + "loss": 2.3456, + "step": 3966000 + }, + { + "epoch": 19.65, + "learning_rate": 4.017782633036568e-05, + "loss": 2.3476, + "step": 3966500 + }, + { + "epoch": 19.65, + "learning_rate": 4.01765877439396e-05, + "loss": 2.3343, + "step": 3967000 + }, + { + "epoch": 19.66, + "learning_rate": 4.017534915751352e-05, + "loss": 2.3137, + "step": 3967500 + }, + { + "epoch": 19.66, + "learning_rate": 4.0174110571087434e-05, + "loss": 2.3458, + "step": 3968000 + }, + { + "epoch": 19.66, + "learning_rate": 4.017287198466135e-05, + "loss": 2.3405, + "step": 3968500 + }, + { + "epoch": 19.66, + "learning_rate": 4.017163339823526e-05, + "loss": 2.3362, + "step": 3969000 + }, + { + "epoch": 19.67, + "learning_rate": 4.017039481180918e-05, + "loss": 2.3175, + "step": 3969500 + }, + { + "epoch": 19.67, + "learning_rate": 4.0169156225383095e-05, + "loss": 2.3659, + "step": 3970000 + }, + { + "epoch": 19.67, + "learning_rate": 4.016791763895701e-05, + "loss": 2.3572, + "step": 3970500 + }, + { + "epoch": 19.67, + "learning_rate": 4.016668400687663e-05, + "loss": 2.3597, + "step": 3971000 + }, + { + "epoch": 19.68, + "learning_rate": 4.016544542045055e-05, + "loss": 2.3242, + "step": 3971500 + }, + { + "epoch": 19.68, + "learning_rate": 4.0164206834024466e-05, + "loss": 2.3366, + "step": 3972000 + }, + { + "epoch": 19.68, + "learning_rate": 4.0162968247598383e-05, + "loss": 2.3619, + "step": 3972500 + }, + { + "epoch": 19.68, + "learning_rate": 4.01617296611723e-05, + "loss": 2.3342, + "step": 3973000 + }, + { + "epoch": 19.69, + "learning_rate": 4.016049107474622e-05, + "loss": 2.3261, + "step": 3973500 + }, + { + "epoch": 19.69, + "learning_rate": 4.0159252488320134e-05, + "loss": 2.3447, + "step": 3974000 + }, + { + "epoch": 19.69, + "learning_rate": 4.015801390189405e-05, + "loss": 2.3384, + "step": 3974500 + }, + { + "epoch": 19.69, + "learning_rate": 4.015677531546797e-05, + "loss": 2.3568, + "step": 3975000 + }, + { + "epoch": 19.7, + "learning_rate": 4.015553672904188e-05, + "loss": 2.3522, + "step": 3975500 + }, + { + "epoch": 19.7, + "learning_rate": 4.0154298142615795e-05, + "loss": 2.3391, + "step": 3976000 + }, + { + "epoch": 19.7, + "learning_rate": 4.015305955618971e-05, + "loss": 2.3405, + "step": 3976500 + }, + { + "epoch": 19.7, + "learning_rate": 4.015182096976363e-05, + "loss": 2.3432, + "step": 3977000 + }, + { + "epoch": 19.71, + "learning_rate": 4.0150582383337546e-05, + "loss": 2.3234, + "step": 3977500 + }, + { + "epoch": 19.71, + "learning_rate": 4.0149346274084315e-05, + "loss": 2.3282, + "step": 3978000 + }, + { + "epoch": 19.71, + "learning_rate": 4.014810768765823e-05, + "loss": 2.3459, + "step": 3978500 + }, + { + "epoch": 19.71, + "learning_rate": 4.014686910123215e-05, + "loss": 2.3324, + "step": 3979000 + }, + { + "epoch": 19.72, + "learning_rate": 4.0145630514806066e-05, + "loss": 2.3331, + "step": 3979500 + }, + { + "epoch": 19.72, + "learning_rate": 4.014439192837998e-05, + "loss": 2.3306, + "step": 3980000 + }, + { + "epoch": 19.72, + "learning_rate": 4.014315581912675e-05, + "loss": 2.3346, + "step": 3980500 + }, + { + "epoch": 19.72, + "learning_rate": 4.014191723270067e-05, + "loss": 2.3382, + "step": 3981000 + }, + { + "epoch": 19.73, + "learning_rate": 4.0140678646274585e-05, + "loss": 2.3328, + "step": 3981500 + }, + { + "epoch": 19.73, + "learning_rate": 4.01394400598485e-05, + "loss": 2.3343, + "step": 3982000 + }, + { + "epoch": 19.73, + "learning_rate": 4.013820147342241e-05, + "loss": 2.3313, + "step": 3982500 + }, + { + "epoch": 19.73, + "learning_rate": 4.013696288699633e-05, + "loss": 2.3678, + "step": 3983000 + }, + { + "epoch": 19.74, + "learning_rate": 4.0135724300570246e-05, + "loss": 2.3607, + "step": 3983500 + }, + { + "epoch": 19.74, + "learning_rate": 4.013448571414416e-05, + "loss": 2.3278, + "step": 3984000 + }, + { + "epoch": 19.74, + "learning_rate": 4.013324712771808e-05, + "loss": 2.3487, + "step": 3984500 + }, + { + "epoch": 19.74, + "learning_rate": 4.0132008541292e-05, + "loss": 2.3366, + "step": 3985000 + }, + { + "epoch": 19.75, + "learning_rate": 4.013076995486591e-05, + "loss": 2.364, + "step": 3985500 + }, + { + "epoch": 19.75, + "learning_rate": 4.012953384561268e-05, + "loss": 2.3186, + "step": 3986000 + }, + { + "epoch": 19.75, + "learning_rate": 4.012829773635945e-05, + "loss": 2.3758, + "step": 3986500 + }, + { + "epoch": 19.75, + "learning_rate": 4.012705914993337e-05, + "loss": 2.3873, + "step": 3987000 + }, + { + "epoch": 19.76, + "learning_rate": 4.0125820563507285e-05, + "loss": 2.3456, + "step": 3987500 + }, + { + "epoch": 19.76, + "learning_rate": 4.01245819770812e-05, + "loss": 2.342, + "step": 3988000 + }, + { + "epoch": 19.76, + "learning_rate": 4.012334339065512e-05, + "loss": 2.3393, + "step": 3988500 + }, + { + "epoch": 19.76, + "learning_rate": 4.012210480422903e-05, + "loss": 2.3263, + "step": 3989000 + }, + { + "epoch": 19.77, + "learning_rate": 4.0120866217802946e-05, + "loss": 2.3438, + "step": 3989500 + }, + { + "epoch": 19.77, + "learning_rate": 4.011962763137686e-05, + "loss": 2.348, + "step": 3990000 + }, + { + "epoch": 19.77, + "learning_rate": 4.011838904495078e-05, + "loss": 2.3395, + "step": 3990500 + }, + { + "epoch": 19.77, + "learning_rate": 4.011715293569755e-05, + "loss": 2.3636, + "step": 3991000 + }, + { + "epoch": 19.78, + "learning_rate": 4.0115914349271466e-05, + "loss": 2.3862, + "step": 3991500 + }, + { + "epoch": 19.78, + "learning_rate": 4.0114678240018235e-05, + "loss": 2.3051, + "step": 3992000 + }, + { + "epoch": 19.78, + "learning_rate": 4.011343965359215e-05, + "loss": 2.3611, + "step": 3992500 + }, + { + "epoch": 19.78, + "learning_rate": 4.011220106716607e-05, + "loss": 2.3512, + "step": 3993000 + }, + { + "epoch": 19.79, + "learning_rate": 4.0110962480739985e-05, + "loss": 2.3359, + "step": 3993500 + }, + { + "epoch": 19.79, + "learning_rate": 4.01097238943139e-05, + "loss": 2.3261, + "step": 3994000 + }, + { + "epoch": 19.79, + "learning_rate": 4.010848530788782e-05, + "loss": 2.369, + "step": 3994500 + }, + { + "epoch": 19.79, + "learning_rate": 4.010724919863458e-05, + "loss": 2.3553, + "step": 3995000 + }, + { + "epoch": 19.8, + "learning_rate": 4.01060106122085e-05, + "loss": 2.3294, + "step": 3995500 + }, + { + "epoch": 19.8, + "learning_rate": 4.0104772025782415e-05, + "loss": 2.3247, + "step": 3996000 + }, + { + "epoch": 19.8, + "learning_rate": 4.010353343935633e-05, + "loss": 2.3215, + "step": 3996500 + }, + { + "epoch": 19.8, + "learning_rate": 4.010229485293025e-05, + "loss": 2.3778, + "step": 3997000 + }, + { + "epoch": 19.8, + "learning_rate": 4.0101056266504166e-05, + "loss": 2.3558, + "step": 3997500 + }, + { + "epoch": 19.81, + "learning_rate": 4.009981768007808e-05, + "loss": 2.3269, + "step": 3998000 + }, + { + "epoch": 19.81, + "learning_rate": 4.0098579093652e-05, + "loss": 2.3752, + "step": 3998500 + }, + { + "epoch": 19.81, + "learning_rate": 4.009734050722592e-05, + "loss": 2.3359, + "step": 3999000 + }, + { + "epoch": 19.81, + "learning_rate": 4.0096101920799834e-05, + "loss": 2.3454, + "step": 3999500 + }, + { + "epoch": 19.82, + "learning_rate": 4.009486333437375e-05, + "loss": 2.3231, + "step": 4000000 + }, + { + "epoch": 19.82, + "learning_rate": 4.009362474794767e-05, + "loss": 2.3238, + "step": 4000500 + }, + { + "epoch": 19.82, + "learning_rate": 4.009238616152158e-05, + "loss": 2.3168, + "step": 4001000 + }, + { + "epoch": 19.82, + "learning_rate": 4.00911525294412e-05, + "loss": 2.3225, + "step": 4001500 + }, + { + "epoch": 19.83, + "learning_rate": 4.0089913943015115e-05, + "loss": 2.3095, + "step": 4002000 + }, + { + "epoch": 19.83, + "learning_rate": 4.008867535658903e-05, + "loss": 2.3578, + "step": 4002500 + }, + { + "epoch": 19.83, + "learning_rate": 4.008743677016295e-05, + "loss": 2.3386, + "step": 4003000 + }, + { + "epoch": 19.83, + "learning_rate": 4.0086198183736866e-05, + "loss": 2.3406, + "step": 4003500 + }, + { + "epoch": 19.84, + "learning_rate": 4.008496455165649e-05, + "loss": 2.3178, + "step": 4004000 + }, + { + "epoch": 19.84, + "learning_rate": 4.0083725965230404e-05, + "loss": 2.3481, + "step": 4004500 + }, + { + "epoch": 19.84, + "learning_rate": 4.008248737880432e-05, + "loss": 2.3606, + "step": 4005000 + }, + { + "epoch": 19.84, + "learning_rate": 4.008124879237824e-05, + "loss": 2.3519, + "step": 4005500 + }, + { + "epoch": 19.85, + "learning_rate": 4.008001020595215e-05, + "loss": 2.3332, + "step": 4006000 + }, + { + "epoch": 19.85, + "learning_rate": 4.0078771619526065e-05, + "loss": 2.3432, + "step": 4006500 + }, + { + "epoch": 19.85, + "learning_rate": 4.007753551027284e-05, + "loss": 2.3641, + "step": 4007000 + }, + { + "epoch": 19.85, + "learning_rate": 4.007629692384676e-05, + "loss": 2.3344, + "step": 4007500 + }, + { + "epoch": 19.86, + "learning_rate": 4.007505833742067e-05, + "loss": 2.3358, + "step": 4008000 + }, + { + "epoch": 19.86, + "learning_rate": 4.0073819750994584e-05, + "loss": 2.3221, + "step": 4008500 + }, + { + "epoch": 19.86, + "learning_rate": 4.00725811645685e-05, + "loss": 2.3158, + "step": 4009000 + }, + { + "epoch": 19.86, + "learning_rate": 4.007134505531528e-05, + "loss": 2.3348, + "step": 4009500 + }, + { + "epoch": 19.87, + "learning_rate": 4.0070106468889194e-05, + "loss": 2.3408, + "step": 4010000 + }, + { + "epoch": 19.87, + "learning_rate": 4.0068870359635956e-05, + "loss": 2.339, + "step": 4010500 + }, + { + "epoch": 19.87, + "learning_rate": 4.0067634250382724e-05, + "loss": 2.3199, + "step": 4011000 + }, + { + "epoch": 19.87, + "learning_rate": 4.006639566395664e-05, + "loss": 2.3297, + "step": 4011500 + }, + { + "epoch": 19.88, + "learning_rate": 4.006515707753056e-05, + "loss": 2.3532, + "step": 4012000 + }, + { + "epoch": 19.88, + "learning_rate": 4.0063918491104475e-05, + "loss": 2.3398, + "step": 4012500 + }, + { + "epoch": 19.88, + "learning_rate": 4.006267990467839e-05, + "loss": 2.328, + "step": 4013000 + }, + { + "epoch": 19.88, + "learning_rate": 4.006144131825231e-05, + "loss": 2.3538, + "step": 4013500 + }, + { + "epoch": 19.89, + "learning_rate": 4.0060202731826226e-05, + "loss": 2.3285, + "step": 4014000 + }, + { + "epoch": 19.89, + "learning_rate": 4.005896414540014e-05, + "loss": 2.3619, + "step": 4014500 + }, + { + "epoch": 19.89, + "learning_rate": 4.005772555897406e-05, + "loss": 2.3521, + "step": 4015000 + }, + { + "epoch": 19.89, + "learning_rate": 4.005648697254798e-05, + "loss": 2.3502, + "step": 4015500 + }, + { + "epoch": 19.9, + "learning_rate": 4.0055248386121894e-05, + "loss": 2.3306, + "step": 4016000 + }, + { + "epoch": 19.9, + "learning_rate": 4.0054012276868656e-05, + "loss": 2.3506, + "step": 4016500 + }, + { + "epoch": 19.9, + "learning_rate": 4.005277369044257e-05, + "loss": 2.3259, + "step": 4017000 + }, + { + "epoch": 19.9, + "learning_rate": 4.005153758118934e-05, + "loss": 2.3518, + "step": 4017500 + }, + { + "epoch": 19.91, + "learning_rate": 4.005029899476326e-05, + "loss": 2.3261, + "step": 4018000 + }, + { + "epoch": 19.91, + "learning_rate": 4.0049060408337175e-05, + "loss": 2.3528, + "step": 4018500 + }, + { + "epoch": 19.91, + "learning_rate": 4.004782182191109e-05, + "loss": 2.3506, + "step": 4019000 + }, + { + "epoch": 19.91, + "learning_rate": 4.004658323548501e-05, + "loss": 2.3426, + "step": 4019500 + }, + { + "epoch": 19.92, + "learning_rate": 4.0045344649058926e-05, + "loss": 2.3404, + "step": 4020000 + }, + { + "epoch": 19.92, + "learning_rate": 4.004410606263284e-05, + "loss": 2.373, + "step": 4020500 + }, + { + "epoch": 19.92, + "learning_rate": 4.004286747620676e-05, + "loss": 2.3435, + "step": 4021000 + }, + { + "epoch": 19.92, + "learning_rate": 4.004163136695352e-05, + "loss": 2.329, + "step": 4021500 + }, + { + "epoch": 19.93, + "learning_rate": 4.004039278052744e-05, + "loss": 2.336, + "step": 4022000 + }, + { + "epoch": 19.93, + "learning_rate": 4.0039154194101356e-05, + "loss": 2.3432, + "step": 4022500 + }, + { + "epoch": 19.93, + "learning_rate": 4.003791560767527e-05, + "loss": 2.3376, + "step": 4023000 + }, + { + "epoch": 19.93, + "learning_rate": 4.003667702124919e-05, + "loss": 2.3447, + "step": 4023500 + }, + { + "epoch": 19.94, + "learning_rate": 4.003543843482311e-05, + "loss": 2.3564, + "step": 4024000 + }, + { + "epoch": 19.94, + "learning_rate": 4.0034202325569875e-05, + "loss": 2.3494, + "step": 4024500 + }, + { + "epoch": 19.94, + "learning_rate": 4.003296373914379e-05, + "loss": 2.3439, + "step": 4025000 + }, + { + "epoch": 19.94, + "learning_rate": 4.003172515271771e-05, + "loss": 2.3499, + "step": 4025500 + }, + { + "epoch": 19.95, + "learning_rate": 4.003048904346448e-05, + "loss": 2.311, + "step": 4026000 + }, + { + "epoch": 19.95, + "learning_rate": 4.0029250457038395e-05, + "loss": 2.3349, + "step": 4026500 + }, + { + "epoch": 19.95, + "learning_rate": 4.0028011870612305e-05, + "loss": 2.3582, + "step": 4027000 + }, + { + "epoch": 19.95, + "learning_rate": 4.002677328418622e-05, + "loss": 2.3338, + "step": 4027500 + }, + { + "epoch": 19.96, + "learning_rate": 4.002553469776014e-05, + "loss": 2.3483, + "step": 4028000 + }, + { + "epoch": 19.96, + "learning_rate": 4.0024296111334056e-05, + "loss": 2.3628, + "step": 4028500 + }, + { + "epoch": 19.96, + "learning_rate": 4.002305752490797e-05, + "loss": 2.3416, + "step": 4029000 + }, + { + "epoch": 19.96, + "learning_rate": 4.002182141565474e-05, + "loss": 2.3288, + "step": 4029500 + }, + { + "epoch": 19.97, + "learning_rate": 4.002058282922866e-05, + "loss": 2.3303, + "step": 4030000 + }, + { + "epoch": 19.97, + "learning_rate": 4.0019344242802576e-05, + "loss": 2.3498, + "step": 4030500 + }, + { + "epoch": 19.97, + "learning_rate": 4.001810565637649e-05, + "loss": 2.3482, + "step": 4031000 + }, + { + "epoch": 19.97, + "learning_rate": 4.001686706995041e-05, + "loss": 2.3286, + "step": 4031500 + }, + { + "epoch": 19.98, + "learning_rate": 4.0015628483524326e-05, + "loss": 2.3441, + "step": 4032000 + }, + { + "epoch": 19.98, + "learning_rate": 4.001438989709824e-05, + "loss": 2.3522, + "step": 4032500 + }, + { + "epoch": 19.98, + "learning_rate": 4.001315131067216e-05, + "loss": 2.3592, + "step": 4033000 + }, + { + "epoch": 19.98, + "learning_rate": 4.001191272424608e-05, + "loss": 2.3551, + "step": 4033500 + }, + { + "epoch": 19.99, + "learning_rate": 4.0010674137819994e-05, + "loss": 2.3237, + "step": 4034000 + }, + { + "epoch": 19.99, + "learning_rate": 4.000943555139391e-05, + "loss": 2.3385, + "step": 4034500 + }, + { + "epoch": 19.99, + "learning_rate": 4.000819696496783e-05, + "loss": 2.3485, + "step": 4035000 + }, + { + "epoch": 19.99, + "learning_rate": 4.0006958378541745e-05, + "loss": 2.3399, + "step": 4035500 + }, + { + "epoch": 20.0, + "learning_rate": 4.000572226928851e-05, + "loss": 2.3312, + "step": 4036000 + }, + { + "epoch": 20.0, + "learning_rate": 4.0004483682862424e-05, + "loss": 2.3215, + "step": 4036500 + }, + { + "epoch": 20.0, + "eval_accuracy": 0.6514623229212978, + "eval_accuracy_mlm": 0.6059239055867153, + "eval_accuracy_nsp": 0.8662569275844352, + "eval_loss": 2.360621213912964, + "eval_runtime": 145.9157, + "eval_samples_per_second": 1747.304, + "eval_steps_per_second": 72.809, + "step": 4036860 + }, + { + "epoch": 20.0, + "learning_rate": 4.000324509643634e-05, + "loss": 2.3361, + "step": 4037000 + }, + { + "epoch": 20.0, + "learning_rate": 4.000200651001026e-05, + "loss": 2.331, + "step": 4037500 + }, + { + "epoch": 20.01, + "learning_rate": 4.0000767923584175e-05, + "loss": 2.3296, + "step": 4038000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999952933715809e-05, + "loss": 2.324, + "step": 4038500 + }, + { + "epoch": 20.01, + "learning_rate": 3.999829075073201e-05, + "loss": 2.274, + "step": 4039000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999705216430592e-05, + "loss": 2.3324, + "step": 4039500 + }, + { + "epoch": 20.02, + "learning_rate": 3.9995813577879836e-05, + "loss": 2.3256, + "step": 4040000 + }, + { + "epoch": 20.02, + "learning_rate": 3.999457499145375e-05, + "loss": 2.2992, + "step": 4040500 + }, + { + "epoch": 20.02, + "learning_rate": 3.999333640502767e-05, + "loss": 2.2902, + "step": 4041000 + }, + { + "epoch": 20.02, + "learning_rate": 3.9992097818601586e-05, + "loss": 2.2879, + "step": 4041500 + }, + { + "epoch": 20.03, + "learning_rate": 3.999086170934836e-05, + "loss": 2.3138, + "step": 4042000 + }, + { + "epoch": 20.03, + "learning_rate": 3.998962312292227e-05, + "loss": 2.3257, + "step": 4042500 + }, + { + "epoch": 20.03, + "learning_rate": 3.998838453649619e-05, + "loss": 2.3098, + "step": 4043000 + }, + { + "epoch": 20.03, + "learning_rate": 3.9987145950070106e-05, + "loss": 2.2984, + "step": 4043500 + }, + { + "epoch": 20.04, + "learning_rate": 3.998590736364402e-05, + "loss": 2.2918, + "step": 4044000 + }, + { + "epoch": 20.04, + "learning_rate": 3.998466877721794e-05, + "loss": 2.2826, + "step": 4044500 + }, + { + "epoch": 20.04, + "learning_rate": 3.998343019079186e-05, + "loss": 2.3165, + "step": 4045000 + }, + { + "epoch": 20.04, + "learning_rate": 3.998219408153862e-05, + "loss": 2.3079, + "step": 4045500 + }, + { + "epoch": 20.05, + "learning_rate": 3.9980955495112536e-05, + "loss": 2.3265, + "step": 4046000 + }, + { + "epoch": 20.05, + "learning_rate": 3.997971690868645e-05, + "loss": 2.3006, + "step": 4046500 + }, + { + "epoch": 20.05, + "learning_rate": 3.997848079943323e-05, + "loss": 2.3074, + "step": 4047000 + }, + { + "epoch": 20.05, + "learning_rate": 3.9977242213007145e-05, + "loss": 2.3117, + "step": 4047500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997600362658106e-05, + "loss": 2.3119, + "step": 4048000 + }, + { + "epoch": 20.06, + "learning_rate": 3.997476504015497e-05, + "loss": 2.3299, + "step": 4048500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997352645372889e-05, + "loss": 2.3168, + "step": 4049000 + }, + { + "epoch": 20.06, + "learning_rate": 3.9972287867302806e-05, + "loss": 2.3114, + "step": 4049500 + }, + { + "epoch": 20.07, + "learning_rate": 3.997104928087672e-05, + "loss": 2.3004, + "step": 4050000 + }, + { + "epoch": 20.07, + "learning_rate": 3.996981069445064e-05, + "loss": 2.3204, + "step": 4050500 + }, + { + "epoch": 20.07, + "learning_rate": 3.996857458519741e-05, + "loss": 2.3282, + "step": 4051000 + }, + { + "epoch": 20.07, + "learning_rate": 3.9967335998771326e-05, + "loss": 2.3293, + "step": 4051500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9966097412345236e-05, + "loss": 2.3379, + "step": 4052000 + }, + { + "epoch": 20.08, + "learning_rate": 3.996485882591915e-05, + "loss": 2.3334, + "step": 4052500 + }, + { + "epoch": 20.08, + "learning_rate": 3.996362271666593e-05, + "loss": 2.3042, + "step": 4053000 + }, + { + "epoch": 20.08, + "learning_rate": 3.9962384130239845e-05, + "loss": 2.3357, + "step": 4053500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9961148020986614e-05, + "loss": 2.3007, + "step": 4054000 + }, + { + "epoch": 20.09, + "learning_rate": 3.9959909434560524e-05, + "loss": 2.3134, + "step": 4054500 + }, + { + "epoch": 20.09, + "learning_rate": 3.995867084813444e-05, + "loss": 2.3141, + "step": 4055000 + }, + { + "epoch": 20.09, + "learning_rate": 3.995743226170836e-05, + "loss": 2.3416, + "step": 4055500 + }, + { + "epoch": 20.09, + "learning_rate": 3.9956193675282275e-05, + "loss": 2.3416, + "step": 4056000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995495508885619e-05, + "loss": 2.3061, + "step": 4056500 + }, + { + "epoch": 20.1, + "learning_rate": 3.995371650243011e-05, + "loss": 2.3172, + "step": 4057000 + }, + { + "epoch": 20.1, + "learning_rate": 3.9952477916004026e-05, + "loss": 2.3176, + "step": 4057500 + }, + { + "epoch": 20.1, + "learning_rate": 3.9951241806750795e-05, + "loss": 2.3158, + "step": 4058000 + }, + { + "epoch": 20.11, + "learning_rate": 3.995000322032471e-05, + "loss": 2.3287, + "step": 4058500 + }, + { + "epoch": 20.11, + "learning_rate": 3.994876711107148e-05, + "loss": 2.3434, + "step": 4059000 + }, + { + "epoch": 20.11, + "learning_rate": 3.99475285246454e-05, + "loss": 2.3069, + "step": 4059500 + }, + { + "epoch": 20.11, + "learning_rate": 3.9946289938219314e-05, + "loss": 2.3125, + "step": 4060000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994505135179323e-05, + "loss": 2.3109, + "step": 4060500 + }, + { + "epoch": 20.12, + "learning_rate": 3.994381276536714e-05, + "loss": 2.3453, + "step": 4061000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994257417894106e-05, + "loss": 2.339, + "step": 4061500 + }, + { + "epoch": 20.12, + "learning_rate": 3.9941335592514975e-05, + "loss": 2.3199, + "step": 4062000 + }, + { + "epoch": 20.13, + "learning_rate": 3.994009700608889e-05, + "loss": 2.3294, + "step": 4062500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993885841966281e-05, + "loss": 2.3076, + "step": 4063000 + }, + { + "epoch": 20.13, + "learning_rate": 3.9937619833236726e-05, + "loss": 2.3158, + "step": 4063500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993638124681064e-05, + "loss": 2.3061, + "step": 4064000 + }, + { + "epoch": 20.14, + "learning_rate": 3.993514266038455e-05, + "loss": 2.343, + "step": 4064500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993390655113133e-05, + "loss": 2.3145, + "step": 4065000 + }, + { + "epoch": 20.14, + "learning_rate": 3.9932667964705246e-05, + "loss": 2.2968, + "step": 4065500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993142937827916e-05, + "loss": 2.3266, + "step": 4066000 + }, + { + "epoch": 20.15, + "learning_rate": 3.993019079185308e-05, + "loss": 2.3402, + "step": 4066500 + }, + { + "epoch": 20.15, + "learning_rate": 3.9928952205426996e-05, + "loss": 2.3157, + "step": 4067000 + }, + { + "epoch": 20.15, + "learning_rate": 3.992771857334661e-05, + "loss": 2.3301, + "step": 4067500 + }, + { + "epoch": 20.15, + "learning_rate": 3.992647998692053e-05, + "loss": 2.3513, + "step": 4068000 + }, + { + "epoch": 20.16, + "learning_rate": 3.9925241400494444e-05, + "loss": 2.3077, + "step": 4068500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992400281406836e-05, + "loss": 2.3166, + "step": 4069000 + }, + { + "epoch": 20.16, + "learning_rate": 3.992276422764228e-05, + "loss": 2.3251, + "step": 4069500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992152811838905e-05, + "loss": 2.3054, + "step": 4070000 + }, + { + "epoch": 20.17, + "learning_rate": 3.9920289531962964e-05, + "loss": 2.322, + "step": 4070500 + }, + { + "epoch": 20.17, + "learning_rate": 3.9919053422709726e-05, + "loss": 2.3153, + "step": 4071000 + }, + { + "epoch": 20.17, + "learning_rate": 3.991781483628364e-05, + "loss": 2.2919, + "step": 4071500 + }, + { + "epoch": 20.17, + "learning_rate": 3.991657624985756e-05, + "loss": 2.3338, + "step": 4072000 + }, + { + "epoch": 20.18, + "learning_rate": 3.9915337663431476e-05, + "loss": 2.3325, + "step": 4072500 + }, + { + "epoch": 20.18, + "learning_rate": 3.9914099077005393e-05, + "loss": 2.3226, + "step": 4073000 + }, + { + "epoch": 20.18, + "learning_rate": 3.991286049057931e-05, + "loss": 2.3341, + "step": 4073500 + }, + { + "epoch": 20.18, + "learning_rate": 3.991162190415323e-05, + "loss": 2.2985, + "step": 4074000 + }, + { + "epoch": 20.19, + "learning_rate": 3.9910383317727144e-05, + "loss": 2.3138, + "step": 4074500 + }, + { + "epoch": 20.19, + "learning_rate": 3.990914473130106e-05, + "loss": 2.3167, + "step": 4075000 + }, + { + "epoch": 20.19, + "learning_rate": 3.990790614487498e-05, + "loss": 2.3082, + "step": 4075500 + }, + { + "epoch": 20.19, + "learning_rate": 3.9906667558448895e-05, + "loss": 2.3136, + "step": 4076000 + }, + { + "epoch": 20.2, + "learning_rate": 3.990542897202281e-05, + "loss": 2.3414, + "step": 4076500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990419038559673e-05, + "loss": 2.298, + "step": 4077000 + }, + { + "epoch": 20.2, + "learning_rate": 3.99029542763435e-05, + "loss": 2.317, + "step": 4077500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990171816709026e-05, + "loss": 2.3172, + "step": 4078000 + }, + { + "epoch": 20.21, + "learning_rate": 3.9900479580664177e-05, + "loss": 2.3035, + "step": 4078500 + }, + { + "epoch": 20.21, + "learning_rate": 3.9899240994238094e-05, + "loss": 2.3391, + "step": 4079000 + }, + { + "epoch": 20.21, + "learning_rate": 3.989800240781201e-05, + "loss": 2.3139, + "step": 4079500 + }, + { + "epoch": 20.21, + "learning_rate": 3.989676382138593e-05, + "loss": 2.297, + "step": 4080000 + }, + { + "epoch": 20.22, + "learning_rate": 3.9895527712132696e-05, + "loss": 2.326, + "step": 4080500 + }, + { + "epoch": 20.22, + "learning_rate": 3.989428912570661e-05, + "loss": 2.3263, + "step": 4081000 + }, + { + "epoch": 20.22, + "learning_rate": 3.989305053928053e-05, + "loss": 2.3248, + "step": 4081500 + }, + { + "epoch": 20.22, + "learning_rate": 3.98918144300273e-05, + "loss": 2.3094, + "step": 4082000 + }, + { + "epoch": 20.23, + "learning_rate": 3.9890575843601216e-05, + "loss": 2.3216, + "step": 4082500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988933725717513e-05, + "loss": 2.3138, + "step": 4083000 + }, + { + "epoch": 20.23, + "learning_rate": 3.988809867074905e-05, + "loss": 2.3215, + "step": 4083500 + }, + { + "epoch": 20.23, + "learning_rate": 3.9886860084322967e-05, + "loss": 2.3122, + "step": 4084000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988562149789688e-05, + "loss": 2.3316, + "step": 4084500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988438538864365e-05, + "loss": 2.324, + "step": 4085000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988314680221757e-05, + "loss": 2.3268, + "step": 4085500 + }, + { + "epoch": 20.24, + "learning_rate": 3.9881908215791486e-05, + "loss": 2.2985, + "step": 4086000 + }, + { + "epoch": 20.25, + "learning_rate": 3.98806696293654e-05, + "loss": 2.3345, + "step": 4086500 + }, + { + "epoch": 20.25, + "learning_rate": 3.987943104293931e-05, + "loss": 2.3098, + "step": 4087000 + }, + { + "epoch": 20.25, + "learning_rate": 3.987819245651323e-05, + "loss": 2.3195, + "step": 4087500 + }, + { + "epoch": 20.25, + "learning_rate": 3.987695387008715e-05, + "loss": 2.373, + "step": 4088000 + }, + { + "epoch": 20.26, + "learning_rate": 3.9875715283661064e-05, + "loss": 2.3466, + "step": 4088500 + }, + { + "epoch": 20.26, + "learning_rate": 3.987447669723498e-05, + "loss": 2.3485, + "step": 4089000 + }, + { + "epoch": 20.26, + "learning_rate": 3.98732381108089e-05, + "loss": 2.3326, + "step": 4089500 + }, + { + "epoch": 20.26, + "learning_rate": 3.9871999524382815e-05, + "loss": 2.2901, + "step": 4090000 + }, + { + "epoch": 20.27, + "learning_rate": 3.987076093795673e-05, + "loss": 2.3226, + "step": 4090500 + }, + { + "epoch": 20.27, + "learning_rate": 3.98695248287035e-05, + "loss": 2.3075, + "step": 4091000 + }, + { + "epoch": 20.27, + "learning_rate": 3.986828624227741e-05, + "loss": 2.2915, + "step": 4091500 + }, + { + "epoch": 20.27, + "learning_rate": 3.986704765585133e-05, + "loss": 2.3465, + "step": 4092000 + }, + { + "epoch": 20.28, + "learning_rate": 3.9865809069425245e-05, + "loss": 2.3153, + "step": 4092500 + }, + { + "epoch": 20.28, + "learning_rate": 3.986457048299916e-05, + "loss": 2.3102, + "step": 4093000 + }, + { + "epoch": 20.28, + "learning_rate": 3.986333189657308e-05, + "loss": 2.3314, + "step": 4093500 + }, + { + "epoch": 20.28, + "learning_rate": 3.9862093310146995e-05, + "loss": 2.3403, + "step": 4094000 + }, + { + "epoch": 20.29, + "learning_rate": 3.9860857200893764e-05, + "loss": 2.3266, + "step": 4094500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985962109164054e-05, + "loss": 2.3155, + "step": 4095000 + }, + { + "epoch": 20.29, + "learning_rate": 3.985838250521445e-05, + "loss": 2.3508, + "step": 4095500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985714391878837e-05, + "loss": 2.32, + "step": 4096000 + }, + { + "epoch": 20.3, + "learning_rate": 3.9855905332362284e-05, + "loss": 2.328, + "step": 4096500 + }, + { + "epoch": 20.3, + "learning_rate": 3.98546667459362e-05, + "loss": 2.3088, + "step": 4097000 + }, + { + "epoch": 20.3, + "learning_rate": 3.985343063668297e-05, + "loss": 2.3212, + "step": 4097500 + }, + { + "epoch": 20.3, + "learning_rate": 3.9852192050256886e-05, + "loss": 2.3098, + "step": 4098000 + }, + { + "epoch": 20.31, + "learning_rate": 3.98509534638308e-05, + "loss": 2.32, + "step": 4098500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984971487740472e-05, + "loss": 2.3258, + "step": 4099000 + }, + { + "epoch": 20.31, + "learning_rate": 3.984847629097863e-05, + "loss": 2.3186, + "step": 4099500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984723770455255e-05, + "loss": 2.3401, + "step": 4100000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984600159529932e-05, + "loss": 2.3358, + "step": 4100500 + }, + { + "epoch": 20.32, + "learning_rate": 3.984476300887324e-05, + "loss": 2.323, + "step": 4101000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984352442244716e-05, + "loss": 2.3239, + "step": 4101500 + }, + { + "epoch": 20.32, + "learning_rate": 3.9842285836021074e-05, + "loss": 2.2897, + "step": 4102000 + }, + { + "epoch": 20.33, + "learning_rate": 3.9841049726767836e-05, + "loss": 2.3372, + "step": 4102500 + }, + { + "epoch": 20.33, + "learning_rate": 3.983981114034175e-05, + "loss": 2.3183, + "step": 4103000 + }, + { + "epoch": 20.33, + "learning_rate": 3.983857255391567e-05, + "loss": 2.3244, + "step": 4103500 + }, + { + "epoch": 20.33, + "learning_rate": 3.9837333967489587e-05, + "loss": 2.341, + "step": 4104000 + }, + { + "epoch": 20.34, + "learning_rate": 3.9836095381063503e-05, + "loss": 2.3251, + "step": 4104500 + }, + { + "epoch": 20.34, + "learning_rate": 3.983485679463742e-05, + "loss": 2.3282, + "step": 4105000 + }, + { + "epoch": 20.34, + "learning_rate": 3.983361820821133e-05, + "loss": 2.3602, + "step": 4105500 + }, + { + "epoch": 20.34, + "learning_rate": 3.983237962178525e-05, + "loss": 2.3238, + "step": 4106000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9831141035359164e-05, + "loss": 2.3484, + "step": 4106500 + }, + { + "epoch": 20.35, + "learning_rate": 3.982990244893308e-05, + "loss": 2.3466, + "step": 4107000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9828663862507e-05, + "loss": 2.3151, + "step": 4107500 + }, + { + "epoch": 20.35, + "learning_rate": 3.9827427753253774e-05, + "loss": 2.3078, + "step": 4108000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9826189166827684e-05, + "loss": 2.3361, + "step": 4108500 + }, + { + "epoch": 20.36, + "learning_rate": 3.98249505804016e-05, + "loss": 2.3302, + "step": 4109000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982371199397552e-05, + "loss": 2.3368, + "step": 4109500 + }, + { + "epoch": 20.36, + "learning_rate": 3.9822473407549435e-05, + "loss": 2.3268, + "step": 4110000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982123482112335e-05, + "loss": 2.337, + "step": 4110500 + }, + { + "epoch": 20.37, + "learning_rate": 3.981999623469727e-05, + "loss": 2.3263, + "step": 4111000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981875764827118e-05, + "loss": 2.3309, + "step": 4111500 + }, + { + "epoch": 20.37, + "learning_rate": 3.9817519061845096e-05, + "loss": 2.3406, + "step": 4112000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981628047541901e-05, + "loss": 2.304, + "step": 4112500 + }, + { + "epoch": 20.38, + "learning_rate": 3.981504188899293e-05, + "loss": 2.3435, + "step": 4113000 + }, + { + "epoch": 20.38, + "learning_rate": 3.9813803302566847e-05, + "loss": 2.3374, + "step": 4113500 + }, + { + "epoch": 20.38, + "learning_rate": 3.9812567193313615e-05, + "loss": 2.334, + "step": 4114000 + }, + { + "epoch": 20.38, + "learning_rate": 3.981132860688753e-05, + "loss": 2.3147, + "step": 4114500 + }, + { + "epoch": 20.39, + "learning_rate": 3.981009002046145e-05, + "loss": 2.33, + "step": 4115000 + }, + { + "epoch": 20.39, + "learning_rate": 3.9808851434035366e-05, + "loss": 2.3088, + "step": 4115500 + }, + { + "epoch": 20.39, + "learning_rate": 3.980761284760928e-05, + "loss": 2.3281, + "step": 4116000 + }, + { + "epoch": 20.39, + "learning_rate": 3.98063742611832e-05, + "loss": 2.3257, + "step": 4116500 + }, + { + "epoch": 20.4, + "learning_rate": 3.980513815192997e-05, + "loss": 2.2962, + "step": 4117000 + }, + { + "epoch": 20.4, + "learning_rate": 3.9803899565503886e-05, + "loss": 2.3362, + "step": 4117500 + }, + { + "epoch": 20.4, + "learning_rate": 3.98026609790778e-05, + "loss": 2.3423, + "step": 4118000 + }, + { + "epoch": 20.4, + "learning_rate": 3.980142239265171e-05, + "loss": 2.2976, + "step": 4118500 + }, + { + "epoch": 20.41, + "learning_rate": 3.980018380622563e-05, + "loss": 2.3139, + "step": 4119000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979894521979955e-05, + "loss": 2.3334, + "step": 4119500 + }, + { + "epoch": 20.41, + "learning_rate": 3.9797709110546315e-05, + "loss": 2.3412, + "step": 4120000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979647052412023e-05, + "loss": 2.3465, + "step": 4120500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979523193769415e-05, + "loss": 2.3413, + "step": 4121000 + }, + { + "epoch": 20.42, + "learning_rate": 3.9793993351268066e-05, + "loss": 2.3351, + "step": 4121500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979275476484198e-05, + "loss": 2.3251, + "step": 4122000 + }, + { + "epoch": 20.42, + "learning_rate": 3.979151865558875e-05, + "loss": 2.3504, + "step": 4122500 + }, + { + "epoch": 20.43, + "learning_rate": 3.979028006916267e-05, + "loss": 2.3297, + "step": 4123000 + }, + { + "epoch": 20.43, + "learning_rate": 3.9789041482736586e-05, + "loss": 2.3087, + "step": 4123500 + }, + { + "epoch": 20.43, + "learning_rate": 3.97878028963105e-05, + "loss": 2.3201, + "step": 4124000 + }, + { + "epoch": 20.43, + "learning_rate": 3.978656430988442e-05, + "loss": 2.2992, + "step": 4124500 + }, + { + "epoch": 20.44, + "learning_rate": 3.978532572345833e-05, + "loss": 2.3279, + "step": 4125000 + }, + { + "epoch": 20.44, + "learning_rate": 3.97840896142051e-05, + "loss": 2.3199, + "step": 4125500 + }, + { + "epoch": 20.44, + "learning_rate": 3.9782851027779016e-05, + "loss": 2.3297, + "step": 4126000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978161244135293e-05, + "loss": 2.3423, + "step": 4126500 + }, + { + "epoch": 20.45, + "learning_rate": 3.978037385492685e-05, + "loss": 2.3141, + "step": 4127000 + }, + { + "epoch": 20.45, + "learning_rate": 3.9779135268500766e-05, + "loss": 2.336, + "step": 4127500 + }, + { + "epoch": 20.45, + "learning_rate": 3.977789668207468e-05, + "loss": 2.3278, + "step": 4128000 + }, + { + "epoch": 20.45, + "learning_rate": 3.97766580956486e-05, + "loss": 2.3016, + "step": 4128500 + }, + { + "epoch": 20.46, + "learning_rate": 3.977541950922252e-05, + "loss": 2.3267, + "step": 4129000 + }, + { + "epoch": 20.46, + "learning_rate": 3.9774183399969286e-05, + "loss": 2.3293, + "step": 4129500 + }, + { + "epoch": 20.46, + "learning_rate": 3.97729448135432e-05, + "loss": 2.3389, + "step": 4130000 + }, + { + "epoch": 20.46, + "learning_rate": 3.977170622711712e-05, + "loss": 2.3131, + "step": 4130500 + }, + { + "epoch": 20.47, + "learning_rate": 3.977046764069104e-05, + "loss": 2.3192, + "step": 4131000 + }, + { + "epoch": 20.47, + "learning_rate": 3.9769229054264954e-05, + "loss": 2.3528, + "step": 4131500 + }, + { + "epoch": 20.47, + "learning_rate": 3.9767992945011716e-05, + "loss": 2.3322, + "step": 4132000 + }, + { + "epoch": 20.47, + "learning_rate": 3.976675435858563e-05, + "loss": 2.3396, + "step": 4132500 + }, + { + "epoch": 20.48, + "learning_rate": 3.976551824933241e-05, + "loss": 2.3243, + "step": 4133000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976427966290632e-05, + "loss": 2.3496, + "step": 4133500 + }, + { + "epoch": 20.48, + "learning_rate": 3.9763041076480235e-05, + "loss": 2.3338, + "step": 4134000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976180249005415e-05, + "loss": 2.2961, + "step": 4134500 + }, + { + "epoch": 20.49, + "learning_rate": 3.976056390362807e-05, + "loss": 2.3068, + "step": 4135000 + }, + { + "epoch": 20.49, + "learning_rate": 3.9759325317201986e-05, + "loss": 2.358, + "step": 4135500 + }, + { + "epoch": 20.49, + "learning_rate": 3.97580867307759e-05, + "loss": 2.2942, + "step": 4136000 + }, + { + "epoch": 20.49, + "learning_rate": 3.975684814434982e-05, + "loss": 2.3191, + "step": 4136500 + }, + { + "epoch": 20.5, + "learning_rate": 3.975561203509658e-05, + "loss": 2.3382, + "step": 4137000 + }, + { + "epoch": 20.5, + "learning_rate": 3.97543734486705e-05, + "loss": 2.3275, + "step": 4137500 + }, + { + "epoch": 20.5, + "learning_rate": 3.9753134862244416e-05, + "loss": 2.352, + "step": 4138000 + }, + { + "epoch": 20.5, + "learning_rate": 3.975189627581833e-05, + "loss": 2.3277, + "step": 4138500 + }, + { + "epoch": 20.51, + "learning_rate": 3.975066016656511e-05, + "loss": 2.3214, + "step": 4139000 + }, + { + "epoch": 20.51, + "learning_rate": 3.974942405731187e-05, + "loss": 2.3368, + "step": 4139500 + }, + { + "epoch": 20.51, + "learning_rate": 3.974818547088579e-05, + "loss": 2.3367, + "step": 4140000 + }, + { + "epoch": 20.51, + "learning_rate": 3.9746946884459704e-05, + "loss": 2.3376, + "step": 4140500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974570829803362e-05, + "loss": 2.3505, + "step": 4141000 + }, + { + "epoch": 20.52, + "learning_rate": 3.974447218878039e-05, + "loss": 2.3357, + "step": 4141500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974323607952716e-05, + "loss": 2.326, + "step": 4142000 + }, + { + "epoch": 20.52, + "learning_rate": 3.9741997493101076e-05, + "loss": 2.3177, + "step": 4142500 + }, + { + "epoch": 20.53, + "learning_rate": 3.974075890667499e-05, + "loss": 2.3331, + "step": 4143000 + }, + { + "epoch": 20.53, + "learning_rate": 3.973952032024891e-05, + "loss": 2.3333, + "step": 4143500 + }, + { + "epoch": 20.53, + "learning_rate": 3.9738281733822826e-05, + "loss": 2.3378, + "step": 4144000 + }, + { + "epoch": 20.53, + "learning_rate": 3.9737043147396743e-05, + "loss": 2.3116, + "step": 4144500 + }, + { + "epoch": 20.54, + "learning_rate": 3.973580456097066e-05, + "loss": 2.3071, + "step": 4145000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973456597454458e-05, + "loss": 2.302, + "step": 4145500 + }, + { + "epoch": 20.54, + "learning_rate": 3.9733327388118494e-05, + "loss": 2.3401, + "step": 4146000 + }, + { + "epoch": 20.54, + "learning_rate": 3.9732088801692404e-05, + "loss": 2.3107, + "step": 4146500 + }, + { + "epoch": 20.55, + "learning_rate": 3.973085021526632e-05, + "loss": 2.3115, + "step": 4147000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972961162884024e-05, + "loss": 2.3198, + "step": 4147500 + }, + { + "epoch": 20.55, + "learning_rate": 3.9728373042414155e-05, + "loss": 2.3458, + "step": 4148000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972713445598807e-05, + "loss": 2.3251, + "step": 4148500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972589586956199e-05, + "loss": 2.3449, + "step": 4149000 + }, + { + "epoch": 20.56, + "learning_rate": 3.97246572831359e-05, + "loss": 2.3381, + "step": 4149500 + }, + { + "epoch": 20.56, + "learning_rate": 3.9723421173882675e-05, + "loss": 2.3345, + "step": 4150000 + }, + { + "epoch": 20.56, + "learning_rate": 3.972218258745659e-05, + "loss": 2.2837, + "step": 4150500 + }, + { + "epoch": 20.57, + "learning_rate": 3.972094400103051e-05, + "loss": 2.3166, + "step": 4151000 + }, + { + "epoch": 20.57, + "learning_rate": 3.9719705414604426e-05, + "loss": 2.3352, + "step": 4151500 + }, + { + "epoch": 20.57, + "learning_rate": 3.971846682817834e-05, + "loss": 2.3251, + "step": 4152000 + }, + { + "epoch": 20.57, + "learning_rate": 3.971723071892511e-05, + "loss": 2.3379, + "step": 4152500 + }, + { + "epoch": 20.58, + "learning_rate": 3.971599213249902e-05, + "loss": 2.3292, + "step": 4153000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971475354607294e-05, + "loss": 2.3387, + "step": 4153500 + }, + { + "epoch": 20.58, + "learning_rate": 3.9713514959646855e-05, + "loss": 2.3423, + "step": 4154000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971227637322077e-05, + "loss": 2.3238, + "step": 4154500 + }, + { + "epoch": 20.59, + "learning_rate": 3.971104026396754e-05, + "loss": 2.3356, + "step": 4155000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970980167754146e-05, + "loss": 2.3428, + "step": 4155500 + }, + { + "epoch": 20.59, + "learning_rate": 3.9708563091115375e-05, + "loss": 2.3381, + "step": 4156000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970732450468929e-05, + "loss": 2.335, + "step": 4156500 + }, + { + "epoch": 20.6, + "learning_rate": 3.970608591826321e-05, + "loss": 2.3374, + "step": 4157000 + }, + { + "epoch": 20.6, + "learning_rate": 3.9704847331837126e-05, + "loss": 2.3173, + "step": 4157500 + }, + { + "epoch": 20.6, + "learning_rate": 3.9703611222583894e-05, + "loss": 2.3357, + "step": 4158000 + }, + { + "epoch": 20.6, + "learning_rate": 3.970237263615781e-05, + "loss": 2.3303, + "step": 4158500 + }, + { + "epoch": 20.61, + "learning_rate": 3.9701136526904573e-05, + "loss": 2.3437, + "step": 4159000 + }, + { + "epoch": 20.61, + "learning_rate": 3.969989794047849e-05, + "loss": 2.2999, + "step": 4159500 + }, + { + "epoch": 20.61, + "learning_rate": 3.969865935405241e-05, + "loss": 2.3308, + "step": 4160000 + }, + { + "epoch": 20.61, + "learning_rate": 3.9697420767626324e-05, + "loss": 2.3444, + "step": 4160500 + }, + { + "epoch": 20.62, + "learning_rate": 3.969618218120024e-05, + "loss": 2.3279, + "step": 4161000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969494359477416e-05, + "loss": 2.3423, + "step": 4161500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9693705008348075e-05, + "loss": 2.352, + "step": 4162000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969246642192199e-05, + "loss": 2.3336, + "step": 4162500 + }, + { + "epoch": 20.62, + "learning_rate": 3.969122783549591e-05, + "loss": 2.3079, + "step": 4163000 + }, + { + "epoch": 20.63, + "learning_rate": 3.9689989249069826e-05, + "loss": 2.3315, + "step": 4163500 + }, + { + "epoch": 20.63, + "learning_rate": 3.9688753139816595e-05, + "loss": 2.3208, + "step": 4164000 + }, + { + "epoch": 20.63, + "learning_rate": 3.968751455339051e-05, + "loss": 2.3149, + "step": 4164500 + }, + { + "epoch": 20.63, + "learning_rate": 3.968627596696443e-05, + "loss": 2.3026, + "step": 4165000 + }, + { + "epoch": 20.64, + "learning_rate": 3.9685037380538345e-05, + "loss": 2.3623, + "step": 4165500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968379879411226e-05, + "loss": 2.3316, + "step": 4166000 + }, + { + "epoch": 20.64, + "learning_rate": 3.968256020768617e-05, + "loss": 2.3202, + "step": 4166500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968132162126009e-05, + "loss": 2.3328, + "step": 4167000 + }, + { + "epoch": 20.65, + "learning_rate": 3.9680083034834006e-05, + "loss": 2.3363, + "step": 4167500 + }, + { + "epoch": 20.65, + "learning_rate": 3.9678846925580775e-05, + "loss": 2.339, + "step": 4168000 + }, + { + "epoch": 20.65, + "learning_rate": 3.967760833915469e-05, + "loss": 2.3511, + "step": 4168500 + }, + { + "epoch": 20.65, + "learning_rate": 3.967636975272861e-05, + "loss": 2.3296, + "step": 4169000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967513364347538e-05, + "loss": 2.3214, + "step": 4169500 + }, + { + "epoch": 20.66, + "learning_rate": 3.9673895057049295e-05, + "loss": 2.3289, + "step": 4170000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967265647062321e-05, + "loss": 2.333, + "step": 4170500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967141788419713e-05, + "loss": 2.3358, + "step": 4171000 + }, + { + "epoch": 20.67, + "learning_rate": 3.9670179297771045e-05, + "loss": 2.3285, + "step": 4171500 + }, + { + "epoch": 20.67, + "learning_rate": 3.966894318851781e-05, + "loss": 2.3058, + "step": 4172000 + }, + { + "epoch": 20.67, + "learning_rate": 3.9667704602091724e-05, + "loss": 2.3333, + "step": 4172500 + }, + { + "epoch": 20.67, + "learning_rate": 3.966646601566564e-05, + "loss": 2.3302, + "step": 4173000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966522742923956e-05, + "loss": 2.3255, + "step": 4173500 + }, + { + "epoch": 20.68, + "learning_rate": 3.9663988842813475e-05, + "loss": 2.3237, + "step": 4174000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966275025638739e-05, + "loss": 2.3398, + "step": 4174500 + }, + { + "epoch": 20.68, + "learning_rate": 3.966151166996131e-05, + "loss": 2.3193, + "step": 4175000 + }, + { + "epoch": 20.69, + "learning_rate": 3.9660273083535226e-05, + "loss": 2.3686, + "step": 4175500 + }, + { + "epoch": 20.69, + "learning_rate": 3.9659036974281995e-05, + "loss": 2.3719, + "step": 4176000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965779838785591e-05, + "loss": 2.3404, + "step": 4176500 + }, + { + "epoch": 20.69, + "learning_rate": 3.965655980142983e-05, + "loss": 2.333, + "step": 4177000 + }, + { + "epoch": 20.7, + "learning_rate": 3.965532616934945e-05, + "loss": 2.339, + "step": 4177500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965408758292336e-05, + "loss": 2.3373, + "step": 4178000 + }, + { + "epoch": 20.7, + "learning_rate": 3.9652851473670135e-05, + "loss": 2.3554, + "step": 4178500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965161288724405e-05, + "loss": 2.3353, + "step": 4179000 + }, + { + "epoch": 20.71, + "learning_rate": 3.965037430081797e-05, + "loss": 2.3286, + "step": 4179500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9649135714391886e-05, + "loss": 2.3024, + "step": 4180000 + }, + { + "epoch": 20.71, + "learning_rate": 3.964789960513865e-05, + "loss": 2.3233, + "step": 4180500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9646661018712565e-05, + "loss": 2.328, + "step": 4181000 + }, + { + "epoch": 20.72, + "learning_rate": 3.964542243228648e-05, + "loss": 2.301, + "step": 4181500 + }, + { + "epoch": 20.72, + "learning_rate": 3.96441838458604e-05, + "loss": 2.3744, + "step": 4182000 + }, + { + "epoch": 20.72, + "learning_rate": 3.9642945259434316e-05, + "loss": 2.3801, + "step": 4182500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964170667300823e-05, + "loss": 2.3168, + "step": 4183000 + }, + { + "epoch": 20.73, + "learning_rate": 3.964046808658215e-05, + "loss": 2.3182, + "step": 4183500 + }, + { + "epoch": 20.73, + "learning_rate": 3.9639229500156066e-05, + "loss": 2.3429, + "step": 4184000 + }, + { + "epoch": 20.73, + "learning_rate": 3.9637990913729977e-05, + "loss": 2.3526, + "step": 4184500 + }, + { + "epoch": 20.73, + "learning_rate": 3.9636752327303893e-05, + "loss": 2.3447, + "step": 4185000 + }, + { + "epoch": 20.74, + "learning_rate": 3.963551374087781e-05, + "loss": 2.3361, + "step": 4185500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963427515445173e-05, + "loss": 2.3486, + "step": 4186000 + }, + { + "epoch": 20.74, + "learning_rate": 3.9633036568025644e-05, + "loss": 2.3517, + "step": 4186500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963179798159956e-05, + "loss": 2.3206, + "step": 4187000 + }, + { + "epoch": 20.75, + "learning_rate": 3.963055939517348e-05, + "loss": 2.3178, + "step": 4187500 + }, + { + "epoch": 20.75, + "learning_rate": 3.9629320808747395e-05, + "loss": 2.3548, + "step": 4188000 + }, + { + "epoch": 20.75, + "learning_rate": 3.962808222232131e-05, + "loss": 2.3043, + "step": 4188500 + }, + { + "epoch": 20.75, + "learning_rate": 3.962684859024093e-05, + "loss": 2.3362, + "step": 4189000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962561000381485e-05, + "loss": 2.3417, + "step": 4189500 + }, + { + "epoch": 20.76, + "learning_rate": 3.9624371417388767e-05, + "loss": 2.3329, + "step": 4190000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962313283096268e-05, + "loss": 2.3078, + "step": 4190500 + }, + { + "epoch": 20.76, + "learning_rate": 3.9621894244536594e-05, + "loss": 2.3241, + "step": 4191000 + }, + { + "epoch": 20.77, + "learning_rate": 3.962065565811051e-05, + "loss": 2.3591, + "step": 4191500 + }, + { + "epoch": 20.77, + "learning_rate": 3.961941707168443e-05, + "loss": 2.3285, + "step": 4192000 + }, + { + "epoch": 20.77, + "learning_rate": 3.9618178485258344e-05, + "loss": 2.3298, + "step": 4192500 + }, + { + "epoch": 20.77, + "learning_rate": 3.961694237600512e-05, + "loss": 2.3402, + "step": 4193000 + }, + { + "epoch": 20.78, + "learning_rate": 3.961570378957903e-05, + "loss": 2.3157, + "step": 4193500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961446520315295e-05, + "loss": 2.3364, + "step": 4194000 + }, + { + "epoch": 20.78, + "learning_rate": 3.9613226616726864e-05, + "loss": 2.339, + "step": 4194500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961198803030078e-05, + "loss": 2.3329, + "step": 4195000 + }, + { + "epoch": 20.79, + "learning_rate": 3.961075192104755e-05, + "loss": 2.3085, + "step": 4195500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960951333462147e-05, + "loss": 2.3486, + "step": 4196000 + }, + { + "epoch": 20.79, + "learning_rate": 3.9608277225368235e-05, + "loss": 2.3269, + "step": 4196500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960703863894215e-05, + "loss": 2.3502, + "step": 4197000 + }, + { + "epoch": 20.8, + "learning_rate": 3.960580005251607e-05, + "loss": 2.32, + "step": 4197500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960456642043568e-05, + "loss": 2.3226, + "step": 4198000 + }, + { + "epoch": 20.8, + "learning_rate": 3.96033278340096e-05, + "loss": 2.3389, + "step": 4198500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960208924758352e-05, + "loss": 2.321, + "step": 4199000 + }, + { + "epoch": 20.81, + "learning_rate": 3.9600850661157434e-05, + "loss": 2.3624, + "step": 4199500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959961207473135e-05, + "loss": 2.3265, + "step": 4200000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959837596547812e-05, + "loss": 2.3382, + "step": 4200500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959713737905204e-05, + "loss": 2.3298, + "step": 4201000 + }, + { + "epoch": 20.82, + "learning_rate": 3.9595898792625954e-05, + "loss": 2.3306, + "step": 4201500 + }, + { + "epoch": 20.82, + "learning_rate": 3.959466020619987e-05, + "loss": 2.3273, + "step": 4202000 + }, + { + "epoch": 20.82, + "learning_rate": 3.959342409694664e-05, + "loss": 2.3362, + "step": 4202500 + }, + { + "epoch": 20.82, + "learning_rate": 3.9592185510520556e-05, + "loss": 2.3331, + "step": 4203000 + }, + { + "epoch": 20.83, + "learning_rate": 3.9590946924094466e-05, + "loss": 2.3285, + "step": 4203500 + }, + { + "epoch": 20.83, + "learning_rate": 3.958970833766838e-05, + "loss": 2.3242, + "step": 4204000 + }, + { + "epoch": 20.83, + "learning_rate": 3.95884697512423e-05, + "loss": 2.3425, + "step": 4204500 + }, + { + "epoch": 20.83, + "learning_rate": 3.958723116481622e-05, + "loss": 2.3424, + "step": 4205000 + }, + { + "epoch": 20.84, + "learning_rate": 3.9585992578390134e-05, + "loss": 2.3153, + "step": 4205500 + }, + { + "epoch": 20.84, + "learning_rate": 3.958475399196405e-05, + "loss": 2.3106, + "step": 4206000 + }, + { + "epoch": 20.84, + "learning_rate": 3.958351540553797e-05, + "loss": 2.3452, + "step": 4206500 + }, + { + "epoch": 20.84, + "learning_rate": 3.9582276819111885e-05, + "loss": 2.3583, + "step": 4207000 + }, + { + "epoch": 20.85, + "learning_rate": 3.95810382326858e-05, + "loss": 2.339, + "step": 4207500 + }, + { + "epoch": 20.85, + "learning_rate": 3.957979964625972e-05, + "loss": 2.3194, + "step": 4208000 + }, + { + "epoch": 20.85, + "learning_rate": 3.957856353700649e-05, + "loss": 2.3322, + "step": 4208500 + }, + { + "epoch": 20.85, + "learning_rate": 3.9577324950580405e-05, + "loss": 2.3334, + "step": 4209000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957608636415432e-05, + "loss": 2.3398, + "step": 4209500 + }, + { + "epoch": 20.86, + "learning_rate": 3.957484777772824e-05, + "loss": 2.3255, + "step": 4210000 + }, + { + "epoch": 20.86, + "learning_rate": 3.9573611668475e-05, + "loss": 2.3186, + "step": 4210500 + }, + { + "epoch": 20.86, + "learning_rate": 3.957237308204892e-05, + "loss": 2.3099, + "step": 4211000 + }, + { + "epoch": 20.87, + "learning_rate": 3.9571134495622834e-05, + "loss": 2.3274, + "step": 4211500 + }, + { + "epoch": 20.87, + "learning_rate": 3.956989590919675e-05, + "loss": 2.3277, + "step": 4212000 + }, + { + "epoch": 20.87, + "learning_rate": 3.956865732277067e-05, + "loss": 2.3225, + "step": 4212500 + }, + { + "epoch": 20.87, + "learning_rate": 3.9567418736344585e-05, + "loss": 2.3221, + "step": 4213000 + }, + { + "epoch": 20.88, + "learning_rate": 3.95661801499185e-05, + "loss": 2.337, + "step": 4213500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956494156349242e-05, + "loss": 2.3301, + "step": 4214000 + }, + { + "epoch": 20.88, + "learning_rate": 3.9563702977066336e-05, + "loss": 2.3094, + "step": 4214500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956246439064025e-05, + "loss": 2.353, + "step": 4215000 + }, + { + "epoch": 20.89, + "learning_rate": 3.956122580421417e-05, + "loss": 2.3262, + "step": 4215500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955998969496094e-05, + "loss": 2.3429, + "step": 4216000 + }, + { + "epoch": 20.89, + "learning_rate": 3.95587535857077e-05, + "loss": 2.3003, + "step": 4216500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955751499928162e-05, + "loss": 2.3653, + "step": 4217000 + }, + { + "epoch": 20.89, + "learning_rate": 3.9556276412855534e-05, + "loss": 2.3238, + "step": 4217500 + }, + { + "epoch": 20.9, + "learning_rate": 3.955503782642945e-05, + "loss": 2.3313, + "step": 4218000 + }, + { + "epoch": 20.9, + "learning_rate": 3.955379924000337e-05, + "loss": 2.3488, + "step": 4218500 + }, + { + "epoch": 20.9, + "learning_rate": 3.9552560653577285e-05, + "loss": 2.3553, + "step": 4219000 + }, + { + "epoch": 20.9, + "learning_rate": 3.95513220671512e-05, + "loss": 2.3272, + "step": 4219500 + }, + { + "epoch": 20.91, + "learning_rate": 3.955008348072512e-05, + "loss": 2.3104, + "step": 4220000 + }, + { + "epoch": 20.91, + "learning_rate": 3.9548844894299036e-05, + "loss": 2.3523, + "step": 4220500 + }, + { + "epoch": 20.91, + "learning_rate": 3.954760630787295e-05, + "loss": 2.3315, + "step": 4221000 + }, + { + "epoch": 20.91, + "learning_rate": 3.954636772144687e-05, + "loss": 2.3421, + "step": 4221500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954512913502079e-05, + "loss": 2.3485, + "step": 4222000 + }, + { + "epoch": 20.92, + "learning_rate": 3.9543893025767556e-05, + "loss": 2.3143, + "step": 4222500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954265443934147e-05, + "loss": 2.3271, + "step": 4223000 + }, + { + "epoch": 20.92, + "learning_rate": 3.954141585291539e-05, + "loss": 2.3282, + "step": 4223500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9540177266489306e-05, + "loss": 2.3492, + "step": 4224000 + }, + { + "epoch": 20.93, + "learning_rate": 3.953894115723607e-05, + "loss": 2.3315, + "step": 4224500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9537702570809985e-05, + "loss": 2.3257, + "step": 4225000 + }, + { + "epoch": 20.93, + "learning_rate": 3.95364639843839e-05, + "loss": 2.351, + "step": 4225500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953522539795782e-05, + "loss": 2.3266, + "step": 4226000 + }, + { + "epoch": 20.94, + "learning_rate": 3.9533986811531736e-05, + "loss": 2.3206, + "step": 4226500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953274822510565e-05, + "loss": 2.343, + "step": 4227000 + }, + { + "epoch": 20.94, + "learning_rate": 3.953150963867957e-05, + "loss": 2.3361, + "step": 4227500 + }, + { + "epoch": 20.95, + "learning_rate": 3.953027105225349e-05, + "loss": 2.2889, + "step": 4228000 + }, + { + "epoch": 20.95, + "learning_rate": 3.95290374201731e-05, + "loss": 2.3352, + "step": 4228500 + }, + { + "epoch": 20.95, + "learning_rate": 3.952779883374702e-05, + "loss": 2.3433, + "step": 4229000 + }, + { + "epoch": 20.95, + "learning_rate": 3.9526560247320935e-05, + "loss": 2.326, + "step": 4229500 + }, + { + "epoch": 20.96, + "learning_rate": 3.952532413806771e-05, + "loss": 2.3601, + "step": 4230000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952408555164163e-05, + "loss": 2.314, + "step": 4230500 + }, + { + "epoch": 20.96, + "learning_rate": 3.9522846965215544e-05, + "loss": 2.3232, + "step": 4231000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952160837878946e-05, + "loss": 2.3366, + "step": 4231500 + }, + { + "epoch": 20.97, + "learning_rate": 3.952036979236337e-05, + "loss": 2.3331, + "step": 4232000 + }, + { + "epoch": 20.97, + "learning_rate": 3.951913120593729e-05, + "loss": 2.3387, + "step": 4232500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951789509668406e-05, + "loss": 2.3342, + "step": 4233000 + }, + { + "epoch": 20.97, + "learning_rate": 3.9516656510257974e-05, + "loss": 2.3227, + "step": 4233500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951541792383189e-05, + "loss": 2.3227, + "step": 4234000 + }, + { + "epoch": 20.98, + "learning_rate": 3.951417933740581e-05, + "loss": 2.3304, + "step": 4234500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951294075097972e-05, + "loss": 2.3277, + "step": 4235000 + }, + { + "epoch": 20.98, + "learning_rate": 3.9511702164553635e-05, + "loss": 2.3255, + "step": 4235500 + }, + { + "epoch": 20.99, + "learning_rate": 3.951046357812755e-05, + "loss": 2.3444, + "step": 4236000 + }, + { + "epoch": 20.99, + "learning_rate": 3.950922499170147e-05, + "loss": 2.3233, + "step": 4236500 + }, + { + "epoch": 20.99, + "learning_rate": 3.9507986405275386e-05, + "loss": 2.3227, + "step": 4237000 + }, + { + "epoch": 20.99, + "learning_rate": 3.95067478188493e-05, + "loss": 2.3412, + "step": 4237500 + }, + { + "epoch": 21.0, + "learning_rate": 3.950550923242322e-05, + "loss": 2.336, + "step": 4238000 + }, + { + "epoch": 21.0, + "learning_rate": 3.9504270645997136e-05, + "loss": 2.3633, + "step": 4238500 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.6523621169343081, + "eval_accuracy_mlm": 0.6068608911574469, + "eval_accuracy_nsp": 0.8669707678489482, + "eval_loss": 2.365056276321411, + "eval_runtime": 145.9769, + "eval_samples_per_second": 1746.57, + "eval_steps_per_second": 72.779, + "step": 4238703 + }, + { + "epoch": 21.0, + "learning_rate": 3.9503034536743905e-05, + "loss": 2.2921, + "step": 4239000 + }, + { + "epoch": 21.0, + "learning_rate": 3.950179595031782e-05, + "loss": 2.3045, + "step": 4239500 + }, + { + "epoch": 21.01, + "learning_rate": 3.950055736389174e-05, + "loss": 2.2858, + "step": 4240000 + }, + { + "epoch": 21.01, + "learning_rate": 3.9499318777465656e-05, + "loss": 2.2861, + "step": 4240500 + }, + { + "epoch": 21.01, + "learning_rate": 3.949808019103957e-05, + "loss": 2.2879, + "step": 4241000 + }, + { + "epoch": 21.01, + "learning_rate": 3.949684160461349e-05, + "loss": 2.2669, + "step": 4241500 + }, + { + "epoch": 21.02, + "learning_rate": 3.949560301818741e-05, + "loss": 2.3146, + "step": 4242000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949436690893417e-05, + "loss": 2.2891, + "step": 4242500 + }, + { + "epoch": 21.02, + "learning_rate": 3.9493128322508086e-05, + "loss": 2.3136, + "step": 4243000 + }, + { + "epoch": 21.02, + "learning_rate": 3.9491889736082e-05, + "loss": 2.2931, + "step": 4243500 + }, + { + "epoch": 21.03, + "learning_rate": 3.949065114965592e-05, + "loss": 2.3204, + "step": 4244000 + }, + { + "epoch": 21.03, + "learning_rate": 3.9489412563229836e-05, + "loss": 2.2862, + "step": 4244500 + }, + { + "epoch": 21.03, + "learning_rate": 3.9488176453976605e-05, + "loss": 2.2945, + "step": 4245000 + }, + { + "epoch": 21.03, + "learning_rate": 3.948693786755052e-05, + "loss": 2.3142, + "step": 4245500 + }, + { + "epoch": 21.04, + "learning_rate": 3.948569928112444e-05, + "loss": 2.305, + "step": 4246000 + }, + { + "epoch": 21.04, + "learning_rate": 3.9484460694698356e-05, + "loss": 2.2793, + "step": 4246500 + }, + { + "epoch": 21.04, + "learning_rate": 3.948322210827227e-05, + "loss": 2.3251, + "step": 4247000 + }, + { + "epoch": 21.04, + "learning_rate": 3.948198352184619e-05, + "loss": 2.3238, + "step": 4247500 + }, + { + "epoch": 21.05, + "learning_rate": 3.948074493542011e-05, + "loss": 2.2985, + "step": 4248000 + }, + { + "epoch": 21.05, + "learning_rate": 3.9479506348994024e-05, + "loss": 2.3153, + "step": 4248500 + }, + { + "epoch": 21.05, + "learning_rate": 3.947826776256794e-05, + "loss": 2.3251, + "step": 4249000 + }, + { + "epoch": 21.05, + "learning_rate": 3.947702917614186e-05, + "loss": 2.2942, + "step": 4249500 + }, + { + "epoch": 21.06, + "learning_rate": 3.9475790589715775e-05, + "loss": 2.2967, + "step": 4250000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947455200328969e-05, + "loss": 2.3266, + "step": 4250500 + }, + { + "epoch": 21.06, + "learning_rate": 3.947331341686361e-05, + "loss": 2.3041, + "step": 4251000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947207730761037e-05, + "loss": 2.3075, + "step": 4251500 + }, + { + "epoch": 21.07, + "learning_rate": 3.947083872118429e-05, + "loss": 2.3057, + "step": 4252000 + }, + { + "epoch": 21.07, + "learning_rate": 3.9469600134758204e-05, + "loss": 2.3371, + "step": 4252500 + }, + { + "epoch": 21.07, + "learning_rate": 3.946836154833212e-05, + "loss": 2.2894, + "step": 4253000 + }, + { + "epoch": 21.07, + "learning_rate": 3.946712296190604e-05, + "loss": 2.3192, + "step": 4253500 + }, + { + "epoch": 21.08, + "learning_rate": 3.946588932982566e-05, + "loss": 2.2943, + "step": 4254000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946465322057243e-05, + "loss": 2.3263, + "step": 4254500 + }, + { + "epoch": 21.08, + "learning_rate": 3.9463414634146345e-05, + "loss": 2.292, + "step": 4255000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946217604772026e-05, + "loss": 2.324, + "step": 4255500 + }, + { + "epoch": 21.09, + "learning_rate": 3.946093746129418e-05, + "loss": 2.3155, + "step": 4256000 + }, + { + "epoch": 21.09, + "learning_rate": 3.9459698874868095e-05, + "loss": 2.3074, + "step": 4256500 + }, + { + "epoch": 21.09, + "learning_rate": 3.9458460288442006e-05, + "loss": 2.2982, + "step": 4257000 + }, + { + "epoch": 21.09, + "learning_rate": 3.945722170201592e-05, + "loss": 2.3279, + "step": 4257500 + }, + { + "epoch": 21.1, + "learning_rate": 3.945598311558984e-05, + "loss": 2.3079, + "step": 4258000 + }, + { + "epoch": 21.1, + "learning_rate": 3.9454744529163756e-05, + "loss": 2.2971, + "step": 4258500 + }, + { + "epoch": 21.1, + "learning_rate": 3.945350594273767e-05, + "loss": 2.2837, + "step": 4259000 + }, + { + "epoch": 21.1, + "learning_rate": 3.945226735631159e-05, + "loss": 2.3128, + "step": 4259500 + }, + { + "epoch": 21.11, + "learning_rate": 3.945103124705836e-05, + "loss": 2.2928, + "step": 4260000 + }, + { + "epoch": 21.11, + "learning_rate": 3.9449792660632276e-05, + "loss": 2.3077, + "step": 4260500 + }, + { + "epoch": 21.11, + "learning_rate": 3.944855407420619e-05, + "loss": 2.2944, + "step": 4261000 + }, + { + "epoch": 21.11, + "learning_rate": 3.944731548778011e-05, + "loss": 2.3236, + "step": 4261500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944607690135402e-05, + "loss": 2.3145, + "step": 4262000 + }, + { + "epoch": 21.12, + "learning_rate": 3.944483831492794e-05, + "loss": 2.2849, + "step": 4262500 + }, + { + "epoch": 21.12, + "learning_rate": 3.9443602205674706e-05, + "loss": 2.3208, + "step": 4263000 + }, + { + "epoch": 21.12, + "learning_rate": 3.944236361924862e-05, + "loss": 2.3103, + "step": 4263500 + }, + { + "epoch": 21.13, + "learning_rate": 3.944112503282254e-05, + "loss": 2.3213, + "step": 4264000 + }, + { + "epoch": 21.13, + "learning_rate": 3.9439886446396456e-05, + "loss": 2.3028, + "step": 4264500 + }, + { + "epoch": 21.13, + "learning_rate": 3.943864785997037e-05, + "loss": 2.3208, + "step": 4265000 + }, + { + "epoch": 21.13, + "learning_rate": 3.943740927354429e-05, + "loss": 2.2868, + "step": 4265500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943617316429106e-05, + "loss": 2.3265, + "step": 4266000 + }, + { + "epoch": 21.14, + "learning_rate": 3.9434934577864976e-05, + "loss": 2.3104, + "step": 4266500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943369599143889e-05, + "loss": 2.325, + "step": 4267000 + }, + { + "epoch": 21.14, + "learning_rate": 3.943245740501281e-05, + "loss": 2.3422, + "step": 4267500 + }, + { + "epoch": 21.15, + "learning_rate": 3.943121881858673e-05, + "loss": 2.3074, + "step": 4268000 + }, + { + "epoch": 21.15, + "learning_rate": 3.9429980232160644e-05, + "loss": 2.3229, + "step": 4268500 + }, + { + "epoch": 21.15, + "learning_rate": 3.942874412290741e-05, + "loss": 2.2827, + "step": 4269000 + }, + { + "epoch": 21.15, + "learning_rate": 3.942750553648132e-05, + "loss": 2.3152, + "step": 4269500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942626695005524e-05, + "loss": 2.3225, + "step": 4270000 + }, + { + "epoch": 21.16, + "learning_rate": 3.9425028363629157e-05, + "loss": 2.3046, + "step": 4270500 + }, + { + "epoch": 21.16, + "learning_rate": 3.9423789777203073e-05, + "loss": 2.3032, + "step": 4271000 + }, + { + "epoch": 21.16, + "learning_rate": 3.942255119077699e-05, + "loss": 2.2956, + "step": 4271500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942131260435091e-05, + "loss": 2.3496, + "step": 4272000 + }, + { + "epoch": 21.17, + "learning_rate": 3.9420074017924824e-05, + "loss": 2.3057, + "step": 4272500 + }, + { + "epoch": 21.17, + "learning_rate": 3.941883790867159e-05, + "loss": 2.2873, + "step": 4273000 + }, + { + "epoch": 21.17, + "learning_rate": 3.941759932224551e-05, + "loss": 2.2936, + "step": 4273500 + }, + { + "epoch": 21.17, + "learning_rate": 3.941636073581943e-05, + "loss": 2.3081, + "step": 4274000 + }, + { + "epoch": 21.18, + "learning_rate": 3.9415122149393344e-05, + "loss": 2.2982, + "step": 4274500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941388604014011e-05, + "loss": 2.3144, + "step": 4275000 + }, + { + "epoch": 21.18, + "learning_rate": 3.941264745371402e-05, + "loss": 2.3072, + "step": 4275500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941140886728794e-05, + "loss": 2.3279, + "step": 4276000 + }, + { + "epoch": 21.19, + "learning_rate": 3.941017028086186e-05, + "loss": 2.3102, + "step": 4276500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9408931694435774e-05, + "loss": 2.298, + "step": 4277000 + }, + { + "epoch": 21.19, + "learning_rate": 3.940769558518255e-05, + "loss": 2.3229, + "step": 4277500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9406456998756466e-05, + "loss": 2.3222, + "step": 4278000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940521841233038e-05, + "loss": 2.2881, + "step": 4278500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940397982590429e-05, + "loss": 2.3254, + "step": 4279000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940274123947821e-05, + "loss": 2.3011, + "step": 4279500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940150265305213e-05, + "loss": 2.3101, + "step": 4280000 + }, + { + "epoch": 21.21, + "learning_rate": 3.9400264066626044e-05, + "loss": 2.3173, + "step": 4280500 + }, + { + "epoch": 21.21, + "learning_rate": 3.939902548019996e-05, + "loss": 2.3111, + "step": 4281000 + }, + { + "epoch": 21.21, + "learning_rate": 3.939778937094673e-05, + "loss": 2.3002, + "step": 4281500 + }, + { + "epoch": 21.21, + "learning_rate": 3.93965532616935e-05, + "loss": 2.2919, + "step": 4282000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9395314675267415e-05, + "loss": 2.2976, + "step": 4282500 + }, + { + "epoch": 21.22, + "learning_rate": 3.9394078566014184e-05, + "loss": 2.3086, + "step": 4283000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9392839979588094e-05, + "loss": 2.3153, + "step": 4283500 + }, + { + "epoch": 21.22, + "learning_rate": 3.939160387033486e-05, + "loss": 2.3233, + "step": 4284000 + }, + { + "epoch": 21.23, + "learning_rate": 3.939036528390878e-05, + "loss": 2.3166, + "step": 4284500 + }, + { + "epoch": 21.23, + "learning_rate": 3.9389129174655556e-05, + "loss": 2.2879, + "step": 4285000 + }, + { + "epoch": 21.23, + "learning_rate": 3.9387890588229466e-05, + "loss": 2.2924, + "step": 4285500 + }, + { + "epoch": 21.23, + "learning_rate": 3.938665200180338e-05, + "loss": 2.3315, + "step": 4286000 + }, + { + "epoch": 21.24, + "learning_rate": 3.93854134153773e-05, + "loss": 2.2909, + "step": 4286500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938417482895122e-05, + "loss": 2.3155, + "step": 4287000 + }, + { + "epoch": 21.24, + "learning_rate": 3.9382936242525134e-05, + "loss": 2.3186, + "step": 4287500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938169765609905e-05, + "loss": 2.3072, + "step": 4288000 + }, + { + "epoch": 21.25, + "learning_rate": 3.938046154684582e-05, + "loss": 2.3222, + "step": 4288500 + }, + { + "epoch": 21.25, + "learning_rate": 3.937922296041973e-05, + "loss": 2.3089, + "step": 4289000 + }, + { + "epoch": 21.25, + "learning_rate": 3.9377984373993646e-05, + "loss": 2.3375, + "step": 4289500 + }, + { + "epoch": 21.25, + "learning_rate": 3.937674578756756e-05, + "loss": 2.3187, + "step": 4290000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937550720114148e-05, + "loss": 2.3231, + "step": 4290500 + }, + { + "epoch": 21.26, + "learning_rate": 3.93742686147154e-05, + "loss": 2.2876, + "step": 4291000 + }, + { + "epoch": 21.26, + "learning_rate": 3.9373030028289314e-05, + "loss": 2.3264, + "step": 4291500 + }, + { + "epoch": 21.26, + "learning_rate": 3.937179144186323e-05, + "loss": 2.3282, + "step": 4292000 + }, + { + "epoch": 21.27, + "learning_rate": 3.937055285543715e-05, + "loss": 2.3319, + "step": 4292500 + }, + { + "epoch": 21.27, + "learning_rate": 3.9369314269011065e-05, + "loss": 2.3423, + "step": 4293000 + }, + { + "epoch": 21.27, + "learning_rate": 3.936807568258498e-05, + "loss": 2.286, + "step": 4293500 + }, + { + "epoch": 21.27, + "learning_rate": 3.93668370961589e-05, + "loss": 2.3192, + "step": 4294000 + }, + { + "epoch": 21.28, + "learning_rate": 3.9365598509732816e-05, + "loss": 2.3145, + "step": 4294500 + }, + { + "epoch": 21.28, + "learning_rate": 3.936435992330673e-05, + "loss": 2.3265, + "step": 4295000 + }, + { + "epoch": 21.28, + "learning_rate": 3.936312133688065e-05, + "loss": 2.334, + "step": 4295500 + }, + { + "epoch": 21.28, + "learning_rate": 3.9361882750454566e-05, + "loss": 2.3322, + "step": 4296000 + }, + { + "epoch": 21.29, + "learning_rate": 3.9360646641201335e-05, + "loss": 2.2928, + "step": 4296500 + }, + { + "epoch": 21.29, + "learning_rate": 3.9359408054775245e-05, + "loss": 2.314, + "step": 4297000 + }, + { + "epoch": 21.29, + "learning_rate": 3.935816946834916e-05, + "loss": 2.3296, + "step": 4297500 + }, + { + "epoch": 21.29, + "learning_rate": 3.935693335909593e-05, + "loss": 2.3279, + "step": 4298000 + }, + { + "epoch": 21.3, + "learning_rate": 3.935569477266985e-05, + "loss": 2.2969, + "step": 4298500 + }, + { + "epoch": 21.3, + "learning_rate": 3.9354456186243765e-05, + "loss": 2.3151, + "step": 4299000 + }, + { + "epoch": 21.3, + "learning_rate": 3.935321759981768e-05, + "loss": 2.3044, + "step": 4299500 + }, + { + "epoch": 21.3, + "learning_rate": 3.935198149056445e-05, + "loss": 2.3075, + "step": 4300000 + }, + { + "epoch": 21.31, + "learning_rate": 3.935074290413837e-05, + "loss": 2.2896, + "step": 4300500 + }, + { + "epoch": 21.31, + "learning_rate": 3.9349504317712285e-05, + "loss": 2.3119, + "step": 4301000 + }, + { + "epoch": 21.31, + "learning_rate": 3.93482657312862e-05, + "loss": 2.3079, + "step": 4301500 + }, + { + "epoch": 21.31, + "learning_rate": 3.934702714486012e-05, + "loss": 2.3173, + "step": 4302000 + }, + { + "epoch": 21.32, + "learning_rate": 3.9345788558434035e-05, + "loss": 2.3188, + "step": 4302500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934454997200795e-05, + "loss": 2.2956, + "step": 4303000 + }, + { + "epoch": 21.32, + "learning_rate": 3.934331138558186e-05, + "loss": 2.3025, + "step": 4303500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934207279915578e-05, + "loss": 2.3133, + "step": 4304000 + }, + { + "epoch": 21.33, + "learning_rate": 3.9340834212729696e-05, + "loss": 2.2965, + "step": 4304500 + }, + { + "epoch": 21.33, + "learning_rate": 3.933959562630361e-05, + "loss": 2.3176, + "step": 4305000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933835703987753e-05, + "loss": 2.3177, + "step": 4305500 + }, + { + "epoch": 21.33, + "learning_rate": 3.933711845345145e-05, + "loss": 2.3032, + "step": 4306000 + }, + { + "epoch": 21.34, + "learning_rate": 3.9335879867025364e-05, + "loss": 2.2993, + "step": 4306500 + }, + { + "epoch": 21.34, + "learning_rate": 3.9334641280599274e-05, + "loss": 2.3499, + "step": 4307000 + }, + { + "epoch": 21.34, + "learning_rate": 3.933340269417319e-05, + "loss": 2.3083, + "step": 4307500 + }, + { + "epoch": 21.34, + "learning_rate": 3.933216658491997e-05, + "loss": 2.3264, + "step": 4308000 + }, + { + "epoch": 21.35, + "learning_rate": 3.9330927998493884e-05, + "loss": 2.3329, + "step": 4308500 + }, + { + "epoch": 21.35, + "learning_rate": 3.93296894120678e-05, + "loss": 2.2855, + "step": 4309000 + }, + { + "epoch": 21.35, + "learning_rate": 3.932845082564172e-05, + "loss": 2.3285, + "step": 4309500 + }, + { + "epoch": 21.35, + "learning_rate": 3.932721223921563e-05, + "loss": 2.2971, + "step": 4310000 + }, + { + "epoch": 21.36, + "learning_rate": 3.9325976129962396e-05, + "loss": 2.3157, + "step": 4310500 + }, + { + "epoch": 21.36, + "learning_rate": 3.9324740020709165e-05, + "loss": 2.3484, + "step": 4311000 + }, + { + "epoch": 21.36, + "learning_rate": 3.932350143428308e-05, + "loss": 2.3032, + "step": 4311500 + }, + { + "epoch": 21.36, + "learning_rate": 3.932226532502985e-05, + "loss": 2.3432, + "step": 4312000 + }, + { + "epoch": 21.37, + "learning_rate": 3.932102673860377e-05, + "loss": 2.3068, + "step": 4312500 + }, + { + "epoch": 21.37, + "learning_rate": 3.9319788152177685e-05, + "loss": 2.3444, + "step": 4313000 + }, + { + "epoch": 21.37, + "learning_rate": 3.93185495657516e-05, + "loss": 2.3294, + "step": 4313500 + }, + { + "epoch": 21.37, + "learning_rate": 3.931731097932552e-05, + "loss": 2.3024, + "step": 4314000 + }, + { + "epoch": 21.38, + "learning_rate": 3.9316072392899436e-05, + "loss": 2.3364, + "step": 4314500 + }, + { + "epoch": 21.38, + "learning_rate": 3.931483380647335e-05, + "loss": 2.3322, + "step": 4315000 + }, + { + "epoch": 21.38, + "learning_rate": 3.931359522004727e-05, + "loss": 2.3135, + "step": 4315500 + }, + { + "epoch": 21.38, + "learning_rate": 3.9312356633621186e-05, + "loss": 2.3176, + "step": 4316000 + }, + { + "epoch": 21.39, + "learning_rate": 3.93111180471951e-05, + "loss": 2.34, + "step": 4316500 + }, + { + "epoch": 21.39, + "learning_rate": 3.9309879460769014e-05, + "loss": 2.3245, + "step": 4317000 + }, + { + "epoch": 21.39, + "learning_rate": 3.930864087434293e-05, + "loss": 2.3008, + "step": 4317500 + }, + { + "epoch": 21.39, + "learning_rate": 3.93074047650897e-05, + "loss": 2.2976, + "step": 4318000 + }, + { + "epoch": 21.4, + "learning_rate": 3.9306166178663616e-05, + "loss": 2.3099, + "step": 4318500 + }, + { + "epoch": 21.4, + "learning_rate": 3.9304930069410385e-05, + "loss": 2.3081, + "step": 4319000 + }, + { + "epoch": 21.4, + "learning_rate": 3.93036914829843e-05, + "loss": 2.3236, + "step": 4319500 + }, + { + "epoch": 21.4, + "learning_rate": 3.930245289655822e-05, + "loss": 2.3435, + "step": 4320000 + }, + { + "epoch": 21.41, + "learning_rate": 3.9301214310132136e-05, + "loss": 2.3184, + "step": 4320500 + }, + { + "epoch": 21.41, + "learning_rate": 3.929997572370605e-05, + "loss": 2.3174, + "step": 4321000 + }, + { + "epoch": 21.41, + "learning_rate": 3.929873713727997e-05, + "loss": 2.3308, + "step": 4321500 + }, + { + "epoch": 21.41, + "learning_rate": 3.9297498550853887e-05, + "loss": 2.2802, + "step": 4322000 + }, + { + "epoch": 21.42, + "learning_rate": 3.9296259964427804e-05, + "loss": 2.2863, + "step": 4322500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929502137800172e-05, + "loss": 2.3012, + "step": 4323000 + }, + { + "epoch": 21.42, + "learning_rate": 3.929378526874848e-05, + "loss": 2.3041, + "step": 4323500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929254915949525e-05, + "loss": 2.343, + "step": 4324000 + }, + { + "epoch": 21.43, + "learning_rate": 3.929131057306917e-05, + "loss": 2.3007, + "step": 4324500 + }, + { + "epoch": 21.43, + "learning_rate": 3.9290071986643085e-05, + "loss": 2.3265, + "step": 4325000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9288833400217e-05, + "loss": 2.3181, + "step": 4325500 + }, + { + "epoch": 21.43, + "learning_rate": 3.928759481379092e-05, + "loss": 2.3068, + "step": 4326000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9286356227364836e-05, + "loss": 2.3173, + "step": 4326500 + }, + { + "epoch": 21.44, + "learning_rate": 3.928511764093875e-05, + "loss": 2.3145, + "step": 4327000 + }, + { + "epoch": 21.44, + "learning_rate": 3.9283881531685515e-05, + "loss": 2.3157, + "step": 4327500 + }, + { + "epoch": 21.44, + "learning_rate": 3.928264294525943e-05, + "loss": 2.3428, + "step": 4328000 + }, + { + "epoch": 21.44, + "learning_rate": 3.928140435883335e-05, + "loss": 2.3414, + "step": 4328500 + }, + { + "epoch": 21.45, + "learning_rate": 3.9280165772407266e-05, + "loss": 2.3233, + "step": 4329000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9278929663154034e-05, + "loss": 2.3465, + "step": 4329500 + }, + { + "epoch": 21.45, + "learning_rate": 3.927769107672795e-05, + "loss": 2.3259, + "step": 4330000 + }, + { + "epoch": 21.45, + "learning_rate": 3.927645249030187e-05, + "loss": 2.3362, + "step": 4330500 + }, + { + "epoch": 21.46, + "learning_rate": 3.9275213903875785e-05, + "loss": 2.3229, + "step": 4331000 + }, + { + "epoch": 21.46, + "learning_rate": 3.92739753174497e-05, + "loss": 2.3119, + "step": 4331500 + }, + { + "epoch": 21.46, + "learning_rate": 3.927273673102362e-05, + "loss": 2.3136, + "step": 4332000 + }, + { + "epoch": 21.46, + "learning_rate": 3.9271498144597536e-05, + "loss": 2.332, + "step": 4332500 + }, + { + "epoch": 21.47, + "learning_rate": 3.927025955817145e-05, + "loss": 2.3427, + "step": 4333000 + }, + { + "epoch": 21.47, + "learning_rate": 3.926902097174537e-05, + "loss": 2.348, + "step": 4333500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926778486249213e-05, + "loss": 2.3072, + "step": 4334000 + }, + { + "epoch": 21.47, + "learning_rate": 3.926654627606605e-05, + "loss": 2.2956, + "step": 4334500 + }, + { + "epoch": 21.48, + "learning_rate": 3.9265310166812824e-05, + "loss": 2.3138, + "step": 4335000 + }, + { + "epoch": 21.48, + "learning_rate": 3.9264071580386735e-05, + "loss": 2.296, + "step": 4335500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926283299396065e-05, + "loss": 2.3195, + "step": 4336000 + }, + { + "epoch": 21.48, + "learning_rate": 3.926159440753457e-05, + "loss": 2.3196, + "step": 4336500 + }, + { + "epoch": 21.49, + "learning_rate": 3.9260358298281344e-05, + "loss": 2.2992, + "step": 4337000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925911971185526e-05, + "loss": 2.3306, + "step": 4337500 + }, + { + "epoch": 21.49, + "learning_rate": 3.925788112542918e-05, + "loss": 2.3089, + "step": 4338000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925664253900309e-05, + "loss": 2.3162, + "step": 4338500 + }, + { + "epoch": 21.5, + "learning_rate": 3.9255403952577005e-05, + "loss": 2.3234, + "step": 4339000 + }, + { + "epoch": 21.5, + "learning_rate": 3.925416536615092e-05, + "loss": 2.3258, + "step": 4339500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925292677972484e-05, + "loss": 2.3317, + "step": 4340000 + }, + { + "epoch": 21.5, + "learning_rate": 3.925168819329875e-05, + "loss": 2.3034, + "step": 4340500 + }, + { + "epoch": 21.51, + "learning_rate": 3.9250449606872666e-05, + "loss": 2.3462, + "step": 4341000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924921102044658e-05, + "loss": 2.3138, + "step": 4341500 + }, + { + "epoch": 21.51, + "learning_rate": 3.924797491119335e-05, + "loss": 2.3311, + "step": 4342000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924673632476727e-05, + "loss": 2.3276, + "step": 4342500 + }, + { + "epoch": 21.52, + "learning_rate": 3.9245497738341186e-05, + "loss": 2.3321, + "step": 4343000 + }, + { + "epoch": 21.52, + "learning_rate": 3.924426162908796e-05, + "loss": 2.2764, + "step": 4343500 + }, + { + "epoch": 21.52, + "learning_rate": 3.924302304266188e-05, + "loss": 2.3127, + "step": 4344000 + }, + { + "epoch": 21.52, + "learning_rate": 3.9241784456235795e-05, + "loss": 2.3023, + "step": 4344500 + }, + { + "epoch": 21.53, + "learning_rate": 3.9240545869809705e-05, + "loss": 2.2963, + "step": 4345000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923930728338362e-05, + "loss": 2.3223, + "step": 4345500 + }, + { + "epoch": 21.53, + "learning_rate": 3.923806869695754e-05, + "loss": 2.3039, + "step": 4346000 + }, + { + "epoch": 21.53, + "learning_rate": 3.9236830110531456e-05, + "loss": 2.3454, + "step": 4346500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923559152410537e-05, + "loss": 2.3503, + "step": 4347000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923435293767928e-05, + "loss": 2.3224, + "step": 4347500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923311682842605e-05, + "loss": 2.3291, + "step": 4348000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923187824199997e-05, + "loss": 2.3457, + "step": 4348500 + }, + { + "epoch": 21.55, + "learning_rate": 3.9230639655573886e-05, + "loss": 2.3133, + "step": 4349000 + }, + { + "epoch": 21.55, + "learning_rate": 3.92294010691478e-05, + "loss": 2.3439, + "step": 4349500 + }, + { + "epoch": 21.55, + "learning_rate": 3.922816248272172e-05, + "loss": 2.3106, + "step": 4350000 + }, + { + "epoch": 21.55, + "learning_rate": 3.9226926373468495e-05, + "loss": 2.3065, + "step": 4350500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922569026421526e-05, + "loss": 2.3188, + "step": 4351000 + }, + { + "epoch": 21.56, + "learning_rate": 3.9224451677789174e-05, + "loss": 2.3561, + "step": 4351500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922321309136309e-05, + "loss": 2.2994, + "step": 4352000 + }, + { + "epoch": 21.56, + "learning_rate": 3.922197450493701e-05, + "loss": 2.2983, + "step": 4352500 + }, + { + "epoch": 21.57, + "learning_rate": 3.9220735918510925e-05, + "loss": 2.2753, + "step": 4353000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921949733208484e-05, + "loss": 2.3172, + "step": 4353500 + }, + { + "epoch": 21.57, + "learning_rate": 3.921825874565876e-05, + "loss": 2.3432, + "step": 4354000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921702015923267e-05, + "loss": 2.3251, + "step": 4354500 + }, + { + "epoch": 21.58, + "learning_rate": 3.9215781572806586e-05, + "loss": 2.3145, + "step": 4355000 + }, + { + "epoch": 21.58, + "learning_rate": 3.92145429863805e-05, + "loss": 2.3169, + "step": 4355500 + }, + { + "epoch": 21.58, + "learning_rate": 3.921330687712728e-05, + "loss": 2.3224, + "step": 4356000 + }, + { + "epoch": 21.58, + "learning_rate": 3.9212068290701195e-05, + "loss": 2.2893, + "step": 4356500 + }, + { + "epoch": 21.59, + "learning_rate": 3.921082970427511e-05, + "loss": 2.3134, + "step": 4357000 + }, + { + "epoch": 21.59, + "learning_rate": 3.920959111784902e-05, + "loss": 2.3234, + "step": 4357500 + }, + { + "epoch": 21.59, + "learning_rate": 3.920835500859579e-05, + "loss": 2.2992, + "step": 4358000 + }, + { + "epoch": 21.59, + "learning_rate": 3.920711642216971e-05, + "loss": 2.3117, + "step": 4358500 + }, + { + "epoch": 21.6, + "learning_rate": 3.9205877835743625e-05, + "loss": 2.321, + "step": 4359000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920463924931754e-05, + "loss": 2.2993, + "step": 4359500 + }, + { + "epoch": 21.6, + "learning_rate": 3.920340314006431e-05, + "loss": 2.319, + "step": 4360000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920216455363823e-05, + "loss": 2.3492, + "step": 4360500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9200928444384996e-05, + "loss": 2.3343, + "step": 4361000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919968985795891e-05, + "loss": 2.3362, + "step": 4361500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9198451271532824e-05, + "loss": 2.3155, + "step": 4362000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919721516227959e-05, + "loss": 2.3279, + "step": 4362500 + }, + { + "epoch": 21.62, + "learning_rate": 3.919597657585351e-05, + "loss": 2.3272, + "step": 4363000 + }, + { + "epoch": 21.62, + "learning_rate": 3.9194737989427426e-05, + "loss": 2.3127, + "step": 4363500 + }, + { + "epoch": 21.62, + "learning_rate": 3.919349940300134e-05, + "loss": 2.3002, + "step": 4364000 + }, + { + "epoch": 21.62, + "learning_rate": 3.919226081657526e-05, + "loss": 2.3042, + "step": 4364500 + }, + { + "epoch": 21.63, + "learning_rate": 3.919102223014918e-05, + "loss": 2.302, + "step": 4365000 + }, + { + "epoch": 21.63, + "learning_rate": 3.9189786120895946e-05, + "loss": 2.3265, + "step": 4365500 + }, + { + "epoch": 21.63, + "learning_rate": 3.918854753446986e-05, + "loss": 2.3245, + "step": 4366000 + }, + { + "epoch": 21.63, + "learning_rate": 3.918730894804378e-05, + "loss": 2.3159, + "step": 4366500 + }, + { + "epoch": 21.64, + "learning_rate": 3.918607283879055e-05, + "loss": 2.3157, + "step": 4367000 + }, + { + "epoch": 21.64, + "learning_rate": 3.918483425236446e-05, + "loss": 2.3319, + "step": 4367500 + }, + { + "epoch": 21.64, + "learning_rate": 3.9183595665938376e-05, + "loss": 2.3131, + "step": 4368000 + }, + { + "epoch": 21.64, + "learning_rate": 3.918235707951229e-05, + "loss": 2.3323, + "step": 4368500 + }, + { + "epoch": 21.65, + "learning_rate": 3.918111849308621e-05, + "loss": 2.3466, + "step": 4369000 + }, + { + "epoch": 21.65, + "learning_rate": 3.9179879906660126e-05, + "loss": 2.3123, + "step": 4369500 + }, + { + "epoch": 21.65, + "learning_rate": 3.917864132023404e-05, + "loss": 2.296, + "step": 4370000 + }, + { + "epoch": 21.65, + "learning_rate": 3.917740273380796e-05, + "loss": 2.313, + "step": 4370500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917616414738188e-05, + "loss": 2.3314, + "step": 4371000 + }, + { + "epoch": 21.66, + "learning_rate": 3.9174928038128646e-05, + "loss": 2.2958, + "step": 4371500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917368945170256e-05, + "loss": 2.3307, + "step": 4372000 + }, + { + "epoch": 21.66, + "learning_rate": 3.917245086527648e-05, + "loss": 2.3187, + "step": 4372500 + }, + { + "epoch": 21.67, + "learning_rate": 3.91712122788504e-05, + "loss": 2.3269, + "step": 4373000 + }, + { + "epoch": 21.67, + "learning_rate": 3.9169973692424314e-05, + "loss": 2.3228, + "step": 4373500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916873510599823e-05, + "loss": 2.3128, + "step": 4374000 + }, + { + "epoch": 21.67, + "learning_rate": 3.916749651957215e-05, + "loss": 2.3191, + "step": 4374500 + }, + { + "epoch": 21.68, + "learning_rate": 3.9166257933146064e-05, + "loss": 2.3091, + "step": 4375000 + }, + { + "epoch": 21.68, + "learning_rate": 3.9165019346719975e-05, + "loss": 2.3338, + "step": 4375500 + }, + { + "epoch": 21.68, + "learning_rate": 3.916378076029389e-05, + "loss": 2.3332, + "step": 4376000 + }, + { + "epoch": 21.68, + "learning_rate": 3.916254465104066e-05, + "loss": 2.3182, + "step": 4376500 + }, + { + "epoch": 21.69, + "learning_rate": 3.916130606461458e-05, + "loss": 2.3053, + "step": 4377000 + }, + { + "epoch": 21.69, + "learning_rate": 3.9160067478188494e-05, + "loss": 2.3355, + "step": 4377500 + }, + { + "epoch": 21.69, + "learning_rate": 3.915882889176241e-05, + "loss": 2.3166, + "step": 4378000 + }, + { + "epoch": 21.69, + "learning_rate": 3.915759278250918e-05, + "loss": 2.3133, + "step": 4378500 + }, + { + "epoch": 21.7, + "learning_rate": 3.91563541960831e-05, + "loss": 2.3149, + "step": 4379000 + }, + { + "epoch": 21.7, + "learning_rate": 3.9155115609657014e-05, + "loss": 2.3224, + "step": 4379500 + }, + { + "epoch": 21.7, + "learning_rate": 3.915387702323093e-05, + "loss": 2.3327, + "step": 4380000 + }, + { + "epoch": 21.7, + "learning_rate": 3.915263843680485e-05, + "loss": 2.3332, + "step": 4380500 + }, + { + "epoch": 21.7, + "learning_rate": 3.9151399850378765e-05, + "loss": 2.3271, + "step": 4381000 + }, + { + "epoch": 21.71, + "learning_rate": 3.915016126395268e-05, + "loss": 2.286, + "step": 4381500 + }, + { + "epoch": 21.71, + "learning_rate": 3.914892267752659e-05, + "loss": 2.3318, + "step": 4382000 + }, + { + "epoch": 21.71, + "learning_rate": 3.914768409110051e-05, + "loss": 2.3186, + "step": 4382500 + }, + { + "epoch": 21.71, + "learning_rate": 3.9146445504674425e-05, + "loss": 2.3394, + "step": 4383000 + }, + { + "epoch": 21.72, + "learning_rate": 3.914520691824834e-05, + "loss": 2.3411, + "step": 4383500 + }, + { + "epoch": 21.72, + "learning_rate": 3.914396833182226e-05, + "loss": 2.312, + "step": 4384000 + }, + { + "epoch": 21.72, + "learning_rate": 3.914273222256903e-05, + "loss": 2.3266, + "step": 4384500 + }, + { + "epoch": 21.72, + "learning_rate": 3.9141493636142945e-05, + "loss": 2.3351, + "step": 4385000 + }, + { + "epoch": 21.73, + "learning_rate": 3.914025504971686e-05, + "loss": 2.3116, + "step": 4385500 + }, + { + "epoch": 21.73, + "learning_rate": 3.913901646329078e-05, + "loss": 2.3258, + "step": 4386000 + }, + { + "epoch": 21.73, + "learning_rate": 3.9137777876864696e-05, + "loss": 2.3249, + "step": 4386500 + }, + { + "epoch": 21.73, + "learning_rate": 3.9136541767611465e-05, + "loss": 2.3161, + "step": 4387000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913530318118538e-05, + "loss": 2.3343, + "step": 4387500 + }, + { + "epoch": 21.74, + "learning_rate": 3.91340645947593e-05, + "loss": 2.2996, + "step": 4388000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913282848550606e-05, + "loss": 2.307, + "step": 4388500 + }, + { + "epoch": 21.74, + "learning_rate": 3.913159485342569e-05, + "loss": 2.308, + "step": 4389000 + }, + { + "epoch": 21.75, + "learning_rate": 3.9130356266999605e-05, + "loss": 2.3072, + "step": 4389500 + }, + { + "epoch": 21.75, + "learning_rate": 3.9129117680573515e-05, + "loss": 2.3254, + "step": 4390000 + }, + { + "epoch": 21.75, + "learning_rate": 3.912787909414743e-05, + "loss": 2.3295, + "step": 4390500 + }, + { + "epoch": 21.75, + "learning_rate": 3.912664050772135e-05, + "loss": 2.3067, + "step": 4391000 + }, + { + "epoch": 21.76, + "learning_rate": 3.9125401921295266e-05, + "loss": 2.3233, + "step": 4391500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912416333486918e-05, + "loss": 2.3388, + "step": 4392000 + }, + { + "epoch": 21.76, + "learning_rate": 3.912292474844309e-05, + "loss": 2.2997, + "step": 4392500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912168616201701e-05, + "loss": 2.3244, + "step": 4393000 + }, + { + "epoch": 21.77, + "learning_rate": 3.912044757559093e-05, + "loss": 2.3057, + "step": 4393500 + }, + { + "epoch": 21.77, + "learning_rate": 3.9119208989164844e-05, + "loss": 2.291, + "step": 4394000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911797040273876e-05, + "loss": 2.3322, + "step": 4394500 + }, + { + "epoch": 21.77, + "learning_rate": 3.911673181631268e-05, + "loss": 2.3331, + "step": 4395000 + }, + { + "epoch": 21.78, + "learning_rate": 3.9115493229886595e-05, + "loss": 2.3059, + "step": 4395500 + }, + { + "epoch": 21.78, + "learning_rate": 3.911425464346051e-05, + "loss": 2.3113, + "step": 4396000 + }, + { + "epoch": 21.78, + "learning_rate": 3.911301605703443e-05, + "loss": 2.3046, + "step": 4396500 + }, + { + "epoch": 21.78, + "learning_rate": 3.91117799477812e-05, + "loss": 2.3182, + "step": 4397000 + }, + { + "epoch": 21.79, + "learning_rate": 3.9110541361355114e-05, + "loss": 2.3106, + "step": 4397500 + }, + { + "epoch": 21.79, + "learning_rate": 3.910930277492903e-05, + "loss": 2.3463, + "step": 4398000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910806418850295e-05, + "loss": 2.3151, + "step": 4398500 + }, + { + "epoch": 21.79, + "learning_rate": 3.9106825602076865e-05, + "loss": 2.3151, + "step": 4399000 + }, + { + "epoch": 21.8, + "learning_rate": 3.910558701565078e-05, + "loss": 2.332, + "step": 4399500 + }, + { + "epoch": 21.8, + "learning_rate": 3.91043484292247e-05, + "loss": 2.3448, + "step": 4400000 + }, + { + "epoch": 21.8, + "learning_rate": 3.9103109842798616e-05, + "loss": 2.3485, + "step": 4400500 + }, + { + "epoch": 21.8, + "learning_rate": 3.910187373354538e-05, + "loss": 2.342, + "step": 4401000 + }, + { + "epoch": 21.81, + "learning_rate": 3.9100635147119295e-05, + "loss": 2.3263, + "step": 4401500 + }, + { + "epoch": 21.81, + "learning_rate": 3.9099399037866063e-05, + "loss": 2.3294, + "step": 4402000 + }, + { + "epoch": 21.81, + "learning_rate": 3.909816292861284e-05, + "loss": 2.3133, + "step": 4402500 + }, + { + "epoch": 21.81, + "learning_rate": 3.9096924342186756e-05, + "loss": 2.3127, + "step": 4403000 + }, + { + "epoch": 21.82, + "learning_rate": 3.9095685755760666e-05, + "loss": 2.3094, + "step": 4403500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909444716933458e-05, + "loss": 2.2822, + "step": 4404000 + }, + { + "epoch": 21.82, + "learning_rate": 3.90932085829085e-05, + "loss": 2.3257, + "step": 4404500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909196999648242e-05, + "loss": 2.2952, + "step": 4405000 + }, + { + "epoch": 21.83, + "learning_rate": 3.9090731410056334e-05, + "loss": 2.3526, + "step": 4405500 + }, + { + "epoch": 21.83, + "learning_rate": 3.9089492823630244e-05, + "loss": 2.3249, + "step": 4406000 + }, + { + "epoch": 21.83, + "learning_rate": 3.908825423720416e-05, + "loss": 2.3282, + "step": 4406500 + }, + { + "epoch": 21.83, + "learning_rate": 3.9087018127950936e-05, + "loss": 2.3322, + "step": 4407000 + }, + { + "epoch": 21.84, + "learning_rate": 3.9085779541524853e-05, + "loss": 2.3248, + "step": 4407500 + }, + { + "epoch": 21.84, + "learning_rate": 3.9084540955098764e-05, + "loss": 2.325, + "step": 4408000 + }, + { + "epoch": 21.84, + "learning_rate": 3.908330236867268e-05, + "loss": 2.3289, + "step": 4408500 + }, + { + "epoch": 21.84, + "learning_rate": 3.90820637822466e-05, + "loss": 2.3138, + "step": 4409000 + }, + { + "epoch": 21.85, + "learning_rate": 3.908082767299337e-05, + "loss": 2.3137, + "step": 4409500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907958908656728e-05, + "loss": 2.3257, + "step": 4410000 + }, + { + "epoch": 21.85, + "learning_rate": 3.90783505001412e-05, + "loss": 2.322, + "step": 4410500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907711191371512e-05, + "loss": 2.3273, + "step": 4411000 + }, + { + "epoch": 21.86, + "learning_rate": 3.9075873327289034e-05, + "loss": 2.3332, + "step": 4411500 + }, + { + "epoch": 21.86, + "learning_rate": 3.907463474086295e-05, + "loss": 2.337, + "step": 4412000 + }, + { + "epoch": 21.86, + "learning_rate": 3.907339863160972e-05, + "loss": 2.3351, + "step": 4412500 + }, + { + "epoch": 21.86, + "learning_rate": 3.9072160045183637e-05, + "loss": 2.3349, + "step": 4413000 + }, + { + "epoch": 21.87, + "learning_rate": 3.9070921458757554e-05, + "loss": 2.3297, + "step": 4413500 + }, + { + "epoch": 21.87, + "learning_rate": 3.906968287233147e-05, + "loss": 2.3183, + "step": 4414000 + }, + { + "epoch": 21.87, + "learning_rate": 3.906844428590538e-05, + "loss": 2.3166, + "step": 4414500 + }, + { + "epoch": 21.87, + "learning_rate": 3.90672056994793e-05, + "loss": 2.317, + "step": 4415000 + }, + { + "epoch": 21.88, + "learning_rate": 3.9065967113053214e-05, + "loss": 2.3059, + "step": 4415500 + }, + { + "epoch": 21.88, + "learning_rate": 3.906472852662713e-05, + "loss": 2.3185, + "step": 4416000 + }, + { + "epoch": 21.88, + "learning_rate": 3.906349241737391e-05, + "loss": 2.3452, + "step": 4416500 + }, + { + "epoch": 21.88, + "learning_rate": 3.906225383094782e-05, + "loss": 2.32, + "step": 4417000 + }, + { + "epoch": 21.89, + "learning_rate": 3.9061015244521734e-05, + "loss": 2.3385, + "step": 4417500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905977665809565e-05, + "loss": 2.3351, + "step": 4418000 + }, + { + "epoch": 21.89, + "learning_rate": 3.905853807166957e-05, + "loss": 2.3168, + "step": 4418500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905730196241634e-05, + "loss": 2.3263, + "step": 4419000 + }, + { + "epoch": 21.9, + "learning_rate": 3.9056063375990254e-05, + "loss": 2.331, + "step": 4419500 + }, + { + "epoch": 21.9, + "learning_rate": 3.905482478956417e-05, + "loss": 2.3449, + "step": 4420000 + }, + { + "epoch": 21.9, + "learning_rate": 3.905358868031094e-05, + "loss": 2.327, + "step": 4420500 + }, + { + "epoch": 21.9, + "learning_rate": 3.9052350093884856e-05, + "loss": 2.3243, + "step": 4421000 + }, + { + "epoch": 21.91, + "learning_rate": 3.905111150745877e-05, + "loss": 2.3226, + "step": 4421500 + }, + { + "epoch": 21.91, + "learning_rate": 3.904987292103269e-05, + "loss": 2.3242, + "step": 4422000 + }, + { + "epoch": 21.91, + "learning_rate": 3.904863433460661e-05, + "loss": 2.3421, + "step": 4422500 + }, + { + "epoch": 21.91, + "learning_rate": 3.9047395748180524e-05, + "loss": 2.3393, + "step": 4423000 + }, + { + "epoch": 21.92, + "learning_rate": 3.9046157161754434e-05, + "loss": 2.3457, + "step": 4423500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904491857532835e-05, + "loss": 2.3174, + "step": 4424000 + }, + { + "epoch": 21.92, + "learning_rate": 3.904367998890227e-05, + "loss": 2.3136, + "step": 4424500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904244387964904e-05, + "loss": 2.311, + "step": 4425000 + }, + { + "epoch": 21.93, + "learning_rate": 3.9041205293222954e-05, + "loss": 2.3206, + "step": 4425500 + }, + { + "epoch": 21.93, + "learning_rate": 3.903996670679687e-05, + "loss": 2.3157, + "step": 4426000 + }, + { + "epoch": 21.93, + "learning_rate": 3.903872812037079e-05, + "loss": 2.3178, + "step": 4426500 + }, + { + "epoch": 21.93, + "learning_rate": 3.90374895339447e-05, + "loss": 2.3013, + "step": 4427000 + }, + { + "epoch": 21.94, + "learning_rate": 3.9036250947518615e-05, + "loss": 2.3279, + "step": 4427500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903501236109253e-05, + "loss": 2.3246, + "step": 4428000 + }, + { + "epoch": 21.94, + "learning_rate": 3.903377377466645e-05, + "loss": 2.3211, + "step": 4428500 + }, + { + "epoch": 21.94, + "learning_rate": 3.9032535188240365e-05, + "loss": 2.3386, + "step": 4429000 + }, + { + "epoch": 21.95, + "learning_rate": 3.903129907898714e-05, + "loss": 2.3297, + "step": 4429500 + }, + { + "epoch": 21.95, + "learning_rate": 3.903006049256105e-05, + "loss": 2.3189, + "step": 4430000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902882438330782e-05, + "loss": 2.3271, + "step": 4430500 + }, + { + "epoch": 21.95, + "learning_rate": 3.902758579688174e-05, + "loss": 2.2818, + "step": 4431000 + }, + { + "epoch": 21.96, + "learning_rate": 3.9026347210455654e-05, + "loss": 2.2994, + "step": 4431500 + }, + { + "epoch": 21.96, + "learning_rate": 3.902510862402957e-05, + "loss": 2.3206, + "step": 4432000 + }, + { + "epoch": 21.96, + "learning_rate": 3.902387003760349e-05, + "loss": 2.3146, + "step": 4432500 + }, + { + "epoch": 21.96, + "learning_rate": 3.90226314511774e-05, + "loss": 2.325, + "step": 4433000 + }, + { + "epoch": 21.97, + "learning_rate": 3.9021392864751315e-05, + "loss": 2.3225, + "step": 4433500 + }, + { + "epoch": 21.97, + "learning_rate": 3.902015675549809e-05, + "loss": 2.3511, + "step": 4434000 + }, + { + "epoch": 21.97, + "learning_rate": 3.901891816907201e-05, + "loss": 2.307, + "step": 4434500 + }, + { + "epoch": 21.97, + "learning_rate": 3.9017679582645924e-05, + "loss": 2.3254, + "step": 4435000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901644099621984e-05, + "loss": 2.3352, + "step": 4435500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901520240979376e-05, + "loss": 2.337, + "step": 4436000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901396630054052e-05, + "loss": 2.3028, + "step": 4436500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901272771411444e-05, + "loss": 2.3097, + "step": 4437000 + }, + { + "epoch": 21.98, + "learning_rate": 3.9011489127688354e-05, + "loss": 2.3093, + "step": 4437500 + }, + { + "epoch": 21.99, + "learning_rate": 3.901025054126227e-05, + "loss": 2.348, + "step": 4438000 + }, + { + "epoch": 21.99, + "learning_rate": 3.900901195483619e-05, + "loss": 2.3076, + "step": 4438500 + }, + { + "epoch": 21.99, + "learning_rate": 3.900777584558296e-05, + "loss": 2.3348, + "step": 4439000 + }, + { + "epoch": 21.99, + "learning_rate": 3.9006537259156874e-05, + "loss": 2.3648, + "step": 4439500 + }, + { + "epoch": 22.0, + "learning_rate": 3.900529867273079e-05, + "loss": 2.3346, + "step": 4440000 + }, + { + "epoch": 22.0, + "learning_rate": 3.900406008630471e-05, + "loss": 2.3289, + "step": 4440500 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.6535946905725298, + "eval_accuracy_mlm": 0.6084227722805187, + "eval_accuracy_nsp": 0.86680995767947, + "eval_loss": 2.3525571823120117, + "eval_runtime": 145.9518, + "eval_samples_per_second": 1746.871, + "eval_steps_per_second": 72.791, + "step": 4440546 + }, + { + "epoch": 22.0, + "learning_rate": 3.900282397705147e-05, + "loss": 2.2772, + "step": 4441000 + }, + { + "epoch": 22.0, + "learning_rate": 3.9001585390625386e-05, + "loss": 2.2912, + "step": 4441500 + }, + { + "epoch": 22.01, + "learning_rate": 3.90003468041993e-05, + "loss": 2.282, + "step": 4442000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899911069494607e-05, + "loss": 2.2739, + "step": 4442500 + }, + { + "epoch": 22.01, + "learning_rate": 3.899787210851999e-05, + "loss": 2.2682, + "step": 4443000 + }, + { + "epoch": 22.01, + "learning_rate": 3.8996633522093906e-05, + "loss": 2.2959, + "step": 4443500 + }, + { + "epoch": 22.02, + "learning_rate": 3.899539493566782e-05, + "loss": 2.2891, + "step": 4444000 + }, + { + "epoch": 22.02, + "learning_rate": 3.899415634924174e-05, + "loss": 2.2862, + "step": 4444500 + }, + { + "epoch": 22.02, + "learning_rate": 3.899291776281566e-05, + "loss": 2.3008, + "step": 4445000 + }, + { + "epoch": 22.02, + "learning_rate": 3.8991679176389574e-05, + "loss": 2.2792, + "step": 4445500 + }, + { + "epoch": 22.03, + "learning_rate": 3.899044058996349e-05, + "loss": 2.2976, + "step": 4446000 + }, + { + "epoch": 22.03, + "learning_rate": 3.898920448071026e-05, + "loss": 2.303, + "step": 4446500 + }, + { + "epoch": 22.03, + "learning_rate": 3.898796589428417e-05, + "loss": 2.2843, + "step": 4447000 + }, + { + "epoch": 22.03, + "learning_rate": 3.8986727307858087e-05, + "loss": 2.2869, + "step": 4447500 + }, + { + "epoch": 22.04, + "learning_rate": 3.8985488721432003e-05, + "loss": 2.2573, + "step": 4448000 + }, + { + "epoch": 22.04, + "learning_rate": 3.898425013500592e-05, + "loss": 2.2712, + "step": 4448500 + }, + { + "epoch": 22.04, + "learning_rate": 3.898301154857984e-05, + "loss": 2.275, + "step": 4449000 + }, + { + "epoch": 22.04, + "learning_rate": 3.8981772962153754e-05, + "loss": 2.2868, + "step": 4449500 + }, + { + "epoch": 22.05, + "learning_rate": 3.898053437572767e-05, + "loss": 2.3145, + "step": 4450000 + }, + { + "epoch": 22.05, + "learning_rate": 3.897929578930159e-05, + "loss": 2.3197, + "step": 4450500 + }, + { + "epoch": 22.05, + "learning_rate": 3.8978057202875505e-05, + "loss": 2.2986, + "step": 4451000 + }, + { + "epoch": 22.05, + "learning_rate": 3.8976821093622274e-05, + "loss": 2.2807, + "step": 4451500 + }, + { + "epoch": 22.06, + "learning_rate": 3.897558498436904e-05, + "loss": 2.3027, + "step": 4452000 + }, + { + "epoch": 22.06, + "learning_rate": 3.897434639794296e-05, + "loss": 2.2952, + "step": 4452500 + }, + { + "epoch": 22.06, + "learning_rate": 3.8973107811516877e-05, + "loss": 2.2791, + "step": 4453000 + }, + { + "epoch": 22.06, + "learning_rate": 3.8971869225090793e-05, + "loss": 2.2952, + "step": 4453500 + }, + { + "epoch": 22.07, + "learning_rate": 3.8970630638664704e-05, + "loss": 2.3056, + "step": 4454000 + }, + { + "epoch": 22.07, + "learning_rate": 3.896939205223862e-05, + "loss": 2.3098, + "step": 4454500 + }, + { + "epoch": 22.07, + "learning_rate": 3.896815346581254e-05, + "loss": 2.2978, + "step": 4455000 + }, + { + "epoch": 22.07, + "learning_rate": 3.8966914879386454e-05, + "loss": 2.306, + "step": 4455500 + }, + { + "epoch": 22.08, + "learning_rate": 3.896567629296037e-05, + "loss": 2.2921, + "step": 4456000 + }, + { + "epoch": 22.08, + "learning_rate": 3.896443770653429e-05, + "loss": 2.3061, + "step": 4456500 + }, + { + "epoch": 22.08, + "learning_rate": 3.896320159728106e-05, + "loss": 2.2947, + "step": 4457000 + }, + { + "epoch": 22.08, + "learning_rate": 3.8961963010854974e-05, + "loss": 2.2962, + "step": 4457500 + }, + { + "epoch": 22.09, + "learning_rate": 3.896072442442889e-05, + "loss": 2.2948, + "step": 4458000 + }, + { + "epoch": 22.09, + "learning_rate": 3.895948831517566e-05, + "loss": 2.3185, + "step": 4458500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895824972874958e-05, + "loss": 2.3078, + "step": 4459000 + }, + { + "epoch": 22.09, + "learning_rate": 3.8957011142323494e-05, + "loss": 2.303, + "step": 4459500 + }, + { + "epoch": 22.1, + "learning_rate": 3.8955775033070256e-05, + "loss": 2.2979, + "step": 4460000 + }, + { + "epoch": 22.1, + "learning_rate": 3.895453644664417e-05, + "loss": 2.2898, + "step": 4460500 + }, + { + "epoch": 22.1, + "learning_rate": 3.895330033739095e-05, + "loss": 2.3058, + "step": 4461000 + }, + { + "epoch": 22.1, + "learning_rate": 3.8952061750964865e-05, + "loss": 2.2828, + "step": 4461500 + }, + { + "epoch": 22.11, + "learning_rate": 3.8950823164538775e-05, + "loss": 2.3222, + "step": 4462000 + }, + { + "epoch": 22.11, + "learning_rate": 3.894958457811269e-05, + "loss": 2.3003, + "step": 4462500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894834599168661e-05, + "loss": 2.2981, + "step": 4463000 + }, + { + "epoch": 22.11, + "learning_rate": 3.8947107405260526e-05, + "loss": 2.3091, + "step": 4463500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894586881883444e-05, + "loss": 2.271, + "step": 4464000 + }, + { + "epoch": 22.12, + "learning_rate": 3.894463023240836e-05, + "loss": 2.334, + "step": 4464500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894339164598228e-05, + "loss": 2.2926, + "step": 4465000 + }, + { + "epoch": 22.12, + "learning_rate": 3.8942153059556194e-05, + "loss": 2.3064, + "step": 4465500 + }, + { + "epoch": 22.13, + "learning_rate": 3.894091447313011e-05, + "loss": 2.2886, + "step": 4466000 + }, + { + "epoch": 22.13, + "learning_rate": 3.893967588670403e-05, + "loss": 2.2827, + "step": 4466500 + }, + { + "epoch": 22.13, + "learning_rate": 3.8938437300277944e-05, + "loss": 2.2968, + "step": 4467000 + }, + { + "epoch": 22.13, + "learning_rate": 3.8937201191024707e-05, + "loss": 2.298, + "step": 4467500 + }, + { + "epoch": 22.14, + "learning_rate": 3.8935962604598623e-05, + "loss": 2.2935, + "step": 4468000 + }, + { + "epoch": 22.14, + "learning_rate": 3.893472401817254e-05, + "loss": 2.2881, + "step": 4468500 + }, + { + "epoch": 22.14, + "learning_rate": 3.893348543174646e-05, + "loss": 2.2915, + "step": 4469000 + }, + { + "epoch": 22.14, + "learning_rate": 3.8932246845320374e-05, + "loss": 2.3102, + "step": 4469500 + }, + { + "epoch": 22.15, + "learning_rate": 3.893100825889429e-05, + "loss": 2.3013, + "step": 4470000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892977214964106e-05, + "loss": 2.285, + "step": 4470500 + }, + { + "epoch": 22.15, + "learning_rate": 3.892853356321498e-05, + "loss": 2.2956, + "step": 4471000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892729745396174e-05, + "loss": 2.3063, + "step": 4471500 + }, + { + "epoch": 22.16, + "learning_rate": 3.8926058867535656e-05, + "loss": 2.3184, + "step": 4472000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892482028110957e-05, + "loss": 2.2951, + "step": 4472500 + }, + { + "epoch": 22.16, + "learning_rate": 3.892358169468349e-05, + "loss": 2.2968, + "step": 4473000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892234310825741e-05, + "loss": 2.2868, + "step": 4473500 + }, + { + "epoch": 22.17, + "learning_rate": 3.892110699900418e-05, + "loss": 2.2937, + "step": 4474000 + }, + { + "epoch": 22.17, + "learning_rate": 3.891986841257809e-05, + "loss": 2.2953, + "step": 4474500 + }, + { + "epoch": 22.17, + "learning_rate": 3.891862982615201e-05, + "loss": 2.3181, + "step": 4475000 + }, + { + "epoch": 22.17, + "learning_rate": 3.8917391239725926e-05, + "loss": 2.2859, + "step": 4475500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891615265329984e-05, + "loss": 2.3111, + "step": 4476000 + }, + { + "epoch": 22.18, + "learning_rate": 3.891491406687376e-05, + "loss": 2.3094, + "step": 4476500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891367548044768e-05, + "loss": 2.3165, + "step": 4477000 + }, + { + "epoch": 22.18, + "learning_rate": 3.8912436894021594e-05, + "loss": 2.3021, + "step": 4477500 + }, + { + "epoch": 22.19, + "learning_rate": 3.891119830759551e-05, + "loss": 2.2889, + "step": 4478000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890995972116943e-05, + "loss": 2.3061, + "step": 4478500 + }, + { + "epoch": 22.19, + "learning_rate": 3.8908721134743345e-05, + "loss": 2.303, + "step": 4479000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890748254831726e-05, + "loss": 2.2969, + "step": 4479500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890624891623688e-05, + "loss": 2.2988, + "step": 4480000 + }, + { + "epoch": 22.2, + "learning_rate": 3.890501032981079e-05, + "loss": 2.3114, + "step": 4480500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890377174338471e-05, + "loss": 2.3078, + "step": 4481000 + }, + { + "epoch": 22.2, + "learning_rate": 3.8902533156958626e-05, + "loss": 2.3274, + "step": 4481500 + }, + { + "epoch": 22.21, + "learning_rate": 3.890129457053254e-05, + "loss": 2.296, + "step": 4482000 + }, + { + "epoch": 22.21, + "learning_rate": 3.890005598410646e-05, + "loss": 2.3063, + "step": 4482500 + }, + { + "epoch": 22.21, + "learning_rate": 3.889881739768038e-05, + "loss": 2.2988, + "step": 4483000 + }, + { + "epoch": 22.21, + "learning_rate": 3.8897578811254294e-05, + "loss": 2.2946, + "step": 4483500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889634022482821e-05, + "loss": 2.2692, + "step": 4484000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889510411557497e-05, + "loss": 2.2717, + "step": 4484500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889386552914889e-05, + "loss": 2.3011, + "step": 4485000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889262694272281e-05, + "loss": 2.2948, + "step": 4485500 + }, + { + "epoch": 22.23, + "learning_rate": 3.8891388356296724e-05, + "loss": 2.2936, + "step": 4486000 + }, + { + "epoch": 22.23, + "learning_rate": 3.889014976987064e-05, + "loss": 2.2986, + "step": 4486500 + }, + { + "epoch": 22.23, + "learning_rate": 3.888891366061741e-05, + "loss": 2.3014, + "step": 4487000 + }, + { + "epoch": 22.23, + "learning_rate": 3.8887675074191326e-05, + "loss": 2.3245, + "step": 4487500 + }, + { + "epoch": 22.24, + "learning_rate": 3.8886436487765243e-05, + "loss": 2.3155, + "step": 4488000 + }, + { + "epoch": 22.24, + "learning_rate": 3.888520037851201e-05, + "loss": 2.3137, + "step": 4488500 + }, + { + "epoch": 22.24, + "learning_rate": 3.888396179208593e-05, + "loss": 2.3031, + "step": 4489000 + }, + { + "epoch": 22.24, + "learning_rate": 3.8882723205659846e-05, + "loss": 2.3025, + "step": 4489500 + }, + { + "epoch": 22.25, + "learning_rate": 3.888148461923376e-05, + "loss": 2.3273, + "step": 4490000 + }, + { + "epoch": 22.25, + "learning_rate": 3.888024850998053e-05, + "loss": 2.3053, + "step": 4490500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887900992355445e-05, + "loss": 2.3006, + "step": 4491000 + }, + { + "epoch": 22.25, + "learning_rate": 3.8877771337128366e-05, + "loss": 2.3109, + "step": 4491500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887653275070228e-05, + "loss": 2.3198, + "step": 4492000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887529664144905e-05, + "loss": 2.3127, + "step": 4492500 + }, + { + "epoch": 22.26, + "learning_rate": 3.887405805502297e-05, + "loss": 2.2989, + "step": 4493000 + }, + { + "epoch": 22.26, + "learning_rate": 3.8872819468596885e-05, + "loss": 2.3253, + "step": 4493500 + }, + { + "epoch": 22.26, + "learning_rate": 3.88715808821708e-05, + "loss": 2.2805, + "step": 4494000 + }, + { + "epoch": 22.27, + "learning_rate": 3.887034229574472e-05, + "loss": 2.3011, + "step": 4494500 + }, + { + "epoch": 22.27, + "learning_rate": 3.8869103709318636e-05, + "loss": 2.3047, + "step": 4495000 + }, + { + "epoch": 22.27, + "learning_rate": 3.8867865122892546e-05, + "loss": 2.2922, + "step": 4495500 + }, + { + "epoch": 22.27, + "learning_rate": 3.886662653646646e-05, + "loss": 2.2869, + "step": 4496000 + }, + { + "epoch": 22.28, + "learning_rate": 3.886538795004038e-05, + "loss": 2.3214, + "step": 4496500 + }, + { + "epoch": 22.28, + "learning_rate": 3.88641493636143e-05, + "loss": 2.3244, + "step": 4497000 + }, + { + "epoch": 22.28, + "learning_rate": 3.8862910777188214e-05, + "loss": 2.3028, + "step": 4497500 + }, + { + "epoch": 22.28, + "learning_rate": 3.8861672190762124e-05, + "loss": 2.3, + "step": 4498000 + }, + { + "epoch": 22.29, + "learning_rate": 3.88604360815089e-05, + "loss": 2.2982, + "step": 4498500 + }, + { + "epoch": 22.29, + "learning_rate": 3.8859197495082817e-05, + "loss": 2.3043, + "step": 4499000 + }, + { + "epoch": 22.29, + "learning_rate": 3.885795890865673e-05, + "loss": 2.3042, + "step": 4499500 + }, + { + "epoch": 22.29, + "learning_rate": 3.8856720322230644e-05, + "loss": 2.3022, + "step": 4500000 + }, + { + "epoch": 22.3, + "learning_rate": 3.885548173580456e-05, + "loss": 2.3037, + "step": 4500500 + }, + { + "epoch": 22.3, + "learning_rate": 3.885424314937848e-05, + "loss": 2.2896, + "step": 4501000 + }, + { + "epoch": 22.3, + "learning_rate": 3.88530095172981e-05, + "loss": 2.2943, + "step": 4501500 + }, + { + "epoch": 22.3, + "learning_rate": 3.8851770930872015e-05, + "loss": 2.2988, + "step": 4502000 + }, + { + "epoch": 22.31, + "learning_rate": 3.8850534821618784e-05, + "loss": 2.3111, + "step": 4502500 + }, + { + "epoch": 22.31, + "learning_rate": 3.88492962351927e-05, + "loss": 2.3171, + "step": 4503000 + }, + { + "epoch": 22.31, + "learning_rate": 3.884805764876662e-05, + "loss": 2.2992, + "step": 4503500 + }, + { + "epoch": 22.31, + "learning_rate": 3.8846819062340535e-05, + "loss": 2.3312, + "step": 4504000 + }, + { + "epoch": 22.32, + "learning_rate": 3.884558047591445e-05, + "loss": 2.3045, + "step": 4504500 + }, + { + "epoch": 22.32, + "learning_rate": 3.884434188948837e-05, + "loss": 2.3171, + "step": 4505000 + }, + { + "epoch": 22.32, + "learning_rate": 3.8843103303062286e-05, + "loss": 2.3009, + "step": 4505500 + }, + { + "epoch": 22.32, + "learning_rate": 3.88418647166362e-05, + "loss": 2.3001, + "step": 4506000 + }, + { + "epoch": 22.33, + "learning_rate": 3.884062613021012e-05, + "loss": 2.3133, + "step": 4506500 + }, + { + "epoch": 22.33, + "learning_rate": 3.883939002095688e-05, + "loss": 2.2882, + "step": 4507000 + }, + { + "epoch": 22.33, + "learning_rate": 3.88381514345308e-05, + "loss": 2.3329, + "step": 4507500 + }, + { + "epoch": 22.33, + "learning_rate": 3.8836912848104715e-05, + "loss": 2.3059, + "step": 4508000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883567426167863e-05, + "loss": 2.2917, + "step": 4508500 + }, + { + "epoch": 22.34, + "learning_rate": 3.883443567525255e-05, + "loss": 2.2894, + "step": 4509000 + }, + { + "epoch": 22.34, + "learning_rate": 3.8833197088826466e-05, + "loss": 2.3078, + "step": 4509500 + }, + { + "epoch": 22.34, + "learning_rate": 3.883195850240038e-05, + "loss": 2.3134, + "step": 4510000 + }, + { + "epoch": 22.35, + "learning_rate": 3.88307199159743e-05, + "loss": 2.3113, + "step": 4510500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882948132954822e-05, + "loss": 2.2759, + "step": 4511000 + }, + { + "epoch": 22.35, + "learning_rate": 3.8828242743122134e-05, + "loss": 2.3251, + "step": 4511500 + }, + { + "epoch": 22.35, + "learning_rate": 3.8827004156696044e-05, + "loss": 2.3013, + "step": 4512000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882576557026996e-05, + "loss": 2.2991, + "step": 4512500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882453193818958e-05, + "loss": 2.3148, + "step": 4513000 + }, + { + "epoch": 22.36, + "learning_rate": 3.88232933517635e-05, + "loss": 2.3174, + "step": 4513500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882205724251027e-05, + "loss": 2.2938, + "step": 4514000 + }, + { + "epoch": 22.37, + "learning_rate": 3.8820818656084184e-05, + "loss": 2.3036, + "step": 4514500 + }, + { + "epoch": 22.37, + "learning_rate": 3.88195800696581e-05, + "loss": 2.2946, + "step": 4515000 + }, + { + "epoch": 22.37, + "learning_rate": 3.881834148323202e-05, + "loss": 2.3099, + "step": 4515500 + }, + { + "epoch": 22.37, + "learning_rate": 3.8817102896805935e-05, + "loss": 2.3022, + "step": 4516000 + }, + { + "epoch": 22.38, + "learning_rate": 3.881586431037985e-05, + "loss": 2.309, + "step": 4516500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881462820112662e-05, + "loss": 2.2991, + "step": 4517000 + }, + { + "epoch": 22.38, + "learning_rate": 3.881338961470054e-05, + "loss": 2.2815, + "step": 4517500 + }, + { + "epoch": 22.38, + "learning_rate": 3.8812151028274455e-05, + "loss": 2.3127, + "step": 4518000 + }, + { + "epoch": 22.39, + "learning_rate": 3.881091244184837e-05, + "loss": 2.3122, + "step": 4518500 + }, + { + "epoch": 22.39, + "learning_rate": 3.880967385542228e-05, + "loss": 2.305, + "step": 4519000 + }, + { + "epoch": 22.39, + "learning_rate": 3.88084352689962e-05, + "loss": 2.3096, + "step": 4519500 + }, + { + "epoch": 22.39, + "learning_rate": 3.8807196682570116e-05, + "loss": 2.2711, + "step": 4520000 + }, + { + "epoch": 22.4, + "learning_rate": 3.880595809614403e-05, + "loss": 2.3383, + "step": 4520500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880471950971795e-05, + "loss": 2.2977, + "step": 4521000 + }, + { + "epoch": 22.4, + "learning_rate": 3.8803480923291866e-05, + "loss": 2.3228, + "step": 4521500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880224233686578e-05, + "loss": 2.3133, + "step": 4522000 + }, + { + "epoch": 22.41, + "learning_rate": 3.880100622761255e-05, + "loss": 2.3178, + "step": 4522500 + }, + { + "epoch": 22.41, + "learning_rate": 3.879976764118647e-05, + "loss": 2.2977, + "step": 4523000 + }, + { + "epoch": 22.41, + "learning_rate": 3.8798529054760386e-05, + "loss": 2.3113, + "step": 4523500 + }, + { + "epoch": 22.41, + "learning_rate": 3.87972904683343e-05, + "loss": 2.2864, + "step": 4524000 + }, + { + "epoch": 22.42, + "learning_rate": 3.879605435908107e-05, + "loss": 2.2973, + "step": 4524500 + }, + { + "epoch": 22.42, + "learning_rate": 3.879481577265499e-05, + "loss": 2.3181, + "step": 4525000 + }, + { + "epoch": 22.42, + "learning_rate": 3.8793577186228905e-05, + "loss": 2.3087, + "step": 4525500 + }, + { + "epoch": 22.42, + "learning_rate": 3.8792338599802816e-05, + "loss": 2.3024, + "step": 4526000 + }, + { + "epoch": 22.43, + "learning_rate": 3.879110001337673e-05, + "loss": 2.297, + "step": 4526500 + }, + { + "epoch": 22.43, + "learning_rate": 3.878986142695065e-05, + "loss": 2.2964, + "step": 4527000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878862531769742e-05, + "loss": 2.3192, + "step": 4527500 + }, + { + "epoch": 22.43, + "learning_rate": 3.8787386731271335e-05, + "loss": 2.2818, + "step": 4528000 + }, + { + "epoch": 22.44, + "learning_rate": 3.878614814484525e-05, + "loss": 2.3023, + "step": 4528500 + }, + { + "epoch": 22.44, + "learning_rate": 3.878490955841917e-05, + "loss": 2.3361, + "step": 4529000 + }, + { + "epoch": 22.44, + "learning_rate": 3.8783670971993086e-05, + "loss": 2.3235, + "step": 4529500 + }, + { + "epoch": 22.44, + "learning_rate": 3.8782432385567e-05, + "loss": 2.2829, + "step": 4530000 + }, + { + "epoch": 22.45, + "learning_rate": 3.878119379914092e-05, + "loss": 2.3039, + "step": 4530500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877995521271484e-05, + "loss": 2.2703, + "step": 4531000 + }, + { + "epoch": 22.45, + "learning_rate": 3.8778719103461606e-05, + "loss": 2.2963, + "step": 4531500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877748051703552e-05, + "loss": 2.3257, + "step": 4532000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877624193060943e-05, + "loss": 2.3125, + "step": 4532500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877500334418335e-05, + "loss": 2.3036, + "step": 4533000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877376723493012e-05, + "loss": 2.3172, + "step": 4533500 + }, + { + "epoch": 22.46, + "learning_rate": 3.8772528648504035e-05, + "loss": 2.3257, + "step": 4534000 + }, + { + "epoch": 22.47, + "learning_rate": 3.8771292539250804e-05, + "loss": 2.2861, + "step": 4534500 + }, + { + "epoch": 22.47, + "learning_rate": 3.877005395282472e-05, + "loss": 2.3049, + "step": 4535000 + }, + { + "epoch": 22.47, + "learning_rate": 3.876881536639864e-05, + "loss": 2.3128, + "step": 4535500 + }, + { + "epoch": 22.47, + "learning_rate": 3.8767576779972555e-05, + "loss": 2.3123, + "step": 4536000 + }, + { + "epoch": 22.48, + "learning_rate": 3.8766340670719324e-05, + "loss": 2.3054, + "step": 4536500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876510208429324e-05, + "loss": 2.3278, + "step": 4537000 + }, + { + "epoch": 22.48, + "learning_rate": 3.876386349786715e-05, + "loss": 2.3234, + "step": 4537500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876262491144107e-05, + "loss": 2.3163, + "step": 4538000 + }, + { + "epoch": 22.49, + "learning_rate": 3.8761386325014985e-05, + "loss": 2.3147, + "step": 4538500 + }, + { + "epoch": 22.49, + "learning_rate": 3.87601477385889e-05, + "loss": 2.325, + "step": 4539000 + }, + { + "epoch": 22.49, + "learning_rate": 3.875890915216282e-05, + "loss": 2.3072, + "step": 4539500 + }, + { + "epoch": 22.49, + "learning_rate": 3.8757670565736735e-05, + "loss": 2.3123, + "step": 4540000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875643197931065e-05, + "loss": 2.3073, + "step": 4540500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875519587005742e-05, + "loss": 2.2933, + "step": 4541000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875395976080419e-05, + "loss": 2.3299, + "step": 4541500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875272117437811e-05, + "loss": 2.3161, + "step": 4542000 + }, + { + "epoch": 22.51, + "learning_rate": 3.8751482587952024e-05, + "loss": 2.3171, + "step": 4542500 + }, + { + "epoch": 22.51, + "learning_rate": 3.875024400152594e-05, + "loss": 2.3235, + "step": 4543000 + }, + { + "epoch": 22.51, + "learning_rate": 3.874900541509986e-05, + "loss": 2.3079, + "step": 4543500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8747769305846627e-05, + "loss": 2.313, + "step": 4544000 + }, + { + "epoch": 22.52, + "learning_rate": 3.8746530719420543e-05, + "loss": 2.3353, + "step": 4544500 + }, + { + "epoch": 22.52, + "learning_rate": 3.874529213299446e-05, + "loss": 2.3102, + "step": 4545000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874405354656838e-05, + "loss": 2.3287, + "step": 4545500 + }, + { + "epoch": 22.52, + "learning_rate": 3.8742817437315146e-05, + "loss": 2.3158, + "step": 4546000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874157885088906e-05, + "loss": 2.2897, + "step": 4546500 + }, + { + "epoch": 22.53, + "learning_rate": 3.874034026446297e-05, + "loss": 2.3011, + "step": 4547000 + }, + { + "epoch": 22.53, + "learning_rate": 3.873910167803689e-05, + "loss": 2.2923, + "step": 4547500 + }, + { + "epoch": 22.53, + "learning_rate": 3.873786309161081e-05, + "loss": 2.2992, + "step": 4548000 + }, + { + "epoch": 22.53, + "learning_rate": 3.8736624505184724e-05, + "loss": 2.3284, + "step": 4548500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873538839593149e-05, + "loss": 2.3066, + "step": 4549000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873414980950541e-05, + "loss": 2.3289, + "step": 4549500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873291122307933e-05, + "loss": 2.2994, + "step": 4550000 + }, + { + "epoch": 22.54, + "learning_rate": 3.8731672636653244e-05, + "loss": 2.3128, + "step": 4550500 + }, + { + "epoch": 22.55, + "learning_rate": 3.873043405022716e-05, + "loss": 2.3113, + "step": 4551000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872919794097393e-05, + "loss": 2.3115, + "step": 4551500 + }, + { + "epoch": 22.55, + "learning_rate": 3.8727959354547846e-05, + "loss": 2.3109, + "step": 4552000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872672076812176e-05, + "loss": 2.32, + "step": 4552500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872548218169568e-05, + "loss": 2.3099, + "step": 4553000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872424607244244e-05, + "loss": 2.3342, + "step": 4553500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872300996318921e-05, + "loss": 2.2928, + "step": 4554000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872177137676313e-05, + "loss": 2.3082, + "step": 4554500 + }, + { + "epoch": 22.57, + "learning_rate": 3.8720532790337045e-05, + "loss": 2.326, + "step": 4555000 + }, + { + "epoch": 22.57, + "learning_rate": 3.871929420391096e-05, + "loss": 2.3155, + "step": 4555500 + }, + { + "epoch": 22.57, + "learning_rate": 3.871805561748488e-05, + "loss": 2.299, + "step": 4556000 + }, + { + "epoch": 22.57, + "learning_rate": 3.8716817031058796e-05, + "loss": 2.3201, + "step": 4556500 + }, + { + "epoch": 22.58, + "learning_rate": 3.871557844463271e-05, + "loss": 2.2982, + "step": 4557000 + }, + { + "epoch": 22.58, + "learning_rate": 3.871433985820663e-05, + "loss": 2.3207, + "step": 4557500 + }, + { + "epoch": 22.58, + "learning_rate": 3.8713101271780546e-05, + "loss": 2.316, + "step": 4558000 + }, + { + "epoch": 22.58, + "learning_rate": 3.871186516252731e-05, + "loss": 2.3293, + "step": 4558500 + }, + { + "epoch": 22.59, + "learning_rate": 3.8710626576101225e-05, + "loss": 2.3033, + "step": 4559000 + }, + { + "epoch": 22.59, + "learning_rate": 3.870938798967514e-05, + "loss": 2.3159, + "step": 4559500 + }, + { + "epoch": 22.59, + "learning_rate": 3.870814940324906e-05, + "loss": 2.3366, + "step": 4560000 + }, + { + "epoch": 22.59, + "learning_rate": 3.8706910816822976e-05, + "loss": 2.3203, + "step": 4560500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870567223039689e-05, + "loss": 2.32, + "step": 4561000 + }, + { + "epoch": 22.6, + "learning_rate": 3.870443364397081e-05, + "loss": 2.315, + "step": 4561500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870319505754473e-05, + "loss": 2.3213, + "step": 4562000 + }, + { + "epoch": 22.6, + "learning_rate": 3.8701956471118644e-05, + "loss": 2.3211, + "step": 4562500 + }, + { + "epoch": 22.61, + "learning_rate": 3.870072036186541e-05, + "loss": 2.3087, + "step": 4563000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869948177543933e-05, + "loss": 2.3402, + "step": 4563500 + }, + { + "epoch": 22.61, + "learning_rate": 3.8698243189013247e-05, + "loss": 2.3021, + "step": 4564000 + }, + { + "epoch": 22.61, + "learning_rate": 3.8697004602587163e-05, + "loss": 2.3204, + "step": 4564500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869576601616108e-05, + "loss": 2.2966, + "step": 4565000 + }, + { + "epoch": 22.62, + "learning_rate": 3.869452990690784e-05, + "loss": 2.3114, + "step": 4565500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869329132048176e-05, + "loss": 2.3284, + "step": 4566000 + }, + { + "epoch": 22.62, + "learning_rate": 3.8692052734055676e-05, + "loss": 2.3001, + "step": 4566500 + }, + { + "epoch": 22.63, + "learning_rate": 3.869081414762959e-05, + "loss": 2.3107, + "step": 4567000 + }, + { + "epoch": 22.63, + "learning_rate": 3.868957556120351e-05, + "loss": 2.322, + "step": 4567500 + }, + { + "epoch": 22.63, + "learning_rate": 3.868833697477743e-05, + "loss": 2.3449, + "step": 4568000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8687098388351344e-05, + "loss": 2.3089, + "step": 4568500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868585980192526e-05, + "loss": 2.3135, + "step": 4569000 + }, + { + "epoch": 22.64, + "learning_rate": 3.868462121549918e-05, + "loss": 2.3005, + "step": 4569500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868338510624595e-05, + "loss": 2.3158, + "step": 4570000 + }, + { + "epoch": 22.64, + "learning_rate": 3.868214899699271e-05, + "loss": 2.3041, + "step": 4570500 + }, + { + "epoch": 22.65, + "learning_rate": 3.8680910410566626e-05, + "loss": 2.328, + "step": 4571000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867967182414054e-05, + "loss": 2.3015, + "step": 4571500 + }, + { + "epoch": 22.65, + "learning_rate": 3.867843323771446e-05, + "loss": 2.3389, + "step": 4572000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867719712846123e-05, + "loss": 2.2967, + "step": 4572500 + }, + { + "epoch": 22.66, + "learning_rate": 3.8675961019208004e-05, + "loss": 2.3422, + "step": 4573000 + }, + { + "epoch": 22.66, + "learning_rate": 3.867472243278192e-05, + "loss": 2.3099, + "step": 4573500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867348384635584e-05, + "loss": 2.326, + "step": 4574000 + }, + { + "epoch": 22.66, + "learning_rate": 3.8672245259929755e-05, + "loss": 2.3225, + "step": 4574500 + }, + { + "epoch": 22.67, + "learning_rate": 3.8671006673503665e-05, + "loss": 2.3198, + "step": 4575000 + }, + { + "epoch": 22.67, + "learning_rate": 3.866976808707758e-05, + "loss": 2.3108, + "step": 4575500 + }, + { + "epoch": 22.67, + "learning_rate": 3.86685295006515e-05, + "loss": 2.3139, + "step": 4576000 + }, + { + "epoch": 22.67, + "learning_rate": 3.8667290914225416e-05, + "loss": 2.3271, + "step": 4576500 + }, + { + "epoch": 22.68, + "learning_rate": 3.866605232779933e-05, + "loss": 2.3365, + "step": 4577000 + }, + { + "epoch": 22.68, + "learning_rate": 3.86648162185461e-05, + "loss": 2.316, + "step": 4577500 + }, + { + "epoch": 22.68, + "learning_rate": 3.866357763212002e-05, + "loss": 2.3204, + "step": 4578000 + }, + { + "epoch": 22.68, + "learning_rate": 3.8662339045693935e-05, + "loss": 2.3074, + "step": 4578500 + }, + { + "epoch": 22.69, + "learning_rate": 3.8661100459267845e-05, + "loss": 2.3012, + "step": 4579000 + }, + { + "epoch": 22.69, + "learning_rate": 3.865986187284176e-05, + "loss": 2.3167, + "step": 4579500 + }, + { + "epoch": 22.69, + "learning_rate": 3.865862328641568e-05, + "loss": 2.3111, + "step": 4580000 + }, + { + "epoch": 22.69, + "learning_rate": 3.8657387177162455e-05, + "loss": 2.3244, + "step": 4580500 + }, + { + "epoch": 22.7, + "learning_rate": 3.865614859073637e-05, + "loss": 2.3031, + "step": 4581000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865491000431028e-05, + "loss": 2.3241, + "step": 4581500 + }, + { + "epoch": 22.7, + "learning_rate": 3.86536714178842e-05, + "loss": 2.2923, + "step": 4582000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865243530863097e-05, + "loss": 2.2862, + "step": 4582500 + }, + { + "epoch": 22.71, + "learning_rate": 3.8651196722204885e-05, + "loss": 2.3268, + "step": 4583000 + }, + { + "epoch": 22.71, + "learning_rate": 3.86499581357788e-05, + "loss": 2.3275, + "step": 4583500 + }, + { + "epoch": 22.71, + "learning_rate": 3.864871954935272e-05, + "loss": 2.3048, + "step": 4584000 + }, + { + "epoch": 22.71, + "learning_rate": 3.8647480962926635e-05, + "loss": 2.3134, + "step": 4584500 + }, + { + "epoch": 22.72, + "learning_rate": 3.8646244853673404e-05, + "loss": 2.304, + "step": 4585000 + }, + { + "epoch": 22.72, + "learning_rate": 3.864500626724732e-05, + "loss": 2.2997, + "step": 4585500 + }, + { + "epoch": 22.72, + "learning_rate": 3.864376768082124e-05, + "loss": 2.3093, + "step": 4586000 + }, + { + "epoch": 22.72, + "learning_rate": 3.8642529094395155e-05, + "loss": 2.3179, + "step": 4586500 + }, + { + "epoch": 22.73, + "learning_rate": 3.864129050796907e-05, + "loss": 2.317, + "step": 4587000 + }, + { + "epoch": 22.73, + "learning_rate": 3.8640054398715834e-05, + "loss": 2.3088, + "step": 4587500 + }, + { + "epoch": 22.73, + "learning_rate": 3.86388182894626e-05, + "loss": 2.2975, + "step": 4588000 + }, + { + "epoch": 22.73, + "learning_rate": 3.863757970303652e-05, + "loss": 2.3084, + "step": 4588500 + }, + { + "epoch": 22.74, + "learning_rate": 3.8636341116610437e-05, + "loss": 2.2775, + "step": 4589000 + }, + { + "epoch": 22.74, + "learning_rate": 3.8635102530184353e-05, + "loss": 2.2953, + "step": 4589500 + }, + { + "epoch": 22.74, + "learning_rate": 3.863386642093112e-05, + "loss": 2.3188, + "step": 4590000 + }, + { + "epoch": 22.74, + "learning_rate": 3.863262783450504e-05, + "loss": 2.3119, + "step": 4590500 + }, + { + "epoch": 22.75, + "learning_rate": 3.8631389248078956e-05, + "loss": 2.2956, + "step": 4591000 + }, + { + "epoch": 22.75, + "learning_rate": 3.863015066165287e-05, + "loss": 2.2869, + "step": 4591500 + }, + { + "epoch": 22.75, + "learning_rate": 3.862891207522678e-05, + "loss": 2.3249, + "step": 4592000 + }, + { + "epoch": 22.75, + "learning_rate": 3.86276734888007e-05, + "loss": 2.3244, + "step": 4592500 + }, + { + "epoch": 22.76, + "learning_rate": 3.862643737954747e-05, + "loss": 2.3138, + "step": 4593000 + }, + { + "epoch": 22.76, + "learning_rate": 3.8625198793121386e-05, + "loss": 2.2791, + "step": 4593500 + }, + { + "epoch": 22.76, + "learning_rate": 3.86239602066953e-05, + "loss": 2.313, + "step": 4594000 + }, + { + "epoch": 22.76, + "learning_rate": 3.862272409744208e-05, + "loss": 2.3295, + "step": 4594500 + }, + { + "epoch": 22.77, + "learning_rate": 3.862148551101599e-05, + "loss": 2.3138, + "step": 4595000 + }, + { + "epoch": 22.77, + "learning_rate": 3.8620246924589905e-05, + "loss": 2.2907, + "step": 4595500 + }, + { + "epoch": 22.77, + "learning_rate": 3.861900833816382e-05, + "loss": 2.3033, + "step": 4596000 + }, + { + "epoch": 22.77, + "learning_rate": 3.861776975173774e-05, + "loss": 2.3326, + "step": 4596500 + }, + { + "epoch": 22.78, + "learning_rate": 3.8616531165311656e-05, + "loss": 2.3104, + "step": 4597000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861529257888557e-05, + "loss": 2.3412, + "step": 4597500 + }, + { + "epoch": 22.78, + "learning_rate": 3.861405399245949e-05, + "loss": 2.3166, + "step": 4598000 + }, + { + "epoch": 22.78, + "learning_rate": 3.86128154060334e-05, + "loss": 2.3151, + "step": 4598500 + }, + { + "epoch": 22.79, + "learning_rate": 3.861157681960732e-05, + "loss": 2.325, + "step": 4599000 + }, + { + "epoch": 22.79, + "learning_rate": 3.8610338233181234e-05, + "loss": 2.307, + "step": 4599500 + }, + { + "epoch": 22.79, + "learning_rate": 3.860909964675515e-05, + "loss": 2.3147, + "step": 4600000 + }, + { + "epoch": 22.79, + "learning_rate": 3.860786106032907e-05, + "loss": 2.3078, + "step": 4600500 + }, + { + "epoch": 22.79, + "learning_rate": 3.8606622473902985e-05, + "loss": 2.3078, + "step": 4601000 + }, + { + "epoch": 22.8, + "learning_rate": 3.86053838874769e-05, + "loss": 2.3242, + "step": 4601500 + }, + { + "epoch": 22.8, + "learning_rate": 3.860414777822367e-05, + "loss": 2.3032, + "step": 4602000 + }, + { + "epoch": 22.8, + "learning_rate": 3.860290919179759e-05, + "loss": 2.3121, + "step": 4602500 + }, + { + "epoch": 22.8, + "learning_rate": 3.8601670605371504e-05, + "loss": 2.3143, + "step": 4603000 + }, + { + "epoch": 22.81, + "learning_rate": 3.860043201894542e-05, + "loss": 2.3154, + "step": 4603500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859919343251934e-05, + "loss": 2.327, + "step": 4604000 + }, + { + "epoch": 22.81, + "learning_rate": 3.8597954846093255e-05, + "loss": 2.3061, + "step": 4604500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859671625966717e-05, + "loss": 2.3172, + "step": 4605000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859547767324109e-05, + "loss": 2.3136, + "step": 4605500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8594239086815006e-05, + "loss": 2.3018, + "step": 4606000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859300297756177e-05, + "loss": 2.2998, + "step": 4606500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8591764391135685e-05, + "loss": 2.3314, + "step": 4607000 + }, + { + "epoch": 22.83, + "learning_rate": 3.85905258047096e-05, + "loss": 2.3057, + "step": 4607500 + }, + { + "epoch": 22.83, + "learning_rate": 3.858928721828352e-05, + "loss": 2.3195, + "step": 4608000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8588048631857436e-05, + "loss": 2.3239, + "step": 4608500 + }, + { + "epoch": 22.83, + "learning_rate": 3.8586812522604205e-05, + "loss": 2.293, + "step": 4609000 + }, + { + "epoch": 22.84, + "learning_rate": 3.858557393617812e-05, + "loss": 2.3212, + "step": 4609500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858433534975204e-05, + "loss": 2.3048, + "step": 4610000 + }, + { + "epoch": 22.84, + "learning_rate": 3.8583096763325955e-05, + "loss": 2.2934, + "step": 4610500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858185817689987e-05, + "loss": 2.2999, + "step": 4611000 + }, + { + "epoch": 22.85, + "learning_rate": 3.858062206764664e-05, + "loss": 2.3105, + "step": 4611500 + }, + { + "epoch": 22.85, + "learning_rate": 3.857938348122055e-05, + "loss": 2.3013, + "step": 4612000 + }, + { + "epoch": 22.85, + "learning_rate": 3.857814489479447e-05, + "loss": 2.3245, + "step": 4612500 + }, + { + "epoch": 22.85, + "learning_rate": 3.8576911262714096e-05, + "loss": 2.311, + "step": 4613000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857567267628801e-05, + "loss": 2.3275, + "step": 4613500 + }, + { + "epoch": 22.86, + "learning_rate": 3.857443408986192e-05, + "loss": 2.3123, + "step": 4614000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857319550343584e-05, + "loss": 2.3256, + "step": 4614500 + }, + { + "epoch": 22.86, + "learning_rate": 3.8571956917009757e-05, + "loss": 2.3182, + "step": 4615000 + }, + { + "epoch": 22.87, + "learning_rate": 3.8570718330583674e-05, + "loss": 2.3452, + "step": 4615500 + }, + { + "epoch": 22.87, + "learning_rate": 3.856947974415759e-05, + "loss": 2.3253, + "step": 4616000 + }, + { + "epoch": 22.87, + "learning_rate": 3.856824115773151e-05, + "loss": 2.3012, + "step": 4616500 + }, + { + "epoch": 22.87, + "learning_rate": 3.8567002571305424e-05, + "loss": 2.3173, + "step": 4617000 + }, + { + "epoch": 22.88, + "learning_rate": 3.856576398487934e-05, + "loss": 2.3155, + "step": 4617500 + }, + { + "epoch": 22.88, + "learning_rate": 3.856452539845326e-05, + "loss": 2.2929, + "step": 4618000 + }, + { + "epoch": 22.88, + "learning_rate": 3.8563286812027175e-05, + "loss": 2.3329, + "step": 4618500 + }, + { + "epoch": 22.88, + "learning_rate": 3.8562048225601085e-05, + "loss": 2.3319, + "step": 4619000 + }, + { + "epoch": 22.89, + "learning_rate": 3.8560809639175e-05, + "loss": 2.3235, + "step": 4619500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855957600709462e-05, + "loss": 2.3148, + "step": 4620000 + }, + { + "epoch": 22.89, + "learning_rate": 3.855833742066854e-05, + "loss": 2.3293, + "step": 4620500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855709883424246e-05, + "loss": 2.2721, + "step": 4621000 + }, + { + "epoch": 22.9, + "learning_rate": 3.8555860247816374e-05, + "loss": 2.3437, + "step": 4621500 + }, + { + "epoch": 22.9, + "learning_rate": 3.855462166139029e-05, + "loss": 2.3259, + "step": 4622000 + }, + { + "epoch": 22.9, + "learning_rate": 3.855338307496421e-05, + "loss": 2.3164, + "step": 4622500 + }, + { + "epoch": 22.9, + "learning_rate": 3.8552146965710976e-05, + "loss": 2.3135, + "step": 4623000 + }, + { + "epoch": 22.91, + "learning_rate": 3.8550910856457745e-05, + "loss": 2.3246, + "step": 4623500 + }, + { + "epoch": 22.91, + "learning_rate": 3.854967227003166e-05, + "loss": 2.3097, + "step": 4624000 + }, + { + "epoch": 22.91, + "learning_rate": 3.854843368360558e-05, + "loss": 2.3233, + "step": 4624500 + }, + { + "epoch": 22.91, + "learning_rate": 3.8547195097179496e-05, + "loss": 2.3195, + "step": 4625000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854595651075341e-05, + "loss": 2.3101, + "step": 4625500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854471792432733e-05, + "loss": 2.3006, + "step": 4626000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854347933790124e-05, + "loss": 2.3205, + "step": 4626500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854224075147516e-05, + "loss": 2.3168, + "step": 4627000 + }, + { + "epoch": 22.93, + "learning_rate": 3.8541002165049074e-05, + "loss": 2.3087, + "step": 4627500 + }, + { + "epoch": 22.93, + "learning_rate": 3.853976357862299e-05, + "loss": 2.3106, + "step": 4628000 + }, + { + "epoch": 22.93, + "learning_rate": 3.853852746936976e-05, + "loss": 2.3153, + "step": 4628500 + }, + { + "epoch": 22.93, + "learning_rate": 3.8537288882943676e-05, + "loss": 2.3313, + "step": 4629000 + }, + { + "epoch": 22.94, + "learning_rate": 3.8536050296517587e-05, + "loss": 2.3148, + "step": 4629500 + }, + { + "epoch": 22.94, + "learning_rate": 3.8534811710091504e-05, + "loss": 2.3088, + "step": 4630000 + }, + { + "epoch": 22.94, + "learning_rate": 3.853357312366542e-05, + "loss": 2.3086, + "step": 4630500 + }, + { + "epoch": 22.94, + "learning_rate": 3.853233453723934e-05, + "loss": 2.3283, + "step": 4631000 + }, + { + "epoch": 22.95, + "learning_rate": 3.853109842798611e-05, + "loss": 2.3124, + "step": 4631500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852985984156003e-05, + "loss": 2.3007, + "step": 4632000 + }, + { + "epoch": 22.95, + "learning_rate": 3.852862125513394e-05, + "loss": 2.318, + "step": 4632500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852738266870786e-05, + "loss": 2.3008, + "step": 4633000 + }, + { + "epoch": 22.96, + "learning_rate": 3.8526144082281774e-05, + "loss": 2.3385, + "step": 4633500 + }, + { + "epoch": 22.96, + "learning_rate": 3.852490549585569e-05, + "loss": 2.3005, + "step": 4634000 + }, + { + "epoch": 22.96, + "learning_rate": 3.852366690942961e-05, + "loss": 2.2903, + "step": 4634500 + }, + { + "epoch": 22.96, + "learning_rate": 3.8522428323003525e-05, + "loss": 2.3239, + "step": 4635000 + }, + { + "epoch": 22.97, + "learning_rate": 3.8521192213750294e-05, + "loss": 2.3164, + "step": 4635500 + }, + { + "epoch": 22.97, + "learning_rate": 3.8519953627324204e-05, + "loss": 2.3282, + "step": 4636000 + }, + { + "epoch": 22.97, + "learning_rate": 3.851871504089812e-05, + "loss": 2.3116, + "step": 4636500 + }, + { + "epoch": 22.97, + "learning_rate": 3.851747645447204e-05, + "loss": 2.3066, + "step": 4637000 + }, + { + "epoch": 22.98, + "learning_rate": 3.8516237868045954e-05, + "loss": 2.3298, + "step": 4637500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851499928161987e-05, + "loss": 2.3011, + "step": 4638000 + }, + { + "epoch": 22.98, + "learning_rate": 3.851376069519379e-05, + "loss": 2.33, + "step": 4638500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851252458594056e-05, + "loss": 2.2978, + "step": 4639000 + }, + { + "epoch": 22.99, + "learning_rate": 3.8511285999514474e-05, + "loss": 2.3144, + "step": 4639500 + }, + { + "epoch": 22.99, + "learning_rate": 3.851004741308839e-05, + "loss": 2.3008, + "step": 4640000 + }, + { + "epoch": 22.99, + "learning_rate": 3.850880882666231e-05, + "loss": 2.3002, + "step": 4640500 + }, + { + "epoch": 22.99, + "learning_rate": 3.8507570240236225e-05, + "loss": 2.3052, + "step": 4641000 + }, + { + "epoch": 23.0, + "learning_rate": 3.850633165381014e-05, + "loss": 2.3447, + "step": 4641500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850509306738406e-05, + "loss": 2.2959, + "step": 4642000 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.6535051236946925, + "eval_accuracy_mlm": 0.6085347615669268, + "eval_accuracy_nsp": 0.8653273663608658, + "eval_loss": 2.3427975177764893, + "eval_runtime": 145.7935, + "eval_samples_per_second": 1748.768, + "eval_steps_per_second": 72.87, + "step": 4642389 + }, + { + "epoch": 23.0, + "learning_rate": 3.8503854480957976e-05, + "loss": 2.2864, + "step": 4642500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850261589453189e-05, + "loss": 2.2797, + "step": 4643000 + }, + { + "epoch": 23.01, + "learning_rate": 3.850137730810581e-05, + "loss": 2.2687, + "step": 4643500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8500138721679726e-05, + "loss": 2.2998, + "step": 4644000 + }, + { + "epoch": 23.01, + "learning_rate": 3.849890261242649e-05, + "loss": 2.2772, + "step": 4644500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8497664026000405e-05, + "loss": 2.2775, + "step": 4645000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849642543957432e-05, + "loss": 2.2837, + "step": 4645500 + }, + { + "epoch": 23.02, + "learning_rate": 3.849518685314824e-05, + "loss": 2.2925, + "step": 4646000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849395074389501e-05, + "loss": 2.2727, + "step": 4646500 + }, + { + "epoch": 23.02, + "learning_rate": 3.8492712157468925e-05, + "loss": 2.2708, + "step": 4647000 + }, + { + "epoch": 23.03, + "learning_rate": 3.849147357104284e-05, + "loss": 2.2694, + "step": 4647500 + }, + { + "epoch": 23.03, + "learning_rate": 3.849023498461676e-05, + "loss": 2.2986, + "step": 4648000 + }, + { + "epoch": 23.03, + "learning_rate": 3.8488996398190676e-05, + "loss": 2.2675, + "step": 4648500 + }, + { + "epoch": 23.03, + "learning_rate": 3.848775781176459e-05, + "loss": 2.2932, + "step": 4649000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848651922533851e-05, + "loss": 2.2778, + "step": 4649500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8485280638912427e-05, + "loss": 2.2582, + "step": 4650000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848404452965919e-05, + "loss": 2.2853, + "step": 4650500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8482805943233105e-05, + "loss": 2.2806, + "step": 4651000 + }, + { + "epoch": 23.05, + "learning_rate": 3.848156735680702e-05, + "loss": 2.3017, + "step": 4651500 + }, + { + "epoch": 23.05, + "learning_rate": 3.848032877038094e-05, + "loss": 2.2922, + "step": 4652000 + }, + { + "epoch": 23.05, + "learning_rate": 3.8479090183954856e-05, + "loss": 2.2852, + "step": 4652500 + }, + { + "epoch": 23.05, + "learning_rate": 3.847785159752877e-05, + "loss": 2.2834, + "step": 4653000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847661301110269e-05, + "loss": 2.2794, + "step": 4653500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847537690184946e-05, + "loss": 2.2736, + "step": 4654000 + }, + { + "epoch": 23.06, + "learning_rate": 3.8474138315423376e-05, + "loss": 2.2817, + "step": 4654500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847289972899729e-05, + "loss": 2.3046, + "step": 4655000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847166114257121e-05, + "loss": 2.2827, + "step": 4655500 + }, + { + "epoch": 23.07, + "learning_rate": 3.847042255614513e-05, + "loss": 2.3029, + "step": 4656000 + }, + { + "epoch": 23.07, + "learning_rate": 3.8469183969719044e-05, + "loss": 2.2855, + "step": 4656500 + }, + { + "epoch": 23.07, + "learning_rate": 3.846794538329296e-05, + "loss": 2.2797, + "step": 4657000 + }, + { + "epoch": 23.07, + "learning_rate": 3.846670927403972e-05, + "loss": 2.269, + "step": 4657500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846547068761364e-05, + "loss": 2.3017, + "step": 4658000 + }, + { + "epoch": 23.08, + "learning_rate": 3.8464232101187556e-05, + "loss": 2.3036, + "step": 4658500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846299351476147e-05, + "loss": 2.2968, + "step": 4659000 + }, + { + "epoch": 23.08, + "learning_rate": 3.846175492833539e-05, + "loss": 2.2592, + "step": 4659500 + }, + { + "epoch": 23.09, + "learning_rate": 3.846051634190931e-05, + "loss": 2.2923, + "step": 4660000 + }, + { + "epoch": 23.09, + "learning_rate": 3.8459277755483224e-05, + "loss": 2.2877, + "step": 4660500 + }, + { + "epoch": 23.09, + "learning_rate": 3.845804164622999e-05, + "loss": 2.2687, + "step": 4661000 + }, + { + "epoch": 23.09, + "learning_rate": 3.845680305980391e-05, + "loss": 2.2669, + "step": 4661500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845556447337783e-05, + "loss": 2.2845, + "step": 4662000 + }, + { + "epoch": 23.1, + "learning_rate": 3.8454325886951744e-05, + "loss": 2.2866, + "step": 4662500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845308730052566e-05, + "loss": 2.3037, + "step": 4663000 + }, + { + "epoch": 23.1, + "learning_rate": 3.845184871409958e-05, + "loss": 2.2609, + "step": 4663500 + }, + { + "epoch": 23.11, + "learning_rate": 3.8450610127673494e-05, + "loss": 2.2816, + "step": 4664000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844937154124741e-05, + "loss": 2.2791, + "step": 4664500 + }, + { + "epoch": 23.11, + "learning_rate": 3.8448135431994173e-05, + "loss": 2.3103, + "step": 4665000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844689932274094e-05, + "loss": 2.2907, + "step": 4665500 + }, + { + "epoch": 23.12, + "learning_rate": 3.844566073631486e-05, + "loss": 2.296, + "step": 4666000 + }, + { + "epoch": 23.12, + "learning_rate": 3.8444422149888776e-05, + "loss": 2.2811, + "step": 4666500 + }, + { + "epoch": 23.12, + "learning_rate": 3.844318356346269e-05, + "loss": 2.2881, + "step": 4667000 + }, + { + "epoch": 23.12, + "learning_rate": 3.844194497703661e-05, + "loss": 2.2979, + "step": 4667500 + }, + { + "epoch": 23.13, + "learning_rate": 3.844070886778338e-05, + "loss": 2.3004, + "step": 4668000 + }, + { + "epoch": 23.13, + "learning_rate": 3.8439470281357296e-05, + "loss": 2.2805, + "step": 4668500 + }, + { + "epoch": 23.13, + "learning_rate": 3.843823169493121e-05, + "loss": 2.2995, + "step": 4669000 + }, + { + "epoch": 23.13, + "learning_rate": 3.843699310850512e-05, + "loss": 2.2929, + "step": 4669500 + }, + { + "epoch": 23.14, + "learning_rate": 3.843575452207904e-05, + "loss": 2.3062, + "step": 4670000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843451593565296e-05, + "loss": 2.3233, + "step": 4670500 + }, + { + "epoch": 23.14, + "learning_rate": 3.8433277349226874e-05, + "loss": 2.3191, + "step": 4671000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843203876280079e-05, + "loss": 2.2908, + "step": 4671500 + }, + { + "epoch": 23.15, + "learning_rate": 3.843080265354756e-05, + "loss": 2.267, + "step": 4672000 + }, + { + "epoch": 23.15, + "learning_rate": 3.842956902146718e-05, + "loss": 2.2694, + "step": 4672500 + }, + { + "epoch": 23.15, + "learning_rate": 3.84283304350411e-05, + "loss": 2.2964, + "step": 4673000 + }, + { + "epoch": 23.15, + "learning_rate": 3.8427091848615014e-05, + "loss": 2.3116, + "step": 4673500 + }, + { + "epoch": 23.16, + "learning_rate": 3.842585573936178e-05, + "loss": 2.2689, + "step": 4674000 + }, + { + "epoch": 23.16, + "learning_rate": 3.84246171529357e-05, + "loss": 2.2976, + "step": 4674500 + }, + { + "epoch": 23.16, + "learning_rate": 3.8423378566509617e-05, + "loss": 2.3003, + "step": 4675000 + }, + { + "epoch": 23.16, + "learning_rate": 3.8422139980083533e-05, + "loss": 2.3213, + "step": 4675500 + }, + { + "epoch": 23.17, + "learning_rate": 3.842090139365745e-05, + "loss": 2.287, + "step": 4676000 + }, + { + "epoch": 23.17, + "learning_rate": 3.841966280723137e-05, + "loss": 2.303, + "step": 4676500 + }, + { + "epoch": 23.17, + "learning_rate": 3.8418424220805284e-05, + "loss": 2.2827, + "step": 4677000 + }, + { + "epoch": 23.17, + "learning_rate": 3.84171856343792e-05, + "loss": 2.2881, + "step": 4677500 + }, + { + "epoch": 23.18, + "learning_rate": 3.841594704795312e-05, + "loss": 2.3183, + "step": 4678000 + }, + { + "epoch": 23.18, + "learning_rate": 3.8414708461527035e-05, + "loss": 2.3166, + "step": 4678500 + }, + { + "epoch": 23.18, + "learning_rate": 3.841346987510095e-05, + "loss": 2.3065, + "step": 4679000 + }, + { + "epoch": 23.18, + "learning_rate": 3.841223128867486e-05, + "loss": 2.278, + "step": 4679500 + }, + { + "epoch": 23.19, + "learning_rate": 3.841099270224878e-05, + "loss": 2.3138, + "step": 4680000 + }, + { + "epoch": 23.19, + "learning_rate": 3.8409754115822696e-05, + "loss": 2.3044, + "step": 4680500 + }, + { + "epoch": 23.19, + "learning_rate": 3.840851552939661e-05, + "loss": 2.2684, + "step": 4681000 + }, + { + "epoch": 23.19, + "learning_rate": 3.840727694297053e-05, + "loss": 2.293, + "step": 4681500 + }, + { + "epoch": 23.2, + "learning_rate": 3.840603835654445e-05, + "loss": 2.3256, + "step": 4682000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840480224729121e-05, + "loss": 2.2807, + "step": 4682500 + }, + { + "epoch": 23.2, + "learning_rate": 3.8403563660865126e-05, + "loss": 2.2796, + "step": 4683000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840232507443904e-05, + "loss": 2.3154, + "step": 4683500 + }, + { + "epoch": 23.21, + "learning_rate": 3.840108648801296e-05, + "loss": 2.2889, + "step": 4684000 + }, + { + "epoch": 23.21, + "learning_rate": 3.8399847901586876e-05, + "loss": 2.2843, + "step": 4684500 + }, + { + "epoch": 23.21, + "learning_rate": 3.839861179233365e-05, + "loss": 2.2907, + "step": 4685000 + }, + { + "epoch": 23.21, + "learning_rate": 3.839737320590757e-05, + "loss": 2.3155, + "step": 4685500 + }, + { + "epoch": 23.22, + "learning_rate": 3.839613461948148e-05, + "loss": 2.3057, + "step": 4686000 + }, + { + "epoch": 23.22, + "learning_rate": 3.8394896033055396e-05, + "loss": 2.2936, + "step": 4686500 + }, + { + "epoch": 23.22, + "learning_rate": 3.839365744662931e-05, + "loss": 2.3158, + "step": 4687000 + }, + { + "epoch": 23.22, + "learning_rate": 3.839241886020323e-05, + "loss": 2.2809, + "step": 4687500 + }, + { + "epoch": 23.23, + "learning_rate": 3.839118027377715e-05, + "loss": 2.2988, + "step": 4688000 + }, + { + "epoch": 23.23, + "learning_rate": 3.8389941687351064e-05, + "loss": 2.2952, + "step": 4688500 + }, + { + "epoch": 23.23, + "learning_rate": 3.838870310092498e-05, + "loss": 2.2915, + "step": 4689000 + }, + { + "epoch": 23.23, + "learning_rate": 3.83874645144989e-05, + "loss": 2.2737, + "step": 4689500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838622592807281e-05, + "loss": 2.2757, + "step": 4690000 + }, + { + "epoch": 23.24, + "learning_rate": 3.8384989818819577e-05, + "loss": 2.3108, + "step": 4690500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838375370956635e-05, + "loss": 2.3196, + "step": 4691000 + }, + { + "epoch": 23.24, + "learning_rate": 3.838251512314027e-05, + "loss": 2.2907, + "step": 4691500 + }, + { + "epoch": 23.25, + "learning_rate": 3.838127653671418e-05, + "loss": 2.2718, + "step": 4692000 + }, + { + "epoch": 23.25, + "learning_rate": 3.8380037950288096e-05, + "loss": 2.2993, + "step": 4692500 + }, + { + "epoch": 23.25, + "learning_rate": 3.8378801841034865e-05, + "loss": 2.3027, + "step": 4693000 + }, + { + "epoch": 23.25, + "learning_rate": 3.837756325460878e-05, + "loss": 2.29, + "step": 4693500 + }, + { + "epoch": 23.26, + "learning_rate": 3.83763246681827e-05, + "loss": 2.2988, + "step": 4694000 + }, + { + "epoch": 23.26, + "learning_rate": 3.8375086081756616e-05, + "loss": 2.2946, + "step": 4694500 + }, + { + "epoch": 23.26, + "learning_rate": 3.8373847495330526e-05, + "loss": 2.3021, + "step": 4695000 + }, + { + "epoch": 23.26, + "learning_rate": 3.837260890890444e-05, + "loss": 2.2775, + "step": 4695500 + }, + { + "epoch": 23.27, + "learning_rate": 3.837137032247836e-05, + "loss": 2.2802, + "step": 4696000 + }, + { + "epoch": 23.27, + "learning_rate": 3.837013173605228e-05, + "loss": 2.3041, + "step": 4696500 + }, + { + "epoch": 23.27, + "learning_rate": 3.8368893149626194e-05, + "loss": 2.2622, + "step": 4697000 + }, + { + "epoch": 23.27, + "learning_rate": 3.836765704037297e-05, + "loss": 2.305, + "step": 4697500 + }, + { + "epoch": 23.28, + "learning_rate": 3.8366418453946886e-05, + "loss": 2.3045, + "step": 4698000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836518234469365e-05, + "loss": 2.282, + "step": 4698500 + }, + { + "epoch": 23.28, + "learning_rate": 3.8363943758267565e-05, + "loss": 2.2845, + "step": 4699000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836270517184148e-05, + "loss": 2.2755, + "step": 4699500 + }, + { + "epoch": 23.29, + "learning_rate": 3.83614665854154e-05, + "loss": 2.2895, + "step": 4700000 + }, + { + "epoch": 23.29, + "learning_rate": 3.8360227998989316e-05, + "loss": 2.2926, + "step": 4700500 + }, + { + "epoch": 23.29, + "learning_rate": 3.835898941256323e-05, + "loss": 2.2743, + "step": 4701000 + }, + { + "epoch": 23.29, + "learning_rate": 3.835775330331e-05, + "loss": 2.2907, + "step": 4701500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835651471688392e-05, + "loss": 2.2967, + "step": 4702000 + }, + { + "epoch": 23.3, + "learning_rate": 3.8355276130457836e-05, + "loss": 2.3121, + "step": 4702500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835403754403175e-05, + "loss": 2.3018, + "step": 4703000 + }, + { + "epoch": 23.3, + "learning_rate": 3.835279895760567e-05, + "loss": 2.2903, + "step": 4703500 + }, + { + "epoch": 23.31, + "learning_rate": 3.8351560371179586e-05, + "loss": 2.3101, + "step": 4704000 + }, + { + "epoch": 23.31, + "learning_rate": 3.8350321784753496e-05, + "loss": 2.2976, + "step": 4704500 + }, + { + "epoch": 23.31, + "learning_rate": 3.8349085675500265e-05, + "loss": 2.2819, + "step": 4705000 + }, + { + "epoch": 23.31, + "learning_rate": 3.834784708907418e-05, + "loss": 2.2727, + "step": 4705500 + }, + { + "epoch": 23.32, + "learning_rate": 3.834661097982095e-05, + "loss": 2.2911, + "step": 4706000 + }, + { + "epoch": 23.32, + "learning_rate": 3.834537239339487e-05, + "loss": 2.3118, + "step": 4706500 + }, + { + "epoch": 23.32, + "learning_rate": 3.8344133806968785e-05, + "loss": 2.3047, + "step": 4707000 + }, + { + "epoch": 23.32, + "learning_rate": 3.83428952205427e-05, + "loss": 2.3097, + "step": 4707500 + }, + { + "epoch": 23.33, + "learning_rate": 3.834165663411662e-05, + "loss": 2.2727, + "step": 4708000 + }, + { + "epoch": 23.33, + "learning_rate": 3.8340418047690536e-05, + "loss": 2.313, + "step": 4708500 + }, + { + "epoch": 23.33, + "learning_rate": 3.833917946126445e-05, + "loss": 2.295, + "step": 4709000 + }, + { + "epoch": 23.33, + "learning_rate": 3.833794087483837e-05, + "loss": 2.3, + "step": 4709500 + }, + { + "epoch": 23.33, + "learning_rate": 3.8336702288412286e-05, + "loss": 2.287, + "step": 4710000 + }, + { + "epoch": 23.34, + "learning_rate": 3.83354637019862e-05, + "loss": 2.2963, + "step": 4710500 + }, + { + "epoch": 23.34, + "learning_rate": 3.8334227592732965e-05, + "loss": 2.2942, + "step": 4711000 + }, + { + "epoch": 23.34, + "learning_rate": 3.833298900630688e-05, + "loss": 2.2964, + "step": 4711500 + }, + { + "epoch": 23.34, + "learning_rate": 3.83317504198808e-05, + "loss": 2.3011, + "step": 4712000 + }, + { + "epoch": 23.35, + "learning_rate": 3.8330511833454716e-05, + "loss": 2.2922, + "step": 4712500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832927324702863e-05, + "loss": 2.2892, + "step": 4713000 + }, + { + "epoch": 23.35, + "learning_rate": 3.832803466060255e-05, + "loss": 2.3098, + "step": 4713500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832679607417646e-05, + "loss": 2.2946, + "step": 4714000 + }, + { + "epoch": 23.36, + "learning_rate": 3.8325559964923236e-05, + "loss": 2.2958, + "step": 4714500 + }, + { + "epoch": 23.36, + "learning_rate": 3.832432137849715e-05, + "loss": 2.3095, + "step": 4715000 + }, + { + "epoch": 23.36, + "learning_rate": 3.832308279207107e-05, + "loss": 2.2891, + "step": 4715500 + }, + { + "epoch": 23.36, + "learning_rate": 3.8321844205644987e-05, + "loss": 2.2835, + "step": 4716000 + }, + { + "epoch": 23.37, + "learning_rate": 3.8320605619218903e-05, + "loss": 2.3211, + "step": 4716500 + }, + { + "epoch": 23.37, + "learning_rate": 3.831936950996567e-05, + "loss": 2.2844, + "step": 4717000 + }, + { + "epoch": 23.37, + "learning_rate": 3.831813092353958e-05, + "loss": 2.298, + "step": 4717500 + }, + { + "epoch": 23.37, + "learning_rate": 3.83168923371135e-05, + "loss": 2.2937, + "step": 4718000 + }, + { + "epoch": 23.38, + "learning_rate": 3.8315653750687416e-05, + "loss": 2.2833, + "step": 4718500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831441516426133e-05, + "loss": 2.2817, + "step": 4719000 + }, + { + "epoch": 23.38, + "learning_rate": 3.831317657783525e-05, + "loss": 2.2641, + "step": 4719500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831193799140916e-05, + "loss": 2.3006, + "step": 4720000 + }, + { + "epoch": 23.39, + "learning_rate": 3.8310701882155936e-05, + "loss": 2.3117, + "step": 4720500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830946329572985e-05, + "loss": 2.2952, + "step": 4721000 + }, + { + "epoch": 23.39, + "learning_rate": 3.830822470930377e-05, + "loss": 2.2941, + "step": 4721500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830698612287769e-05, + "loss": 2.303, + "step": 4722000 + }, + { + "epoch": 23.4, + "learning_rate": 3.8305747536451604e-05, + "loss": 2.2916, + "step": 4722500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830451142719837e-05, + "loss": 2.2864, + "step": 4723000 + }, + { + "epoch": 23.4, + "learning_rate": 3.830327284077229e-05, + "loss": 2.2994, + "step": 4723500 + }, + { + "epoch": 23.4, + "learning_rate": 3.8302034254346206e-05, + "loss": 2.3089, + "step": 4724000 + }, + { + "epoch": 23.41, + "learning_rate": 3.830079814509297e-05, + "loss": 2.3033, + "step": 4724500 + }, + { + "epoch": 23.41, + "learning_rate": 3.8299559558666885e-05, + "loss": 2.2952, + "step": 4725000 + }, + { + "epoch": 23.41, + "learning_rate": 3.82983209722408e-05, + "loss": 2.3225, + "step": 4725500 + }, + { + "epoch": 23.41, + "learning_rate": 3.829708238581472e-05, + "loss": 2.2881, + "step": 4726000 + }, + { + "epoch": 23.42, + "learning_rate": 3.8295843799388636e-05, + "loss": 2.2867, + "step": 4726500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829460521296255e-05, + "loss": 2.2851, + "step": 4727000 + }, + { + "epoch": 23.42, + "learning_rate": 3.829336662653647e-05, + "loss": 2.2913, + "step": 4727500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829212804011039e-05, + "loss": 2.2991, + "step": 4728000 + }, + { + "epoch": 23.43, + "learning_rate": 3.8290889453684304e-05, + "loss": 2.2934, + "step": 4728500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828965086725822e-05, + "loss": 2.2761, + "step": 4729000 + }, + { + "epoch": 23.43, + "learning_rate": 3.828841228083213e-05, + "loss": 2.3159, + "step": 4729500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828717369440605e-05, + "loss": 2.3028, + "step": 4730000 + }, + { + "epoch": 23.44, + "learning_rate": 3.8285935107979965e-05, + "loss": 2.3088, + "step": 4730500 + }, + { + "epoch": 23.44, + "learning_rate": 3.828469652155388e-05, + "loss": 2.2843, + "step": 4731000 + }, + { + "epoch": 23.44, + "learning_rate": 3.828346041230065e-05, + "loss": 2.3045, + "step": 4731500 + }, + { + "epoch": 23.44, + "learning_rate": 3.828222182587457e-05, + "loss": 2.2686, + "step": 4732000 + }, + { + "epoch": 23.45, + "learning_rate": 3.8280983239448484e-05, + "loss": 2.2848, + "step": 4732500 + }, + { + "epoch": 23.45, + "learning_rate": 3.82797446530224e-05, + "loss": 2.2904, + "step": 4733000 + }, + { + "epoch": 23.45, + "learning_rate": 3.827850854376917e-05, + "loss": 2.3025, + "step": 4733500 + }, + { + "epoch": 23.45, + "learning_rate": 3.827726995734309e-05, + "loss": 2.312, + "step": 4734000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8276031370917004e-05, + "loss": 2.2943, + "step": 4734500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827479526166377e-05, + "loss": 2.2959, + "step": 4735000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8273559152410535e-05, + "loss": 2.284, + "step": 4735500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827232056598445e-05, + "loss": 2.2884, + "step": 4736000 + }, + { + "epoch": 23.47, + "learning_rate": 3.827108197955837e-05, + "loss": 2.3079, + "step": 4736500 + }, + { + "epoch": 23.47, + "learning_rate": 3.8269843393132285e-05, + "loss": 2.2866, + "step": 4737000 + }, + { + "epoch": 23.47, + "learning_rate": 3.82686048067062e-05, + "loss": 2.283, + "step": 4737500 + }, + { + "epoch": 23.47, + "learning_rate": 3.826736622028012e-05, + "loss": 2.3011, + "step": 4738000 + }, + { + "epoch": 23.48, + "learning_rate": 3.8266127633854036e-05, + "loss": 2.2811, + "step": 4738500 + }, + { + "epoch": 23.48, + "learning_rate": 3.826488904742795e-05, + "loss": 2.2937, + "step": 4739000 + }, + { + "epoch": 23.48, + "learning_rate": 3.826365046100187e-05, + "loss": 2.2939, + "step": 4739500 + }, + { + "epoch": 23.48, + "learning_rate": 3.826241187457579e-05, + "loss": 2.3019, + "step": 4740000 + }, + { + "epoch": 23.49, + "learning_rate": 3.8261173288149704e-05, + "loss": 2.3265, + "step": 4740500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825993470172362e-05, + "loss": 2.2916, + "step": 4741000 + }, + { + "epoch": 23.49, + "learning_rate": 3.825869611529754e-05, + "loss": 2.2738, + "step": 4741500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825746000604431e-05, + "loss": 2.2937, + "step": 4742000 + }, + { + "epoch": 23.5, + "learning_rate": 3.8256221419618224e-05, + "loss": 2.2827, + "step": 4742500 + }, + { + "epoch": 23.5, + "learning_rate": 3.825498283319214e-05, + "loss": 2.2835, + "step": 4743000 + }, + { + "epoch": 23.5, + "learning_rate": 3.825374424676606e-05, + "loss": 2.3097, + "step": 4743500 + }, + { + "epoch": 23.5, + "learning_rate": 3.8252505660339974e-05, + "loss": 2.3038, + "step": 4744000 + }, + { + "epoch": 23.51, + "learning_rate": 3.8251267073913884e-05, + "loss": 2.3029, + "step": 4744500 + }, + { + "epoch": 23.51, + "learning_rate": 3.82500284874878e-05, + "loss": 2.327, + "step": 4745000 + }, + { + "epoch": 23.51, + "learning_rate": 3.824878990106172e-05, + "loss": 2.3, + "step": 4745500 + }, + { + "epoch": 23.51, + "learning_rate": 3.8247551314635635e-05, + "loss": 2.2879, + "step": 4746000 + }, + { + "epoch": 23.52, + "learning_rate": 3.8246315205382404e-05, + "loss": 2.2921, + "step": 4746500 + }, + { + "epoch": 23.52, + "learning_rate": 3.824507661895632e-05, + "loss": 2.2748, + "step": 4747000 + }, + { + "epoch": 23.52, + "learning_rate": 3.824383803253024e-05, + "loss": 2.26, + "step": 4747500 + }, + { + "epoch": 23.52, + "learning_rate": 3.8242599446104155e-05, + "loss": 2.3142, + "step": 4748000 + }, + { + "epoch": 23.53, + "learning_rate": 3.8241360859678065e-05, + "loss": 2.2673, + "step": 4748500 + }, + { + "epoch": 23.53, + "learning_rate": 3.824012475042484e-05, + "loss": 2.2915, + "step": 4749000 + }, + { + "epoch": 23.53, + "learning_rate": 3.823888616399876e-05, + "loss": 2.2943, + "step": 4749500 + }, + { + "epoch": 23.53, + "learning_rate": 3.8237647577572674e-05, + "loss": 2.2901, + "step": 4750000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823640899114659e-05, + "loss": 2.3014, + "step": 4750500 + }, + { + "epoch": 23.54, + "learning_rate": 3.8235172881893353e-05, + "loss": 2.3051, + "step": 4751000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823393429546727e-05, + "loss": 2.3029, + "step": 4751500 + }, + { + "epoch": 23.54, + "learning_rate": 3.823269570904119e-05, + "loss": 2.3158, + "step": 4752000 + }, + { + "epoch": 23.55, + "learning_rate": 3.8231459599787956e-05, + "loss": 2.3026, + "step": 4752500 + }, + { + "epoch": 23.55, + "learning_rate": 3.823022101336187e-05, + "loss": 2.2962, + "step": 4753000 + }, + { + "epoch": 23.55, + "learning_rate": 3.822898242693579e-05, + "loss": 2.3032, + "step": 4753500 + }, + { + "epoch": 23.55, + "learning_rate": 3.822774384050971e-05, + "loss": 2.2858, + "step": 4754000 + }, + { + "epoch": 23.56, + "learning_rate": 3.8226505254083624e-05, + "loss": 2.312, + "step": 4754500 + }, + { + "epoch": 23.56, + "learning_rate": 3.8225269144830386e-05, + "loss": 2.3003, + "step": 4755000 + }, + { + "epoch": 23.56, + "learning_rate": 3.82240305584043e-05, + "loss": 2.3145, + "step": 4755500 + }, + { + "epoch": 23.56, + "learning_rate": 3.822279197197822e-05, + "loss": 2.2898, + "step": 4756000 + }, + { + "epoch": 23.57, + "learning_rate": 3.822155338555214e-05, + "loss": 2.2888, + "step": 4756500 + }, + { + "epoch": 23.57, + "learning_rate": 3.8220314799126054e-05, + "loss": 2.3015, + "step": 4757000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821907621269997e-05, + "loss": 2.2855, + "step": 4757500 + }, + { + "epoch": 23.57, + "learning_rate": 3.821784010344674e-05, + "loss": 2.3261, + "step": 4758000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8216601517020656e-05, + "loss": 2.2987, + "step": 4758500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821536293059457e-05, + "loss": 2.2882, + "step": 4759000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8214129298514194e-05, + "loss": 2.3058, + "step": 4759500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821289071208811e-05, + "loss": 2.3102, + "step": 4760000 + }, + { + "epoch": 23.59, + "learning_rate": 3.821165460283488e-05, + "loss": 2.3256, + "step": 4760500 + }, + { + "epoch": 23.59, + "learning_rate": 3.8210416016408796e-05, + "loss": 2.3053, + "step": 4761000 + }, + { + "epoch": 23.59, + "learning_rate": 3.8209177429982713e-05, + "loss": 2.3171, + "step": 4761500 + }, + { + "epoch": 23.59, + "learning_rate": 3.820793884355663e-05, + "loss": 2.3362, + "step": 4762000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820670025713055e-05, + "loss": 2.3121, + "step": 4762500 + }, + { + "epoch": 23.6, + "learning_rate": 3.8205461670704464e-05, + "loss": 2.3041, + "step": 4763000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820422308427838e-05, + "loss": 2.2931, + "step": 4763500 + }, + { + "epoch": 23.6, + "learning_rate": 3.82029844978523e-05, + "loss": 2.3172, + "step": 4764000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820174591142621e-05, + "loss": 2.3078, + "step": 4764500 + }, + { + "epoch": 23.61, + "learning_rate": 3.8200507325000125e-05, + "loss": 2.2917, + "step": 4765000 + }, + { + "epoch": 23.61, + "learning_rate": 3.819926873857404e-05, + "loss": 2.3073, + "step": 4765500 + }, + { + "epoch": 23.61, + "learning_rate": 3.819803015214796e-05, + "loss": 2.2881, + "step": 4766000 + }, + { + "epoch": 23.61, + "learning_rate": 3.8196791565721876e-05, + "loss": 2.335, + "step": 4766500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819555297929579e-05, + "loss": 2.2791, + "step": 4767000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819431439286971e-05, + "loss": 2.2827, + "step": 4767500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819307580644363e-05, + "loss": 2.3115, + "step": 4768000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819183722001754e-05, + "loss": 2.3083, + "step": 4768500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8190598633591454e-05, + "loss": 2.3059, + "step": 4769000 + }, + { + "epoch": 23.63, + "learning_rate": 3.818936252433822e-05, + "loss": 2.2958, + "step": 4769500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8188126415085e-05, + "loss": 2.3038, + "step": 4770000 + }, + { + "epoch": 23.63, + "learning_rate": 3.8186887828658915e-05, + "loss": 2.3175, + "step": 4770500 + }, + { + "epoch": 23.64, + "learning_rate": 3.8185649242232825e-05, + "loss": 2.2936, + "step": 4771000 + }, + { + "epoch": 23.64, + "learning_rate": 3.818441065580674e-05, + "loss": 2.2973, + "step": 4771500 + }, + { + "epoch": 23.64, + "learning_rate": 3.818317206938066e-05, + "loss": 2.2993, + "step": 4772000 + }, + { + "epoch": 23.64, + "learning_rate": 3.8181933482954576e-05, + "loss": 2.3115, + "step": 4772500 + }, + { + "epoch": 23.65, + "learning_rate": 3.818069489652849e-05, + "loss": 2.3108, + "step": 4773000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817945631010241e-05, + "loss": 2.3075, + "step": 4773500 + }, + { + "epoch": 23.65, + "learning_rate": 3.817821772367633e-05, + "loss": 2.2759, + "step": 4774000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817698409159595e-05, + "loss": 2.3221, + "step": 4774500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8175747982342716e-05, + "loss": 2.2834, + "step": 4775000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817450939591663e-05, + "loss": 2.2967, + "step": 4775500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8173270809490543e-05, + "loss": 2.2973, + "step": 4776000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817203222306446e-05, + "loss": 2.2763, + "step": 4776500 + }, + { + "epoch": 23.67, + "learning_rate": 3.817079611381123e-05, + "loss": 2.2766, + "step": 4777000 + }, + { + "epoch": 23.67, + "learning_rate": 3.8169557527385146e-05, + "loss": 2.3263, + "step": 4777500 + }, + { + "epoch": 23.67, + "learning_rate": 3.816831894095906e-05, + "loss": 2.2982, + "step": 4778000 + }, + { + "epoch": 23.67, + "learning_rate": 3.816708035453298e-05, + "loss": 2.3112, + "step": 4778500 + }, + { + "epoch": 23.68, + "learning_rate": 3.81658417681069e-05, + "loss": 2.3014, + "step": 4779000 + }, + { + "epoch": 23.68, + "learning_rate": 3.8164603181680814e-05, + "loss": 2.286, + "step": 4779500 + }, + { + "epoch": 23.68, + "learning_rate": 3.816336459525473e-05, + "loss": 2.3127, + "step": 4780000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816212600882865e-05, + "loss": 2.3024, + "step": 4780500 + }, + { + "epoch": 23.69, + "learning_rate": 3.8160887422402565e-05, + "loss": 2.2597, + "step": 4781000 + }, + { + "epoch": 23.69, + "learning_rate": 3.815964883597648e-05, + "loss": 2.2918, + "step": 4781500 + }, + { + "epoch": 23.69, + "learning_rate": 3.81584102495504e-05, + "loss": 2.2857, + "step": 4782000 + }, + { + "epoch": 23.69, + "learning_rate": 3.8157171663124315e-05, + "loss": 2.3056, + "step": 4782500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815593307669823e-05, + "loss": 2.2823, + "step": 4783000 + }, + { + "epoch": 23.7, + "learning_rate": 3.815469449027214e-05, + "loss": 2.2994, + "step": 4783500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815345590384606e-05, + "loss": 2.2764, + "step": 4784000 + }, + { + "epoch": 23.7, + "learning_rate": 3.8152217317419976e-05, + "loss": 2.2975, + "step": 4784500 + }, + { + "epoch": 23.71, + "learning_rate": 3.815097873099389e-05, + "loss": 2.2998, + "step": 4785000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814974014456781e-05, + "loss": 2.293, + "step": 4785500 + }, + { + "epoch": 23.71, + "learning_rate": 3.814850403531458e-05, + "loss": 2.2987, + "step": 4786000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814726544888849e-05, + "loss": 2.2947, + "step": 4786500 + }, + { + "epoch": 23.72, + "learning_rate": 3.8146026862462406e-05, + "loss": 2.296, + "step": 4787000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814478827603632e-05, + "loss": 2.3184, + "step": 4787500 + }, + { + "epoch": 23.72, + "learning_rate": 3.814354968961024e-05, + "loss": 2.3023, + "step": 4788000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814231110318416e-05, + "loss": 2.2848, + "step": 4788500 + }, + { + "epoch": 23.73, + "learning_rate": 3.8141072516758074e-05, + "loss": 2.2869, + "step": 4789000 + }, + { + "epoch": 23.73, + "learning_rate": 3.813983640750484e-05, + "loss": 2.3372, + "step": 4789500 + }, + { + "epoch": 23.73, + "learning_rate": 3.813859782107876e-05, + "loss": 2.3091, + "step": 4790000 + }, + { + "epoch": 23.73, + "learning_rate": 3.8137359234652676e-05, + "loss": 2.2714, + "step": 4790500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813612064822659e-05, + "loss": 2.2939, + "step": 4791000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813488206180051e-05, + "loss": 2.3059, + "step": 4791500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813364347537443e-05, + "loss": 2.2798, + "step": 4792000 + }, + { + "epoch": 23.74, + "learning_rate": 3.8132407366121196e-05, + "loss": 2.2939, + "step": 4792500 + }, + { + "epoch": 23.75, + "learning_rate": 3.8131168779695106e-05, + "loss": 2.2965, + "step": 4793000 + }, + { + "epoch": 23.75, + "learning_rate": 3.812993019326902e-05, + "loss": 2.2802, + "step": 4793500 + }, + { + "epoch": 23.75, + "learning_rate": 3.81286940840158e-05, + "loss": 2.3213, + "step": 4794000 + }, + { + "epoch": 23.75, + "learning_rate": 3.8127455497589716e-05, + "loss": 2.3187, + "step": 4794500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812621691116363e-05, + "loss": 2.314, + "step": 4795000 + }, + { + "epoch": 23.76, + "learning_rate": 3.812497832473755e-05, + "loss": 2.283, + "step": 4795500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812373973831146e-05, + "loss": 2.3006, + "step": 4796000 + }, + { + "epoch": 23.76, + "learning_rate": 3.812250362905823e-05, + "loss": 2.3053, + "step": 4796500 + }, + { + "epoch": 23.77, + "learning_rate": 3.8121265042632145e-05, + "loss": 2.2898, + "step": 4797000 + }, + { + "epoch": 23.77, + "learning_rate": 3.812002645620606e-05, + "loss": 2.3042, + "step": 4797500 + }, + { + "epoch": 23.77, + "learning_rate": 3.811878786977998e-05, + "loss": 2.3032, + "step": 4798000 + }, + { + "epoch": 23.77, + "learning_rate": 3.8117549283353896e-05, + "loss": 2.2994, + "step": 4798500 + }, + { + "epoch": 23.78, + "learning_rate": 3.8116310696927806e-05, + "loss": 2.3109, + "step": 4799000 + }, + { + "epoch": 23.78, + "learning_rate": 3.811507211050172e-05, + "loss": 2.3052, + "step": 4799500 + }, + { + "epoch": 23.78, + "learning_rate": 3.81138360012485e-05, + "loss": 2.2868, + "step": 4800000 + }, + { + "epoch": 23.78, + "learning_rate": 3.8112597414822416e-05, + "loss": 2.3109, + "step": 4800500 + }, + { + "epoch": 23.79, + "learning_rate": 3.811135882839633e-05, + "loss": 2.3048, + "step": 4801000 + }, + { + "epoch": 23.79, + "learning_rate": 3.811012024197025e-05, + "loss": 2.3028, + "step": 4801500 + }, + { + "epoch": 23.79, + "learning_rate": 3.810888165554416e-05, + "loss": 2.3225, + "step": 4802000 + }, + { + "epoch": 23.79, + "learning_rate": 3.810764306911808e-05, + "loss": 2.2936, + "step": 4802500 + }, + { + "epoch": 23.8, + "learning_rate": 3.8106404482691994e-05, + "loss": 2.3293, + "step": 4803000 + }, + { + "epoch": 23.8, + "learning_rate": 3.810516589626591e-05, + "loss": 2.3076, + "step": 4803500 + }, + { + "epoch": 23.8, + "learning_rate": 3.810392730983983e-05, + "loss": 2.2963, + "step": 4804000 + }, + { + "epoch": 23.8, + "learning_rate": 3.8102688723413744e-05, + "loss": 2.2903, + "step": 4804500 + }, + { + "epoch": 23.81, + "learning_rate": 3.810145261416051e-05, + "loss": 2.3059, + "step": 4805000 + }, + { + "epoch": 23.81, + "learning_rate": 3.810021402773442e-05, + "loss": 2.2973, + "step": 4805500 + }, + { + "epoch": 23.81, + "learning_rate": 3.809897544130834e-05, + "loss": 2.2988, + "step": 4806000 + }, + { + "epoch": 23.81, + "learning_rate": 3.809773685488226e-05, + "loss": 2.2914, + "step": 4806500 + }, + { + "epoch": 23.82, + "learning_rate": 3.8096498268456174e-05, + "loss": 2.3179, + "step": 4807000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809525968203009e-05, + "loss": 2.3134, + "step": 4807500 + }, + { + "epoch": 23.82, + "learning_rate": 3.809402357277687e-05, + "loss": 2.28, + "step": 4808000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809278498635078e-05, + "loss": 2.3142, + "step": 4808500 + }, + { + "epoch": 23.83, + "learning_rate": 3.8091546399924694e-05, + "loss": 2.2888, + "step": 4809000 + }, + { + "epoch": 23.83, + "learning_rate": 3.809030781349861e-05, + "loss": 2.3241, + "step": 4809500 + }, + { + "epoch": 23.83, + "learning_rate": 3.808906922707253e-05, + "loss": 2.3173, + "step": 4810000 + }, + { + "epoch": 23.83, + "learning_rate": 3.8087830640646445e-05, + "loss": 2.2905, + "step": 4810500 + }, + { + "epoch": 23.84, + "learning_rate": 3.8086597008566065e-05, + "loss": 2.3409, + "step": 4811000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808535842213998e-05, + "loss": 2.3207, + "step": 4811500 + }, + { + "epoch": 23.84, + "learning_rate": 3.80841198357139e-05, + "loss": 2.3175, + "step": 4812000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808288372646067e-05, + "loss": 2.3065, + "step": 4812500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8081645140034585e-05, + "loss": 2.2876, + "step": 4813000 + }, + { + "epoch": 23.85, + "learning_rate": 3.80804065536085e-05, + "loss": 2.3141, + "step": 4813500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8079170444355264e-05, + "loss": 2.2991, + "step": 4814000 + }, + { + "epoch": 23.85, + "learning_rate": 3.807793185792918e-05, + "loss": 2.2971, + "step": 4814500 + }, + { + "epoch": 23.86, + "learning_rate": 3.80766932715031e-05, + "loss": 2.2939, + "step": 4815000 + }, + { + "epoch": 23.86, + "learning_rate": 3.8075454685077015e-05, + "loss": 2.3154, + "step": 4815500 + }, + { + "epoch": 23.86, + "learning_rate": 3.807421609865093e-05, + "loss": 2.3036, + "step": 4816000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807297751222485e-05, + "loss": 2.3105, + "step": 4816500 + }, + { + "epoch": 23.87, + "learning_rate": 3.8071738925798765e-05, + "loss": 2.281, + "step": 4817000 + }, + { + "epoch": 23.87, + "learning_rate": 3.807050033937268e-05, + "loss": 2.2819, + "step": 4817500 + }, + { + "epoch": 23.87, + "learning_rate": 3.80692617529466e-05, + "loss": 2.2728, + "step": 4818000 + }, + { + "epoch": 23.87, + "learning_rate": 3.8068023166520516e-05, + "loss": 2.3142, + "step": 4818500 + }, + { + "epoch": 23.87, + "learning_rate": 3.806678458009443e-05, + "loss": 2.3092, + "step": 4819000 + }, + { + "epoch": 23.88, + "learning_rate": 3.806554599366835e-05, + "loss": 2.3183, + "step": 4819500 + }, + { + "epoch": 23.88, + "learning_rate": 3.806430740724227e-05, + "loss": 2.2989, + "step": 4820000 + }, + { + "epoch": 23.88, + "learning_rate": 3.8063071297989036e-05, + "loss": 2.2973, + "step": 4820500 + }, + { + "epoch": 23.88, + "learning_rate": 3.8061837665908656e-05, + "loss": 2.3134, + "step": 4821000 + }, + { + "epoch": 23.89, + "learning_rate": 3.8060599079482567e-05, + "loss": 2.3101, + "step": 4821500 + }, + { + "epoch": 23.89, + "learning_rate": 3.8059360493056483e-05, + "loss": 2.3073, + "step": 4822000 + }, + { + "epoch": 23.89, + "learning_rate": 3.80581219066304e-05, + "loss": 2.2903, + "step": 4822500 + }, + { + "epoch": 23.89, + "learning_rate": 3.805688332020432e-05, + "loss": 2.2952, + "step": 4823000 + }, + { + "epoch": 23.9, + "learning_rate": 3.8055644733778234e-05, + "loss": 2.334, + "step": 4823500 + }, + { + "epoch": 23.9, + "learning_rate": 3.805440614735215e-05, + "loss": 2.3168, + "step": 4824000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805316756092607e-05, + "loss": 2.3011, + "step": 4824500 + }, + { + "epoch": 23.9, + "learning_rate": 3.805193145167284e-05, + "loss": 2.3194, + "step": 4825000 + }, + { + "epoch": 23.91, + "learning_rate": 3.8050692865246754e-05, + "loss": 2.313, + "step": 4825500 + }, + { + "epoch": 23.91, + "learning_rate": 3.804945427882067e-05, + "loss": 2.3036, + "step": 4826000 + }, + { + "epoch": 23.91, + "learning_rate": 3.804821569239458e-05, + "loss": 2.2979, + "step": 4826500 + }, + { + "epoch": 23.91, + "learning_rate": 3.80469771059685e-05, + "loss": 2.2836, + "step": 4827000 + }, + { + "epoch": 23.92, + "learning_rate": 3.8045738519542415e-05, + "loss": 2.3093, + "step": 4827500 + }, + { + "epoch": 23.92, + "learning_rate": 3.8044502410289184e-05, + "loss": 2.2908, + "step": 4828000 + }, + { + "epoch": 23.92, + "learning_rate": 3.80432638238631e-05, + "loss": 2.3154, + "step": 4828500 + }, + { + "epoch": 23.92, + "learning_rate": 3.804202523743702e-05, + "loss": 2.3275, + "step": 4829000 + }, + { + "epoch": 23.93, + "learning_rate": 3.8040786651010934e-05, + "loss": 2.3259, + "step": 4829500 + }, + { + "epoch": 23.93, + "learning_rate": 3.803954806458485e-05, + "loss": 2.3006, + "step": 4830000 + }, + { + "epoch": 23.93, + "learning_rate": 3.803830947815877e-05, + "loss": 2.3199, + "step": 4830500 + }, + { + "epoch": 23.93, + "learning_rate": 3.8037070891732685e-05, + "loss": 2.3036, + "step": 4831000 + }, + { + "epoch": 23.94, + "learning_rate": 3.80358323053066e-05, + "loss": 2.2679, + "step": 4831500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803459619605337e-05, + "loss": 2.3195, + "step": 4832000 + }, + { + "epoch": 23.94, + "learning_rate": 3.803335760962729e-05, + "loss": 2.3192, + "step": 4832500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803212150037406e-05, + "loss": 2.3352, + "step": 4833000 + }, + { + "epoch": 23.95, + "learning_rate": 3.8030885391120825e-05, + "loss": 2.3116, + "step": 4833500 + }, + { + "epoch": 23.95, + "learning_rate": 3.802964680469474e-05, + "loss": 2.3202, + "step": 4834000 + }, + { + "epoch": 23.95, + "learning_rate": 3.802840821826866e-05, + "loss": 2.3153, + "step": 4834500 + }, + { + "epoch": 23.95, + "learning_rate": 3.8027169631842576e-05, + "loss": 2.3291, + "step": 4835000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802593104541649e-05, + "loss": 2.2869, + "step": 4835500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802469245899041e-05, + "loss": 2.3052, + "step": 4836000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802345387256433e-05, + "loss": 2.313, + "step": 4836500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802221776331109e-05, + "loss": 2.2973, + "step": 4837000 + }, + { + "epoch": 23.97, + "learning_rate": 3.8020979176885006e-05, + "loss": 2.3024, + "step": 4837500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801974059045892e-05, + "loss": 2.3348, + "step": 4838000 + }, + { + "epoch": 23.97, + "learning_rate": 3.801850200403284e-05, + "loss": 2.3053, + "step": 4838500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801726341760676e-05, + "loss": 2.3055, + "step": 4839000 + }, + { + "epoch": 23.98, + "learning_rate": 3.8016024831180674e-05, + "loss": 2.3312, + "step": 4839500 + }, + { + "epoch": 23.98, + "learning_rate": 3.801478872192744e-05, + "loss": 2.2953, + "step": 4840000 + }, + { + "epoch": 23.98, + "learning_rate": 3.801355013550136e-05, + "loss": 2.3206, + "step": 4840500 + }, + { + "epoch": 23.98, + "learning_rate": 3.8012311549075276e-05, + "loss": 2.3084, + "step": 4841000 + }, + { + "epoch": 23.99, + "learning_rate": 3.801107296264919e-05, + "loss": 2.2991, + "step": 4841500 + }, + { + "epoch": 23.99, + "learning_rate": 3.800983437622311e-05, + "loss": 2.2901, + "step": 4842000 + }, + { + "epoch": 23.99, + "learning_rate": 3.800859578979703e-05, + "loss": 2.3141, + "step": 4842500 + }, + { + "epoch": 23.99, + "learning_rate": 3.8007357203370944e-05, + "loss": 2.3149, + "step": 4843000 + }, + { + "epoch": 24.0, + "learning_rate": 3.8006118616944854e-05, + "loss": 2.2997, + "step": 4843500 + }, + { + "epoch": 24.0, + "learning_rate": 3.800488003051877e-05, + "loss": 2.3124, + "step": 4844000 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6541513737695038, + "eval_accuracy_mlm": 0.608915092455769, + "eval_accuracy_nsp": 0.8673943653685494, + "eval_loss": 2.3393051624298096, + "eval_runtime": 146.2902, + "eval_samples_per_second": 1742.831, + "eval_steps_per_second": 72.623, + "step": 4844232 + }, + { + "epoch": 24.0, + "learning_rate": 3.800364144409269e-05, + "loss": 2.2969, + "step": 4844500 + }, + { + "epoch": 24.0, + "learning_rate": 3.8002402857666605e-05, + "loss": 2.2555, + "step": 4845000 + }, + { + "epoch": 24.01, + "learning_rate": 3.800116427124052e-05, + "loss": 2.2708, + "step": 4845500 + }, + { + "epoch": 24.01, + "learning_rate": 3.799992568481444e-05, + "loss": 2.2948, + "step": 4846000 + }, + { + "epoch": 24.01, + "learning_rate": 3.7998687098388356e-05, + "loss": 2.277, + "step": 4846500 + }, + { + "epoch": 24.01, + "learning_rate": 3.7997448511962266e-05, + "loss": 2.249, + "step": 4847000 + }, + { + "epoch": 24.02, + "learning_rate": 3.7996212402709035e-05, + "loss": 2.2613, + "step": 4847500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799497629345581e-05, + "loss": 2.2796, + "step": 4848000 + }, + { + "epoch": 24.02, + "learning_rate": 3.799373770702973e-05, + "loss": 2.2798, + "step": 4848500 + }, + { + "epoch": 24.02, + "learning_rate": 3.7992499120603644e-05, + "loss": 2.2457, + "step": 4849000 + }, + { + "epoch": 24.03, + "learning_rate": 3.7991260534177554e-05, + "loss": 2.2718, + "step": 4849500 + }, + { + "epoch": 24.03, + "learning_rate": 3.799002194775147e-05, + "loss": 2.2644, + "step": 4850000 + }, + { + "epoch": 24.03, + "learning_rate": 3.798878336132539e-05, + "loss": 2.282, + "step": 4850500 + }, + { + "epoch": 24.03, + "learning_rate": 3.7987544774899305e-05, + "loss": 2.2948, + "step": 4851000 + }, + { + "epoch": 24.04, + "learning_rate": 3.7986308665646074e-05, + "loss": 2.2776, + "step": 4851500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798507007921999e-05, + "loss": 2.2824, + "step": 4852000 + }, + { + "epoch": 24.04, + "learning_rate": 3.798383149279391e-05, + "loss": 2.2988, + "step": 4852500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798259290636782e-05, + "loss": 2.296, + "step": 4853000 + }, + { + "epoch": 24.05, + "learning_rate": 3.7981356797114594e-05, + "loss": 2.2601, + "step": 4853500 + }, + { + "epoch": 24.05, + "learning_rate": 3.798011821068851e-05, + "loss": 2.2779, + "step": 4854000 + }, + { + "epoch": 24.05, + "learning_rate": 3.797887962426243e-05, + "loss": 2.2901, + "step": 4854500 + }, + { + "epoch": 24.05, + "learning_rate": 3.7977641037836344e-05, + "loss": 2.286, + "step": 4855000 + }, + { + "epoch": 24.06, + "learning_rate": 3.797640245141026e-05, + "loss": 2.2828, + "step": 4855500 + }, + { + "epoch": 24.06, + "learning_rate": 3.797516386498417e-05, + "loss": 2.2735, + "step": 4856000 + }, + { + "epoch": 24.06, + "learning_rate": 3.797392527855809e-05, + "loss": 2.2708, + "step": 4856500 + }, + { + "epoch": 24.06, + "learning_rate": 3.7972686692132005e-05, + "loss": 2.2948, + "step": 4857000 + }, + { + "epoch": 24.07, + "learning_rate": 3.7971450582878774e-05, + "loss": 2.2892, + "step": 4857500 + }, + { + "epoch": 24.07, + "learning_rate": 3.797021199645269e-05, + "loss": 2.2911, + "step": 4858000 + }, + { + "epoch": 24.07, + "learning_rate": 3.796897341002661e-05, + "loss": 2.2749, + "step": 4858500 + }, + { + "epoch": 24.07, + "learning_rate": 3.796773482360052e-05, + "loss": 2.3011, + "step": 4859000 + }, + { + "epoch": 24.08, + "learning_rate": 3.7966496237174435e-05, + "loss": 2.2884, + "step": 4859500 + }, + { + "epoch": 24.08, + "learning_rate": 3.796525765074835e-05, + "loss": 2.2755, + "step": 4860000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796401906432227e-05, + "loss": 2.3002, + "step": 4860500 + }, + { + "epoch": 24.08, + "learning_rate": 3.7962780477896186e-05, + "loss": 2.2786, + "step": 4861000 + }, + { + "epoch": 24.09, + "learning_rate": 3.79615418914701e-05, + "loss": 2.2814, + "step": 4861500 + }, + { + "epoch": 24.09, + "learning_rate": 3.796030330504402e-05, + "loss": 2.2544, + "step": 4862000 + }, + { + "epoch": 24.09, + "learning_rate": 3.795906719579079e-05, + "loss": 2.2841, + "step": 4862500 + }, + { + "epoch": 24.09, + "learning_rate": 3.7957828609364705e-05, + "loss": 2.3162, + "step": 4863000 + }, + { + "epoch": 24.1, + "learning_rate": 3.7956592500111474e-05, + "loss": 2.2993, + "step": 4863500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795535391368539e-05, + "loss": 2.288, + "step": 4864000 + }, + { + "epoch": 24.1, + "learning_rate": 3.795411532725931e-05, + "loss": 2.2672, + "step": 4864500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795287921800608e-05, + "loss": 2.2889, + "step": 4865000 + }, + { + "epoch": 24.11, + "learning_rate": 3.7951640631579994e-05, + "loss": 2.2822, + "step": 4865500 + }, + { + "epoch": 24.11, + "learning_rate": 3.795040204515391e-05, + "loss": 2.271, + "step": 4866000 + }, + { + "epoch": 24.11, + "learning_rate": 3.794916345872783e-05, + "loss": 2.2691, + "step": 4866500 + }, + { + "epoch": 24.11, + "learning_rate": 3.7947924872301745e-05, + "loss": 2.2968, + "step": 4867000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794668876304851e-05, + "loss": 2.2602, + "step": 4867500 + }, + { + "epoch": 24.12, + "learning_rate": 3.7945450176622424e-05, + "loss": 2.2793, + "step": 4868000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794421159019634e-05, + "loss": 2.2726, + "step": 4868500 + }, + { + "epoch": 24.12, + "learning_rate": 3.794297300377026e-05, + "loss": 2.2902, + "step": 4869000 + }, + { + "epoch": 24.13, + "learning_rate": 3.7941734417344174e-05, + "loss": 2.2575, + "step": 4869500 + }, + { + "epoch": 24.13, + "learning_rate": 3.794049583091809e-05, + "loss": 2.2759, + "step": 4870000 + }, + { + "epoch": 24.13, + "learning_rate": 3.793925724449201e-05, + "loss": 2.2869, + "step": 4870500 + }, + { + "epoch": 24.13, + "learning_rate": 3.7938018658065925e-05, + "loss": 2.2826, + "step": 4871000 + }, + { + "epoch": 24.14, + "learning_rate": 3.7936780071639835e-05, + "loss": 2.2913, + "step": 4871500 + }, + { + "epoch": 24.14, + "learning_rate": 3.793554396238661e-05, + "loss": 2.3063, + "step": 4872000 + }, + { + "epoch": 24.14, + "learning_rate": 3.793430537596053e-05, + "loss": 2.2777, + "step": 4872500 + }, + { + "epoch": 24.14, + "learning_rate": 3.7933066789534445e-05, + "loss": 2.2694, + "step": 4873000 + }, + { + "epoch": 24.15, + "learning_rate": 3.793182820310836e-05, + "loss": 2.2867, + "step": 4873500 + }, + { + "epoch": 24.15, + "learning_rate": 3.793058961668228e-05, + "loss": 2.2729, + "step": 4874000 + }, + { + "epoch": 24.15, + "learning_rate": 3.792935350742905e-05, + "loss": 2.2849, + "step": 4874500 + }, + { + "epoch": 24.15, + "learning_rate": 3.792811492100296e-05, + "loss": 2.2694, + "step": 4875000 + }, + { + "epoch": 24.15, + "learning_rate": 3.7926876334576874e-05, + "loss": 2.263, + "step": 4875500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792563774815079e-05, + "loss": 2.2894, + "step": 4876000 + }, + { + "epoch": 24.16, + "learning_rate": 3.792439916172471e-05, + "loss": 2.2946, + "step": 4876500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792316305247148e-05, + "loss": 2.2824, + "step": 4877000 + }, + { + "epoch": 24.16, + "learning_rate": 3.7921924466045394e-05, + "loss": 2.2882, + "step": 4877500 + }, + { + "epoch": 24.17, + "learning_rate": 3.792068587961931e-05, + "loss": 2.2966, + "step": 4878000 + }, + { + "epoch": 24.17, + "learning_rate": 3.791944729319323e-05, + "loss": 2.2884, + "step": 4878500 + }, + { + "epoch": 24.17, + "learning_rate": 3.7918208706767145e-05, + "loss": 2.2832, + "step": 4879000 + }, + { + "epoch": 24.17, + "learning_rate": 3.7916972597513914e-05, + "loss": 2.268, + "step": 4879500 + }, + { + "epoch": 24.18, + "learning_rate": 3.791573401108783e-05, + "loss": 2.2686, + "step": 4880000 + }, + { + "epoch": 24.18, + "learning_rate": 3.791449542466175e-05, + "loss": 2.2891, + "step": 4880500 + }, + { + "epoch": 24.18, + "learning_rate": 3.7913256838235664e-05, + "loss": 2.2771, + "step": 4881000 + }, + { + "epoch": 24.18, + "learning_rate": 3.7912018251809575e-05, + "loss": 2.3122, + "step": 4881500 + }, + { + "epoch": 24.19, + "learning_rate": 3.791077966538349e-05, + "loss": 2.2735, + "step": 4882000 + }, + { + "epoch": 24.19, + "learning_rate": 3.790954107895741e-05, + "loss": 2.2336, + "step": 4882500 + }, + { + "epoch": 24.19, + "learning_rate": 3.7908302492531325e-05, + "loss": 2.2546, + "step": 4883000 + }, + { + "epoch": 24.19, + "learning_rate": 3.7907066383278094e-05, + "loss": 2.2733, + "step": 4883500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790582779685201e-05, + "loss": 2.2813, + "step": 4884000 + }, + { + "epoch": 24.2, + "learning_rate": 3.790459168759878e-05, + "loss": 2.2616, + "step": 4884500 + }, + { + "epoch": 24.2, + "learning_rate": 3.79033531011727e-05, + "loss": 2.2972, + "step": 4885000 + }, + { + "epoch": 24.2, + "learning_rate": 3.7902114514746614e-05, + "loss": 2.2637, + "step": 4885500 + }, + { + "epoch": 24.21, + "learning_rate": 3.790087592832053e-05, + "loss": 2.2805, + "step": 4886000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789963734189445e-05, + "loss": 2.2822, + "step": 4886500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7898398755468365e-05, + "loss": 2.2969, + "step": 4887000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789716016904228e-05, + "loss": 2.2901, + "step": 4887500 + }, + { + "epoch": 24.22, + "learning_rate": 3.78959215826162e-05, + "loss": 2.2994, + "step": 4888000 + }, + { + "epoch": 24.22, + "learning_rate": 3.789468299619011e-05, + "loss": 2.2735, + "step": 4888500 + }, + { + "epoch": 24.22, + "learning_rate": 3.7893444409764025e-05, + "loss": 2.2762, + "step": 4889000 + }, + { + "epoch": 24.22, + "learning_rate": 3.789220582333794e-05, + "loss": 2.3055, + "step": 4889500 + }, + { + "epoch": 24.23, + "learning_rate": 3.789096971408471e-05, + "loss": 2.2829, + "step": 4890000 + }, + { + "epoch": 24.23, + "learning_rate": 3.788973112765863e-05, + "loss": 2.2869, + "step": 4890500 + }, + { + "epoch": 24.23, + "learning_rate": 3.78884950184054e-05, + "loss": 2.284, + "step": 4891000 + }, + { + "epoch": 24.23, + "learning_rate": 3.7887256431979314e-05, + "loss": 2.2953, + "step": 4891500 + }, + { + "epoch": 24.24, + "learning_rate": 3.788601784555323e-05, + "loss": 2.2887, + "step": 4892000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788477925912715e-05, + "loss": 2.2865, + "step": 4892500 + }, + { + "epoch": 24.24, + "learning_rate": 3.7883540672701065e-05, + "loss": 2.2958, + "step": 4893000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788230208627498e-05, + "loss": 2.2692, + "step": 4893500 + }, + { + "epoch": 24.25, + "learning_rate": 3.7881065977021744e-05, + "loss": 2.2848, + "step": 4894000 + }, + { + "epoch": 24.25, + "learning_rate": 3.787982986776851e-05, + "loss": 2.2996, + "step": 4894500 + }, + { + "epoch": 24.25, + "learning_rate": 3.787859128134243e-05, + "loss": 2.2844, + "step": 4895000 + }, + { + "epoch": 24.25, + "learning_rate": 3.7877352694916346e-05, + "loss": 2.2728, + "step": 4895500 + }, + { + "epoch": 24.26, + "learning_rate": 3.7876116585663115e-05, + "loss": 2.2742, + "step": 4896000 + }, + { + "epoch": 24.26, + "learning_rate": 3.787487799923703e-05, + "loss": 2.2734, + "step": 4896500 + }, + { + "epoch": 24.26, + "learning_rate": 3.787363941281095e-05, + "loss": 2.3034, + "step": 4897000 + }, + { + "epoch": 24.26, + "learning_rate": 3.7872400826384866e-05, + "loss": 2.3104, + "step": 4897500 + }, + { + "epoch": 24.27, + "learning_rate": 3.787116223995878e-05, + "loss": 2.2996, + "step": 4898000 + }, + { + "epoch": 24.27, + "learning_rate": 3.786992365353269e-05, + "loss": 2.2914, + "step": 4898500 + }, + { + "epoch": 24.27, + "learning_rate": 3.786868506710661e-05, + "loss": 2.2983, + "step": 4899000 + }, + { + "epoch": 24.27, + "learning_rate": 3.786744648068053e-05, + "loss": 2.3012, + "step": 4899500 + }, + { + "epoch": 24.28, + "learning_rate": 3.78662103714273e-05, + "loss": 2.2913, + "step": 4900000 + }, + { + "epoch": 24.28, + "learning_rate": 3.786497178500121e-05, + "loss": 2.2849, + "step": 4900500 + }, + { + "epoch": 24.28, + "learning_rate": 3.786373319857513e-05, + "loss": 2.2893, + "step": 4901000 + }, + { + "epoch": 24.28, + "learning_rate": 3.7862494612149046e-05, + "loss": 2.2917, + "step": 4901500 + }, + { + "epoch": 24.29, + "learning_rate": 3.786125602572296e-05, + "loss": 2.2711, + "step": 4902000 + }, + { + "epoch": 24.29, + "learning_rate": 3.786001743929688e-05, + "loss": 2.2723, + "step": 4902500 + }, + { + "epoch": 24.29, + "learning_rate": 3.78587788528708e-05, + "loss": 2.2921, + "step": 4903000 + }, + { + "epoch": 24.29, + "learning_rate": 3.7857540266444714e-05, + "loss": 2.2999, + "step": 4903500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785630168001863e-05, + "loss": 2.2939, + "step": 4904000 + }, + { + "epoch": 24.3, + "learning_rate": 3.78550655707654e-05, + "loss": 2.3084, + "step": 4904500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785382698433931e-05, + "loss": 2.311, + "step": 4905000 + }, + { + "epoch": 24.3, + "learning_rate": 3.785258839791323e-05, + "loss": 2.2878, + "step": 4905500 + }, + { + "epoch": 24.31, + "learning_rate": 3.7851349811487144e-05, + "loss": 2.273, + "step": 4906000 + }, + { + "epoch": 24.31, + "learning_rate": 3.785011122506106e-05, + "loss": 2.2828, + "step": 4906500 + }, + { + "epoch": 24.31, + "learning_rate": 3.784887263863498e-05, + "loss": 2.2966, + "step": 4907000 + }, + { + "epoch": 24.31, + "learning_rate": 3.7847634052208895e-05, + "loss": 2.2957, + "step": 4907500 + }, + { + "epoch": 24.32, + "learning_rate": 3.784639546578281e-05, + "loss": 2.2923, + "step": 4908000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784515687935673e-05, + "loss": 2.2919, + "step": 4908500 + }, + { + "epoch": 24.32, + "learning_rate": 3.7843918292930645e-05, + "loss": 2.2741, + "step": 4909000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784267970650456e-05, + "loss": 2.3029, + "step": 4909500 + }, + { + "epoch": 24.33, + "learning_rate": 3.784144112007848e-05, + "loss": 2.2969, + "step": 4910000 + }, + { + "epoch": 24.33, + "learning_rate": 3.784020501082525e-05, + "loss": 2.2778, + "step": 4910500 + }, + { + "epoch": 24.33, + "learning_rate": 3.7838966424399165e-05, + "loss": 2.3072, + "step": 4911000 + }, + { + "epoch": 24.33, + "learning_rate": 3.783772783797308e-05, + "loss": 2.2775, + "step": 4911500 + }, + { + "epoch": 24.34, + "learning_rate": 3.7836489251547e-05, + "loss": 2.2819, + "step": 4912000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783525314229376e-05, + "loss": 2.2748, + "step": 4912500 + }, + { + "epoch": 24.34, + "learning_rate": 3.783401703304053e-05, + "loss": 2.3047, + "step": 4913000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783277844661445e-05, + "loss": 2.2986, + "step": 4913500 + }, + { + "epoch": 24.35, + "learning_rate": 3.7831539860188364e-05, + "loss": 2.3051, + "step": 4914000 + }, + { + "epoch": 24.35, + "learning_rate": 3.783030127376228e-05, + "loss": 2.2865, + "step": 4914500 + }, + { + "epoch": 24.35, + "learning_rate": 3.78290626873362e-05, + "loss": 2.3101, + "step": 4915000 + }, + { + "epoch": 24.35, + "learning_rate": 3.7827824100910114e-05, + "loss": 2.3132, + "step": 4915500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782658551448403e-05, + "loss": 2.2878, + "step": 4916000 + }, + { + "epoch": 24.36, + "learning_rate": 3.78253494052308e-05, + "loss": 2.2744, + "step": 4916500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782411329597757e-05, + "loss": 2.2969, + "step": 4917000 + }, + { + "epoch": 24.36, + "learning_rate": 3.7822874709551486e-05, + "loss": 2.2918, + "step": 4917500 + }, + { + "epoch": 24.37, + "learning_rate": 3.7821638600298255e-05, + "loss": 2.2919, + "step": 4918000 + }, + { + "epoch": 24.37, + "learning_rate": 3.782040001387217e-05, + "loss": 2.3031, + "step": 4918500 + }, + { + "epoch": 24.37, + "learning_rate": 3.781916142744609e-05, + "loss": 2.2955, + "step": 4919000 + }, + { + "epoch": 24.37, + "learning_rate": 3.7817922841020005e-05, + "loss": 2.2644, + "step": 4919500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781668425459392e-05, + "loss": 2.3126, + "step": 4920000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781544566816784e-05, + "loss": 2.2968, + "step": 4920500 + }, + { + "epoch": 24.38, + "learning_rate": 3.7814207081741756e-05, + "loss": 2.2943, + "step": 4921000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781296849531567e-05, + "loss": 2.3009, + "step": 4921500 + }, + { + "epoch": 24.39, + "learning_rate": 3.781172990888958e-05, + "loss": 2.2804, + "step": 4922000 + }, + { + "epoch": 24.39, + "learning_rate": 3.781049379963635e-05, + "loss": 2.2773, + "step": 4922500 + }, + { + "epoch": 24.39, + "learning_rate": 3.780925521321027e-05, + "loss": 2.3084, + "step": 4923000 + }, + { + "epoch": 24.39, + "learning_rate": 3.7808016626784186e-05, + "loss": 2.3082, + "step": 4923500 + }, + { + "epoch": 24.4, + "learning_rate": 3.78067780403581e-05, + "loss": 2.2789, + "step": 4924000 + }, + { + "epoch": 24.4, + "learning_rate": 3.780554193110487e-05, + "loss": 2.2776, + "step": 4924500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780430334467879e-05, + "loss": 2.2606, + "step": 4925000 + }, + { + "epoch": 24.4, + "learning_rate": 3.7803064758252706e-05, + "loss": 2.3084, + "step": 4925500 + }, + { + "epoch": 24.41, + "learning_rate": 3.780182617182662e-05, + "loss": 2.2744, + "step": 4926000 + }, + { + "epoch": 24.41, + "learning_rate": 3.780058758540054e-05, + "loss": 2.2805, + "step": 4926500 + }, + { + "epoch": 24.41, + "learning_rate": 3.7799348998974456e-05, + "loss": 2.2899, + "step": 4927000 + }, + { + "epoch": 24.41, + "learning_rate": 3.779811041254837e-05, + "loss": 2.2927, + "step": 4927500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7796874303295135e-05, + "loss": 2.3032, + "step": 4928000 + }, + { + "epoch": 24.42, + "learning_rate": 3.779563571686905e-05, + "loss": 2.2916, + "step": 4928500 + }, + { + "epoch": 24.42, + "learning_rate": 3.779439713044297e-05, + "loss": 2.284, + "step": 4929000 + }, + { + "epoch": 24.42, + "learning_rate": 3.7793158544016886e-05, + "loss": 2.2702, + "step": 4929500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7791922434763655e-05, + "loss": 2.2966, + "step": 4930000 + }, + { + "epoch": 24.43, + "learning_rate": 3.779068384833757e-05, + "loss": 2.2612, + "step": 4930500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778944526191149e-05, + "loss": 2.304, + "step": 4931000 + }, + { + "epoch": 24.43, + "learning_rate": 3.7788206675485406e-05, + "loss": 2.2901, + "step": 4931500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778696808905932e-05, + "loss": 2.2847, + "step": 4932000 + }, + { + "epoch": 24.44, + "learning_rate": 3.778572950263324e-05, + "loss": 2.2852, + "step": 4932500 + }, + { + "epoch": 24.44, + "learning_rate": 3.7784490916207156e-05, + "loss": 2.2935, + "step": 4933000 + }, + { + "epoch": 24.44, + "learning_rate": 3.7783252329781073e-05, + "loss": 2.2765, + "step": 4933500 + }, + { + "epoch": 24.44, + "learning_rate": 3.778201374335499e-05, + "loss": 2.3069, + "step": 4934000 + }, + { + "epoch": 24.45, + "learning_rate": 3.77807751569289e-05, + "loss": 2.3149, + "step": 4934500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777953657050282e-05, + "loss": 2.3075, + "step": 4935000 + }, + { + "epoch": 24.45, + "learning_rate": 3.7778297984076734e-05, + "loss": 2.2755, + "step": 4935500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777705939765065e-05, + "loss": 2.2864, + "step": 4936000 + }, + { + "epoch": 24.46, + "learning_rate": 3.777582328839742e-05, + "loss": 2.3105, + "step": 4936500 + }, + { + "epoch": 24.46, + "learning_rate": 3.777458470197134e-05, + "loss": 2.2645, + "step": 4937000 + }, + { + "epoch": 24.46, + "learning_rate": 3.7773346115545254e-05, + "loss": 2.2963, + "step": 4937500 + }, + { + "epoch": 24.46, + "learning_rate": 3.7772107529119164e-05, + "loss": 2.3063, + "step": 4938000 + }, + { + "epoch": 24.47, + "learning_rate": 3.777086894269308e-05, + "loss": 2.309, + "step": 4938500 + }, + { + "epoch": 24.47, + "learning_rate": 3.7769630356267e-05, + "loss": 2.309, + "step": 4939000 + }, + { + "epoch": 24.47, + "learning_rate": 3.7768391769840915e-05, + "loss": 2.2765, + "step": 4939500 + }, + { + "epoch": 24.47, + "learning_rate": 3.776715318341483e-05, + "loss": 2.2682, + "step": 4940000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776591459698875e-05, + "loss": 2.2867, + "step": 4940500 + }, + { + "epoch": 24.48, + "learning_rate": 3.7764676010562666e-05, + "loss": 2.2626, + "step": 4941000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776343742413658e-05, + "loss": 2.2847, + "step": 4941500 + }, + { + "epoch": 24.48, + "learning_rate": 3.776220131488335e-05, + "loss": 2.2891, + "step": 4942000 + }, + { + "epoch": 24.49, + "learning_rate": 3.776096272845727e-05, + "loss": 2.2844, + "step": 4942500 + }, + { + "epoch": 24.49, + "learning_rate": 3.7759724142031185e-05, + "loss": 2.2788, + "step": 4943000 + }, + { + "epoch": 24.49, + "learning_rate": 3.7758488032777954e-05, + "loss": 2.2986, + "step": 4943500 + }, + { + "epoch": 24.49, + "learning_rate": 3.7757249446351864e-05, + "loss": 2.2886, + "step": 4944000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775601085992578e-05, + "loss": 2.3065, + "step": 4944500 + }, + { + "epoch": 24.5, + "learning_rate": 3.77547722734997e-05, + "loss": 2.2977, + "step": 4945000 + }, + { + "epoch": 24.5, + "learning_rate": 3.7753536164246474e-05, + "loss": 2.2616, + "step": 4945500 + }, + { + "epoch": 24.5, + "learning_rate": 3.775229757782039e-05, + "loss": 2.2727, + "step": 4946000 + }, + { + "epoch": 24.51, + "learning_rate": 3.775105899139431e-05, + "loss": 2.2846, + "step": 4946500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774982040496822e-05, + "loss": 2.2827, + "step": 4947000 + }, + { + "epoch": 24.51, + "learning_rate": 3.7748581818542135e-05, + "loss": 2.2623, + "step": 4947500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774734323211605e-05, + "loss": 2.3012, + "step": 4948000 + }, + { + "epoch": 24.52, + "learning_rate": 3.774610712286282e-05, + "loss": 2.2657, + "step": 4948500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774486853643674e-05, + "loss": 2.279, + "step": 4949000 + }, + { + "epoch": 24.52, + "learning_rate": 3.7743629950010654e-05, + "loss": 2.3157, + "step": 4949500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774239136358457e-05, + "loss": 2.2974, + "step": 4950000 + }, + { + "epoch": 24.53, + "learning_rate": 3.774115277715848e-05, + "loss": 2.2873, + "step": 4950500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773991666790526e-05, + "loss": 2.2983, + "step": 4951000 + }, + { + "epoch": 24.53, + "learning_rate": 3.7738678081479174e-05, + "loss": 2.2926, + "step": 4951500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773743949505309e-05, + "loss": 2.2734, + "step": 4952000 + }, + { + "epoch": 24.54, + "learning_rate": 3.773620090862701e-05, + "loss": 2.3017, + "step": 4952500 + }, + { + "epoch": 24.54, + "learning_rate": 3.7734962322200925e-05, + "loss": 2.3132, + "step": 4953000 + }, + { + "epoch": 24.54, + "learning_rate": 3.7733723735774835e-05, + "loss": 2.2891, + "step": 4953500 + }, + { + "epoch": 24.54, + "learning_rate": 3.773248514934875e-05, + "loss": 2.2889, + "step": 4954000 + }, + { + "epoch": 24.55, + "learning_rate": 3.773124656292267e-05, + "loss": 2.3073, + "step": 4954500 + }, + { + "epoch": 24.55, + "learning_rate": 3.7730007976496585e-05, + "loss": 2.2684, + "step": 4955000 + }, + { + "epoch": 24.55, + "learning_rate": 3.7728774344416206e-05, + "loss": 2.2905, + "step": 4955500 + }, + { + "epoch": 24.55, + "learning_rate": 3.772753575799012e-05, + "loss": 2.2836, + "step": 4956000 + }, + { + "epoch": 24.56, + "learning_rate": 3.772629717156404e-05, + "loss": 2.2936, + "step": 4956500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772505858513796e-05, + "loss": 2.3092, + "step": 4957000 + }, + { + "epoch": 24.56, + "learning_rate": 3.7723822475884726e-05, + "loss": 2.2971, + "step": 4957500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772258388945864e-05, + "loss": 2.2959, + "step": 4958000 + }, + { + "epoch": 24.57, + "learning_rate": 3.772134530303256e-05, + "loss": 2.2656, + "step": 4958500 + }, + { + "epoch": 24.57, + "learning_rate": 3.7720106716606477e-05, + "loss": 2.2685, + "step": 4959000 + }, + { + "epoch": 24.57, + "learning_rate": 3.7718868130180393e-05, + "loss": 2.2871, + "step": 4959500 + }, + { + "epoch": 24.57, + "learning_rate": 3.7717629543754304e-05, + "loss": 2.2768, + "step": 4960000 + }, + { + "epoch": 24.58, + "learning_rate": 3.771639095732822e-05, + "loss": 2.3032, + "step": 4960500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771515237090214e-05, + "loss": 2.2941, + "step": 4961000 + }, + { + "epoch": 24.58, + "learning_rate": 3.7713913784476054e-05, + "loss": 2.2861, + "step": 4961500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771267519804997e-05, + "loss": 2.2827, + "step": 4962000 + }, + { + "epoch": 24.59, + "learning_rate": 3.771143661162389e-05, + "loss": 2.2945, + "step": 4962500 + }, + { + "epoch": 24.59, + "learning_rate": 3.77101980251978e-05, + "loss": 2.292, + "step": 4963000 + }, + { + "epoch": 24.59, + "learning_rate": 3.7708959438771715e-05, + "loss": 2.3007, + "step": 4963500 + }, + { + "epoch": 24.59, + "learning_rate": 3.770772085234563e-05, + "loss": 2.2972, + "step": 4964000 + }, + { + "epoch": 24.6, + "learning_rate": 3.770648226591955e-05, + "loss": 2.2883, + "step": 4964500 + }, + { + "epoch": 24.6, + "learning_rate": 3.7705246156666325e-05, + "loss": 2.3001, + "step": 4965000 + }, + { + "epoch": 24.6, + "learning_rate": 3.7704010047413094e-05, + "loss": 2.2894, + "step": 4965500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770277146098701e-05, + "loss": 2.2897, + "step": 4966000 + }, + { + "epoch": 24.61, + "learning_rate": 3.770153287456093e-05, + "loss": 2.267, + "step": 4966500 + }, + { + "epoch": 24.61, + "learning_rate": 3.770029428813484e-05, + "loss": 2.2917, + "step": 4967000 + }, + { + "epoch": 24.61, + "learning_rate": 3.7699055701708755e-05, + "loss": 2.3047, + "step": 4967500 + }, + { + "epoch": 24.61, + "learning_rate": 3.769781711528267e-05, + "loss": 2.2945, + "step": 4968000 + }, + { + "epoch": 24.62, + "learning_rate": 3.769657852885659e-05, + "loss": 2.2779, + "step": 4968500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769534241960336e-05, + "loss": 2.2889, + "step": 4969000 + }, + { + "epoch": 24.62, + "learning_rate": 3.7694103833177274e-05, + "loss": 2.2736, + "step": 4969500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769286524675119e-05, + "loss": 2.2839, + "step": 4970000 + }, + { + "epoch": 24.63, + "learning_rate": 3.769162666032511e-05, + "loss": 2.2858, + "step": 4970500 + }, + { + "epoch": 24.63, + "learning_rate": 3.7690388073899025e-05, + "loss": 2.2925, + "step": 4971000 + }, + { + "epoch": 24.63, + "learning_rate": 3.768914948747294e-05, + "loss": 2.2977, + "step": 4971500 + }, + { + "epoch": 24.63, + "learning_rate": 3.768791090104685e-05, + "loss": 2.2755, + "step": 4972000 + }, + { + "epoch": 24.64, + "learning_rate": 3.768667231462077e-05, + "loss": 2.2916, + "step": 4972500 + }, + { + "epoch": 24.64, + "learning_rate": 3.7685433728194686e-05, + "loss": 2.2846, + "step": 4973000 + }, + { + "epoch": 24.64, + "learning_rate": 3.76841951417686e-05, + "loss": 2.3258, + "step": 4973500 + }, + { + "epoch": 24.64, + "learning_rate": 3.768295903251537e-05, + "loss": 2.3021, + "step": 4974000 + }, + { + "epoch": 24.65, + "learning_rate": 3.768172044608929e-05, + "loss": 2.29, + "step": 4974500 + }, + { + "epoch": 24.65, + "learning_rate": 3.7680481859663205e-05, + "loss": 2.2926, + "step": 4975000 + }, + { + "epoch": 24.65, + "learning_rate": 3.7679245750409974e-05, + "loss": 2.3141, + "step": 4975500 + }, + { + "epoch": 24.65, + "learning_rate": 3.767800716398389e-05, + "loss": 2.3305, + "step": 4976000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767676857755781e-05, + "loss": 2.298, + "step": 4976500 + }, + { + "epoch": 24.66, + "learning_rate": 3.7675529991131725e-05, + "loss": 2.2815, + "step": 4977000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767429140470564e-05, + "loss": 2.2933, + "step": 4977500 + }, + { + "epoch": 24.66, + "learning_rate": 3.767305281827956e-05, + "loss": 2.2923, + "step": 4978000 + }, + { + "epoch": 24.67, + "learning_rate": 3.767181423185347e-05, + "loss": 2.2987, + "step": 4978500 + }, + { + "epoch": 24.67, + "learning_rate": 3.7670575645427386e-05, + "loss": 2.2733, + "step": 4979000 + }, + { + "epoch": 24.67, + "learning_rate": 3.76693370590013e-05, + "loss": 2.2781, + "step": 4979500 + }, + { + "epoch": 24.67, + "learning_rate": 3.766809847257522e-05, + "loss": 2.2993, + "step": 4980000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766685988614914e-05, + "loss": 2.2971, + "step": 4980500 + }, + { + "epoch": 24.68, + "learning_rate": 3.7665621299723054e-05, + "loss": 2.2793, + "step": 4981000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766438519046982e-05, + "loss": 2.3047, + "step": 4981500 + }, + { + "epoch": 24.68, + "learning_rate": 3.766314908121659e-05, + "loss": 2.2659, + "step": 4982000 + }, + { + "epoch": 24.69, + "learning_rate": 3.766191049479051e-05, + "loss": 2.2947, + "step": 4982500 + }, + { + "epoch": 24.69, + "learning_rate": 3.7660671908364425e-05, + "loss": 2.2981, + "step": 4983000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765943332193834e-05, + "loss": 2.3044, + "step": 4983500 + }, + { + "epoch": 24.69, + "learning_rate": 3.765819473551226e-05, + "loss": 2.2545, + "step": 4984000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765695614908617e-05, + "loss": 2.2985, + "step": 4984500 + }, + { + "epoch": 24.7, + "learning_rate": 3.7655717562660086e-05, + "loss": 2.2902, + "step": 4985000 + }, + { + "epoch": 24.7, + "learning_rate": 3.765448145340686e-05, + "loss": 2.2621, + "step": 4985500 + }, + { + "epoch": 24.7, + "learning_rate": 3.765324286698078e-05, + "loss": 2.2897, + "step": 4986000 + }, + { + "epoch": 24.7, + "learning_rate": 3.7652004280554696e-05, + "loss": 2.2919, + "step": 4986500 + }, + { + "epoch": 24.71, + "learning_rate": 3.7650765694128606e-05, + "loss": 2.2849, + "step": 4987000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764952710770252e-05, + "loss": 2.2823, + "step": 4987500 + }, + { + "epoch": 24.71, + "learning_rate": 3.764828852127644e-05, + "loss": 2.3095, + "step": 4988000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764705241202321e-05, + "loss": 2.3133, + "step": 4988500 + }, + { + "epoch": 24.72, + "learning_rate": 3.7645813825597125e-05, + "loss": 2.2789, + "step": 4989000 + }, + { + "epoch": 24.72, + "learning_rate": 3.764457523917104e-05, + "loss": 2.2912, + "step": 4989500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764333665274496e-05, + "loss": 2.2882, + "step": 4990000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7642098066318876e-05, + "loss": 2.2744, + "step": 4990500 + }, + { + "epoch": 24.73, + "learning_rate": 3.7640859479892786e-05, + "loss": 2.2954, + "step": 4991000 + }, + { + "epoch": 24.73, + "learning_rate": 3.76396208934667e-05, + "loss": 2.2657, + "step": 4991500 + }, + { + "epoch": 24.73, + "learning_rate": 3.763838230704062e-05, + "loss": 2.2872, + "step": 4992000 + }, + { + "epoch": 24.73, + "learning_rate": 3.763714372061454e-05, + "loss": 2.3018, + "step": 4992500 + }, + { + "epoch": 24.74, + "learning_rate": 3.7635905134188454e-05, + "loss": 2.276, + "step": 4993000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763466654776237e-05, + "loss": 2.2895, + "step": 4993500 + }, + { + "epoch": 24.74, + "learning_rate": 3.763342796133629e-05, + "loss": 2.3095, + "step": 4994000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763219185208306e-05, + "loss": 2.2876, + "step": 4994500 + }, + { + "epoch": 24.75, + "learning_rate": 3.7630955742829825e-05, + "loss": 2.3048, + "step": 4995000 + }, + { + "epoch": 24.75, + "learning_rate": 3.762971715640374e-05, + "loss": 2.296, + "step": 4995500 + }, + { + "epoch": 24.75, + "learning_rate": 3.762847856997766e-05, + "loss": 2.2919, + "step": 4996000 + }, + { + "epoch": 24.75, + "learning_rate": 3.7627239983551576e-05, + "loss": 2.2907, + "step": 4996500 + }, + { + "epoch": 24.76, + "learning_rate": 3.7626001397125486e-05, + "loss": 2.3036, + "step": 4997000 + }, + { + "epoch": 24.76, + "learning_rate": 3.76247628106994e-05, + "loss": 2.2775, + "step": 4997500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762352422427332e-05, + "loss": 2.3023, + "step": 4998000 + }, + { + "epoch": 24.76, + "learning_rate": 3.762228563784724e-05, + "loss": 2.2945, + "step": 4998500 + }, + { + "epoch": 24.77, + "learning_rate": 3.762104952859401e-05, + "loss": 2.2875, + "step": 4999000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761981094216793e-05, + "loss": 2.3163, + "step": 4999500 + }, + { + "epoch": 24.77, + "learning_rate": 3.7618572355741847e-05, + "loss": 2.3003, + "step": 5000000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761733376931576e-05, + "loss": 2.2923, + "step": 5000500 + }, + { + "epoch": 24.78, + "learning_rate": 3.7616095182889674e-05, + "loss": 2.3032, + "step": 5001000 + }, + { + "epoch": 24.78, + "learning_rate": 3.761485659646359e-05, + "loss": 2.2881, + "step": 5001500 + }, + { + "epoch": 24.78, + "learning_rate": 3.761361801003751e-05, + "loss": 2.2852, + "step": 5002000 + }, + { + "epoch": 24.78, + "learning_rate": 3.7612379423611424e-05, + "loss": 2.2943, + "step": 5002500 + }, + { + "epoch": 24.79, + "learning_rate": 3.7611140837185335e-05, + "loss": 2.2967, + "step": 5003000 + }, + { + "epoch": 24.79, + "learning_rate": 3.760990225075925e-05, + "loss": 2.3114, + "step": 5003500 + }, + { + "epoch": 24.79, + "learning_rate": 3.760866614150602e-05, + "loss": 2.301, + "step": 5004000 + }, + { + "epoch": 24.79, + "learning_rate": 3.7607430032252796e-05, + "loss": 2.2613, + "step": 5004500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760619144582671e-05, + "loss": 2.2814, + "step": 5005000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760495285940063e-05, + "loss": 2.2881, + "step": 5005500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760371427297455e-05, + "loss": 2.2897, + "step": 5006000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760247568654846e-05, + "loss": 2.2766, + "step": 5006500 + }, + { + "epoch": 24.81, + "learning_rate": 3.7601237100122374e-05, + "loss": 2.2921, + "step": 5007000 + }, + { + "epoch": 24.81, + "learning_rate": 3.759999851369629e-05, + "loss": 2.3077, + "step": 5007500 + }, + { + "epoch": 24.81, + "learning_rate": 3.759875992727021e-05, + "loss": 2.2995, + "step": 5008000 + }, + { + "epoch": 24.81, + "learning_rate": 3.7597521340844125e-05, + "loss": 2.3194, + "step": 5008500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759628275441804e-05, + "loss": 2.2753, + "step": 5009000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759504416799196e-05, + "loss": 2.2873, + "step": 5009500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759380558156587e-05, + "loss": 2.279, + "step": 5010000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759256947231264e-05, + "loss": 2.2831, + "step": 5010500 + }, + { + "epoch": 24.83, + "learning_rate": 3.7591330885886554e-05, + "loss": 2.2996, + "step": 5011000 + }, + { + "epoch": 24.83, + "learning_rate": 3.759009229946047e-05, + "loss": 2.3105, + "step": 5011500 + }, + { + "epoch": 24.83, + "learning_rate": 3.758885371303439e-05, + "loss": 2.3228, + "step": 5012000 + }, + { + "epoch": 24.83, + "learning_rate": 3.7587615126608305e-05, + "loss": 2.3095, + "step": 5012500 + }, + { + "epoch": 24.84, + "learning_rate": 3.7586379017355074e-05, + "loss": 2.2962, + "step": 5013000 + }, + { + "epoch": 24.84, + "learning_rate": 3.758514043092899e-05, + "loss": 2.3076, + "step": 5013500 + }, + { + "epoch": 24.84, + "learning_rate": 3.758390432167576e-05, + "loss": 2.2853, + "step": 5014000 + }, + { + "epoch": 24.84, + "learning_rate": 3.7582665735249677e-05, + "loss": 2.2836, + "step": 5014500 + }, + { + "epoch": 24.85, + "learning_rate": 3.7581427148823594e-05, + "loss": 2.2983, + "step": 5015000 + }, + { + "epoch": 24.85, + "learning_rate": 3.758018856239751e-05, + "loss": 2.2958, + "step": 5015500 + }, + { + "epoch": 24.85, + "learning_rate": 3.757894997597142e-05, + "loss": 2.312, + "step": 5016000 + }, + { + "epoch": 24.85, + "learning_rate": 3.757771138954534e-05, + "loss": 2.2927, + "step": 5016500 + }, + { + "epoch": 24.86, + "learning_rate": 3.7576472803119254e-05, + "loss": 2.3107, + "step": 5017000 + }, + { + "epoch": 24.86, + "learning_rate": 3.757523421669317e-05, + "loss": 2.2668, + "step": 5017500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757399563026709e-05, + "loss": 2.29, + "step": 5018000 + }, + { + "epoch": 24.86, + "learning_rate": 3.7572757043841005e-05, + "loss": 2.2804, + "step": 5018500 + }, + { + "epoch": 24.87, + "learning_rate": 3.757151845741492e-05, + "loss": 2.301, + "step": 5019000 + }, + { + "epoch": 24.87, + "learning_rate": 3.757027987098884e-05, + "loss": 2.2906, + "step": 5019500 + }, + { + "epoch": 24.87, + "learning_rate": 3.756904376173561e-05, + "loss": 2.264, + "step": 5020000 + }, + { + "epoch": 24.87, + "learning_rate": 3.756780765248238e-05, + "loss": 2.3048, + "step": 5020500 + }, + { + "epoch": 24.88, + "learning_rate": 3.7566571543229146e-05, + "loss": 2.2908, + "step": 5021000 + }, + { + "epoch": 24.88, + "learning_rate": 3.756533295680306e-05, + "loss": 2.31, + "step": 5021500 + }, + { + "epoch": 24.88, + "learning_rate": 3.756409437037698e-05, + "loss": 2.2986, + "step": 5022000 + }, + { + "epoch": 24.88, + "learning_rate": 3.7562855783950896e-05, + "loss": 2.2729, + "step": 5022500 + }, + { + "epoch": 24.89, + "learning_rate": 3.756161719752481e-05, + "loss": 2.2964, + "step": 5023000 + }, + { + "epoch": 24.89, + "learning_rate": 3.756038108827158e-05, + "loss": 2.2741, + "step": 5023500 + }, + { + "epoch": 24.89, + "learning_rate": 3.75591425018455e-05, + "loss": 2.315, + "step": 5024000 + }, + { + "epoch": 24.89, + "learning_rate": 3.755790391541941e-05, + "loss": 2.2977, + "step": 5024500 + }, + { + "epoch": 24.9, + "learning_rate": 3.7556665328993326e-05, + "loss": 2.292, + "step": 5025000 + }, + { + "epoch": 24.9, + "learning_rate": 3.755542674256724e-05, + "loss": 2.285, + "step": 5025500 + }, + { + "epoch": 24.9, + "learning_rate": 3.755418815614116e-05, + "loss": 2.3066, + "step": 5026000 + }, + { + "epoch": 24.9, + "learning_rate": 3.755295204688793e-05, + "loss": 2.2951, + "step": 5026500 + }, + { + "epoch": 24.91, + "learning_rate": 3.7551713460461846e-05, + "loss": 2.2866, + "step": 5027000 + }, + { + "epoch": 24.91, + "learning_rate": 3.755047487403576e-05, + "loss": 2.2931, + "step": 5027500 + }, + { + "epoch": 24.91, + "learning_rate": 3.754923628760968e-05, + "loss": 2.2865, + "step": 5028000 + }, + { + "epoch": 24.91, + "learning_rate": 3.7547997701183596e-05, + "loss": 2.2993, + "step": 5028500 + }, + { + "epoch": 24.92, + "learning_rate": 3.7546761591930365e-05, + "loss": 2.3104, + "step": 5029000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754552300550428e-05, + "loss": 2.2981, + "step": 5029500 + }, + { + "epoch": 24.92, + "learning_rate": 3.75442844190782e-05, + "loss": 2.3134, + "step": 5030000 + }, + { + "epoch": 24.92, + "learning_rate": 3.7543045832652116e-05, + "loss": 2.3015, + "step": 5030500 + }, + { + "epoch": 24.93, + "learning_rate": 3.7541807246226026e-05, + "loss": 2.2976, + "step": 5031000 + }, + { + "epoch": 24.93, + "learning_rate": 3.754056865979994e-05, + "loss": 2.3091, + "step": 5031500 + }, + { + "epoch": 24.93, + "learning_rate": 3.753933007337386e-05, + "loss": 2.2988, + "step": 5032000 + }, + { + "epoch": 24.93, + "learning_rate": 3.753809396412063e-05, + "loss": 2.2839, + "step": 5032500 + }, + { + "epoch": 24.94, + "learning_rate": 3.75368578548674e-05, + "loss": 2.297, + "step": 5033000 + }, + { + "epoch": 24.94, + "learning_rate": 3.7535619268441315e-05, + "loss": 2.2998, + "step": 5033500 + }, + { + "epoch": 24.94, + "learning_rate": 3.753438068201523e-05, + "loss": 2.2997, + "step": 5034000 + }, + { + "epoch": 24.94, + "learning_rate": 3.753314209558915e-05, + "loss": 2.2748, + "step": 5034500 + }, + { + "epoch": 24.95, + "learning_rate": 3.7531903509163065e-05, + "loss": 2.3065, + "step": 5035000 + }, + { + "epoch": 24.95, + "learning_rate": 3.753066492273698e-05, + "loss": 2.3051, + "step": 5035500 + }, + { + "epoch": 24.95, + "learning_rate": 3.75294263363109e-05, + "loss": 2.2992, + "step": 5036000 + }, + { + "epoch": 24.95, + "learning_rate": 3.7528187749884816e-05, + "loss": 2.3059, + "step": 5036500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752694916345873e-05, + "loss": 2.2977, + "step": 5037000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752571057703265e-05, + "loss": 2.289, + "step": 5037500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752447199060656e-05, + "loss": 2.2699, + "step": 5038000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752323340418048e-05, + "loss": 2.2974, + "step": 5038500 + }, + { + "epoch": 24.96, + "learning_rate": 3.7521994817754394e-05, + "loss": 2.2897, + "step": 5039000 + }, + { + "epoch": 24.97, + "learning_rate": 3.752075623132831e-05, + "loss": 2.3178, + "step": 5039500 + }, + { + "epoch": 24.97, + "learning_rate": 3.751951764490223e-05, + "loss": 2.3062, + "step": 5040000 + }, + { + "epoch": 24.97, + "learning_rate": 3.7518279058476145e-05, + "loss": 2.3189, + "step": 5040500 + }, + { + "epoch": 24.97, + "learning_rate": 3.7517040472050055e-05, + "loss": 2.299, + "step": 5041000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751580188562397e-05, + "loss": 2.3129, + "step": 5041500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751456329919789e-05, + "loss": 2.3282, + "step": 5042000 + }, + { + "epoch": 24.98, + "learning_rate": 3.7513324712771806e-05, + "loss": 2.2736, + "step": 5042500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751208612634572e-05, + "loss": 2.3151, + "step": 5043000 + }, + { + "epoch": 24.99, + "learning_rate": 3.751085249426535e-05, + "loss": 2.2894, + "step": 5043500 + }, + { + "epoch": 24.99, + "learning_rate": 3.750961390783927e-05, + "loss": 2.3101, + "step": 5044000 + }, + { + "epoch": 24.99, + "learning_rate": 3.750837532141318e-05, + "loss": 2.3174, + "step": 5044500 + }, + { + "epoch": 24.99, + "learning_rate": 3.7507136734987094e-05, + "loss": 2.311, + "step": 5045000 + }, + { + "epoch": 25.0, + "learning_rate": 3.750589814856101e-05, + "loss": 2.3085, + "step": 5045500 + }, + { + "epoch": 25.0, + "learning_rate": 3.750465956213493e-05, + "loss": 2.2712, + "step": 5046000 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.654987755169183, + "eval_accuracy_mlm": 0.6101664086430171, + "eval_accuracy_nsp": 0.8666530697092474, + "eval_loss": 2.337043523788452, + "eval_runtime": 145.9167, + "eval_samples_per_second": 1747.291, + "eval_steps_per_second": 72.809, + "step": 5046075 + }, + { + "epoch": 25.0, + "learning_rate": 3.7503420975708845e-05, + "loss": 2.2487, + "step": 5046500 + }, + { + "epoch": 25.0, + "learning_rate": 3.7502184866455614e-05, + "loss": 2.254, + "step": 5047000 + }, + { + "epoch": 25.01, + "learning_rate": 3.750094628002953e-05, + "loss": 2.2641, + "step": 5047500 + }, + { + "epoch": 25.01, + "learning_rate": 3.74997101707763e-05, + "loss": 2.2432, + "step": 5048000 + }, + { + "epoch": 25.01, + "learning_rate": 3.7498471584350216e-05, + "loss": 2.2432, + "step": 5048500 + }, + { + "epoch": 25.01, + "learning_rate": 3.749723299792413e-05, + "loss": 2.2643, + "step": 5049000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749599441149805e-05, + "loss": 2.2568, + "step": 5049500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749475582507197e-05, + "loss": 2.2875, + "step": 5050000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749351971581873e-05, + "loss": 2.2394, + "step": 5050500 + }, + { + "epoch": 25.02, + "learning_rate": 3.74922836065655e-05, + "loss": 2.2771, + "step": 5051000 + }, + { + "epoch": 25.03, + "learning_rate": 3.7491045020139415e-05, + "loss": 2.3011, + "step": 5051500 + }, + { + "epoch": 25.03, + "learning_rate": 3.748980643371333e-05, + "loss": 2.2375, + "step": 5052000 + }, + { + "epoch": 25.03, + "learning_rate": 3.748856784728725e-05, + "loss": 2.2646, + "step": 5052500 + }, + { + "epoch": 25.03, + "learning_rate": 3.7487329260861166e-05, + "loss": 2.2578, + "step": 5053000 + }, + { + "epoch": 25.04, + "learning_rate": 3.748609067443508e-05, + "loss": 2.2628, + "step": 5053500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7484852088009e-05, + "loss": 2.2763, + "step": 5054000 + }, + { + "epoch": 25.04, + "learning_rate": 3.7483613501582917e-05, + "loss": 2.2344, + "step": 5054500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7482374915156833e-05, + "loss": 2.2395, + "step": 5055000 + }, + { + "epoch": 25.05, + "learning_rate": 3.7481138805903595e-05, + "loss": 2.2747, + "step": 5055500 + }, + { + "epoch": 25.05, + "learning_rate": 3.747990021947751e-05, + "loss": 2.2727, + "step": 5056000 + }, + { + "epoch": 25.05, + "learning_rate": 3.747866411022429e-05, + "loss": 2.2932, + "step": 5056500 + }, + { + "epoch": 25.05, + "learning_rate": 3.7477425523798205e-05, + "loss": 2.2713, + "step": 5057000 + }, + { + "epoch": 25.06, + "learning_rate": 3.7476186937372115e-05, + "loss": 2.2643, + "step": 5057500 + }, + { + "epoch": 25.06, + "learning_rate": 3.747494835094603e-05, + "loss": 2.2898, + "step": 5058000 + }, + { + "epoch": 25.06, + "learning_rate": 3.747370976451995e-05, + "loss": 2.2548, + "step": 5058500 + }, + { + "epoch": 25.06, + "learning_rate": 3.7472471178093866e-05, + "loss": 2.2443, + "step": 5059000 + }, + { + "epoch": 25.07, + "learning_rate": 3.747123259166778e-05, + "loss": 2.2565, + "step": 5059500 + }, + { + "epoch": 25.07, + "learning_rate": 3.74699940052417e-05, + "loss": 2.2639, + "step": 5060000 + }, + { + "epoch": 25.07, + "learning_rate": 3.746875541881562e-05, + "loss": 2.266, + "step": 5060500 + }, + { + "epoch": 25.07, + "learning_rate": 3.7467516832389534e-05, + "loss": 2.2691, + "step": 5061000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746627824596345e-05, + "loss": 2.2544, + "step": 5061500 + }, + { + "epoch": 25.08, + "learning_rate": 3.746504213671021e-05, + "loss": 2.2926, + "step": 5062000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746380355028413e-05, + "loss": 2.2788, + "step": 5062500 + }, + { + "epoch": 25.08, + "learning_rate": 3.7462564963858046e-05, + "loss": 2.2783, + "step": 5063000 + }, + { + "epoch": 25.09, + "learning_rate": 3.7461328854604815e-05, + "loss": 2.279, + "step": 5063500 + }, + { + "epoch": 25.09, + "learning_rate": 3.746009274535159e-05, + "loss": 2.3008, + "step": 5064000 + }, + { + "epoch": 25.09, + "learning_rate": 3.745885415892551e-05, + "loss": 2.2739, + "step": 5064500 + }, + { + "epoch": 25.09, + "learning_rate": 3.7457615572499425e-05, + "loss": 2.2622, + "step": 5065000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745637698607334e-05, + "loss": 2.2654, + "step": 5065500 + }, + { + "epoch": 25.1, + "learning_rate": 3.745513839964725e-05, + "loss": 2.2805, + "step": 5066000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745389981322117e-05, + "loss": 2.2743, + "step": 5066500 + }, + { + "epoch": 25.1, + "learning_rate": 3.7452661226795086e-05, + "loss": 2.2902, + "step": 5067000 + }, + { + "epoch": 25.11, + "learning_rate": 3.7451422640369e-05, + "loss": 2.2832, + "step": 5067500 + }, + { + "epoch": 25.11, + "learning_rate": 3.745018405394292e-05, + "loss": 2.2714, + "step": 5068000 + }, + { + "epoch": 25.11, + "learning_rate": 3.744894546751683e-05, + "loss": 2.2632, + "step": 5068500 + }, + { + "epoch": 25.11, + "learning_rate": 3.7447706881090747e-05, + "loss": 2.2622, + "step": 5069000 + }, + { + "epoch": 25.12, + "learning_rate": 3.7446468294664663e-05, + "loss": 2.2784, + "step": 5069500 + }, + { + "epoch": 25.12, + "learning_rate": 3.744522970823858e-05, + "loss": 2.257, + "step": 5070000 + }, + { + "epoch": 25.12, + "learning_rate": 3.744399607615821e-05, + "loss": 2.2936, + "step": 5070500 + }, + { + "epoch": 25.12, + "learning_rate": 3.7442757489732125e-05, + "loss": 2.2487, + "step": 5071000 + }, + { + "epoch": 25.13, + "learning_rate": 3.744151890330604e-05, + "loss": 2.281, + "step": 5071500 + }, + { + "epoch": 25.13, + "learning_rate": 3.744028031687996e-05, + "loss": 2.2606, + "step": 5072000 + }, + { + "epoch": 25.13, + "learning_rate": 3.743904173045387e-05, + "loss": 2.2643, + "step": 5072500 + }, + { + "epoch": 25.13, + "learning_rate": 3.7437803144027786e-05, + "loss": 2.2806, + "step": 5073000 + }, + { + "epoch": 25.14, + "learning_rate": 3.74365645576017e-05, + "loss": 2.2664, + "step": 5073500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743532844834847e-05, + "loss": 2.261, + "step": 5074000 + }, + { + "epoch": 25.14, + "learning_rate": 3.743408986192239e-05, + "loss": 2.2754, + "step": 5074500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743285375266916e-05, + "loss": 2.2802, + "step": 5075000 + }, + { + "epoch": 25.15, + "learning_rate": 3.7431615166243074e-05, + "loss": 2.2682, + "step": 5075500 + }, + { + "epoch": 25.15, + "learning_rate": 3.743037657981699e-05, + "loss": 2.2538, + "step": 5076000 + }, + { + "epoch": 25.15, + "learning_rate": 3.742913799339091e-05, + "loss": 2.2676, + "step": 5076500 + }, + { + "epoch": 25.15, + "learning_rate": 3.7427899406964825e-05, + "loss": 2.291, + "step": 5077000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742666082053874e-05, + "loss": 2.2751, + "step": 5077500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742542223411266e-05, + "loss": 2.2786, + "step": 5078000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742418612485942e-05, + "loss": 2.2768, + "step": 5078500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742294753843334e-05, + "loss": 2.2702, + "step": 5079000 + }, + { + "epoch": 25.17, + "learning_rate": 3.7421708952007255e-05, + "loss": 2.2698, + "step": 5079500 + }, + { + "epoch": 25.17, + "learning_rate": 3.7420472842754023e-05, + "loss": 2.2873, + "step": 5080000 + }, + { + "epoch": 25.17, + "learning_rate": 3.741923425632794e-05, + "loss": 2.2968, + "step": 5080500 + }, + { + "epoch": 25.17, + "learning_rate": 3.741799566990186e-05, + "loss": 2.2721, + "step": 5081000 + }, + { + "epoch": 25.18, + "learning_rate": 3.7416757083475774e-05, + "loss": 2.2607, + "step": 5081500 + }, + { + "epoch": 25.18, + "learning_rate": 3.741551849704969e-05, + "loss": 2.2801, + "step": 5082000 + }, + { + "epoch": 25.18, + "learning_rate": 3.741427991062361e-05, + "loss": 2.2657, + "step": 5082500 + }, + { + "epoch": 25.18, + "learning_rate": 3.7413041324197525e-05, + "loss": 2.2593, + "step": 5083000 + }, + { + "epoch": 25.19, + "learning_rate": 3.741180273777144e-05, + "loss": 2.2759, + "step": 5083500 + }, + { + "epoch": 25.19, + "learning_rate": 3.741056415134536e-05, + "loss": 2.2836, + "step": 5084000 + }, + { + "epoch": 25.19, + "learning_rate": 3.7409325564919276e-05, + "loss": 2.2825, + "step": 5084500 + }, + { + "epoch": 25.19, + "learning_rate": 3.740808697849319e-05, + "loss": 2.2724, + "step": 5085000 + }, + { + "epoch": 25.2, + "learning_rate": 3.74068483920671e-05, + "loss": 2.268, + "step": 5085500 + }, + { + "epoch": 25.2, + "learning_rate": 3.740560980564102e-05, + "loss": 2.2915, + "step": 5086000 + }, + { + "epoch": 25.2, + "learning_rate": 3.740437121921494e-05, + "loss": 2.2578, + "step": 5086500 + }, + { + "epoch": 25.2, + "learning_rate": 3.7403132632788854e-05, + "loss": 2.2763, + "step": 5087000 + }, + { + "epoch": 25.21, + "learning_rate": 3.740189404636277e-05, + "loss": 2.2847, + "step": 5087500 + }, + { + "epoch": 25.21, + "learning_rate": 3.740065545993669e-05, + "loss": 2.2892, + "step": 5088000 + }, + { + "epoch": 25.21, + "learning_rate": 3.73994168735106e-05, + "loss": 2.2638, + "step": 5088500 + }, + { + "epoch": 25.21, + "learning_rate": 3.7398178287084515e-05, + "loss": 2.2464, + "step": 5089000 + }, + { + "epoch": 25.22, + "learning_rate": 3.739693970065843e-05, + "loss": 2.2865, + "step": 5089500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739570111423235e-05, + "loss": 2.2407, + "step": 5090000 + }, + { + "epoch": 25.22, + "learning_rate": 3.7394462527806265e-05, + "loss": 2.2847, + "step": 5090500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739322394138018e-05, + "loss": 2.2936, + "step": 5091000 + }, + { + "epoch": 25.23, + "learning_rate": 3.73919853549541e-05, + "loss": 2.2751, + "step": 5091500 + }, + { + "epoch": 25.23, + "learning_rate": 3.7390746768528016e-05, + "loss": 2.2734, + "step": 5092000 + }, + { + "epoch": 25.23, + "learning_rate": 3.738951313644764e-05, + "loss": 2.2856, + "step": 5092500 + }, + { + "epoch": 25.23, + "learning_rate": 3.7388274550021554e-05, + "loss": 2.2731, + "step": 5093000 + }, + { + "epoch": 25.23, + "learning_rate": 3.738703596359547e-05, + "loss": 2.2916, + "step": 5093500 + }, + { + "epoch": 25.24, + "learning_rate": 3.738579737716939e-05, + "loss": 2.2879, + "step": 5094000 + }, + { + "epoch": 25.24, + "learning_rate": 3.7384561267916156e-05, + "loss": 2.2828, + "step": 5094500 + }, + { + "epoch": 25.24, + "learning_rate": 3.7383325158662925e-05, + "loss": 2.2666, + "step": 5095000 + }, + { + "epoch": 25.24, + "learning_rate": 3.738208657223684e-05, + "loss": 2.2738, + "step": 5095500 + }, + { + "epoch": 25.25, + "learning_rate": 3.738084798581076e-05, + "loss": 2.2947, + "step": 5096000 + }, + { + "epoch": 25.25, + "learning_rate": 3.7379609399384676e-05, + "loss": 2.2574, + "step": 5096500 + }, + { + "epoch": 25.25, + "learning_rate": 3.737837081295859e-05, + "loss": 2.2668, + "step": 5097000 + }, + { + "epoch": 25.25, + "learning_rate": 3.737713222653251e-05, + "loss": 2.2884, + "step": 5097500 + }, + { + "epoch": 25.26, + "learning_rate": 3.737589364010642e-05, + "loss": 2.2407, + "step": 5098000 + }, + { + "epoch": 25.26, + "learning_rate": 3.737465505368034e-05, + "loss": 2.2656, + "step": 5098500 + }, + { + "epoch": 25.26, + "learning_rate": 3.7373416467254254e-05, + "loss": 2.2799, + "step": 5099000 + }, + { + "epoch": 25.26, + "learning_rate": 3.737217788082817e-05, + "loss": 2.2688, + "step": 5099500 + }, + { + "epoch": 25.27, + "learning_rate": 3.737093929440209e-05, + "loss": 2.284, + "step": 5100000 + }, + { + "epoch": 25.27, + "learning_rate": 3.7369703185148857e-05, + "loss": 2.2631, + "step": 5100500 + }, + { + "epoch": 25.27, + "learning_rate": 3.736846459872277e-05, + "loss": 2.3051, + "step": 5101000 + }, + { + "epoch": 25.27, + "learning_rate": 3.7367226012296684e-05, + "loss": 2.2549, + "step": 5101500 + }, + { + "epoch": 25.28, + "learning_rate": 3.73659874258706e-05, + "loss": 2.2622, + "step": 5102000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736474883944452e-05, + "loss": 2.2823, + "step": 5102500 + }, + { + "epoch": 25.28, + "learning_rate": 3.7363510253018434e-05, + "loss": 2.2786, + "step": 5103000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736227166659235e-05, + "loss": 2.3078, + "step": 5103500 + }, + { + "epoch": 25.29, + "learning_rate": 3.736103803451197e-05, + "loss": 2.3087, + "step": 5104000 + }, + { + "epoch": 25.29, + "learning_rate": 3.735980192525874e-05, + "loss": 2.2929, + "step": 5104500 + }, + { + "epoch": 25.29, + "learning_rate": 3.735856333883266e-05, + "loss": 2.2852, + "step": 5105000 + }, + { + "epoch": 25.29, + "learning_rate": 3.7357324752406575e-05, + "loss": 2.2776, + "step": 5105500 + }, + { + "epoch": 25.3, + "learning_rate": 3.735608616598049e-05, + "loss": 2.2995, + "step": 5106000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735484757955441e-05, + "loss": 2.2672, + "step": 5106500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7353608993128326e-05, + "loss": 2.2895, + "step": 5107000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735237040670224e-05, + "loss": 2.277, + "step": 5107500 + }, + { + "epoch": 25.31, + "learning_rate": 3.735113182027616e-05, + "loss": 2.275, + "step": 5108000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7349893233850076e-05, + "loss": 2.2546, + "step": 5108500 + }, + { + "epoch": 25.31, + "learning_rate": 3.734865464742399e-05, + "loss": 2.295, + "step": 5109000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7347418538170755e-05, + "loss": 2.2752, + "step": 5109500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734617995174467e-05, + "loss": 2.2777, + "step": 5110000 + }, + { + "epoch": 25.32, + "learning_rate": 3.734494136531859e-05, + "loss": 2.2843, + "step": 5110500 + }, + { + "epoch": 25.32, + "learning_rate": 3.7343702778892506e-05, + "loss": 2.2714, + "step": 5111000 + }, + { + "epoch": 25.32, + "learning_rate": 3.734246419246642e-05, + "loss": 2.2732, + "step": 5111500 + }, + { + "epoch": 25.33, + "learning_rate": 3.734122560604034e-05, + "loss": 2.3063, + "step": 5112000 + }, + { + "epoch": 25.33, + "learning_rate": 3.733998701961426e-05, + "loss": 2.2975, + "step": 5112500 + }, + { + "epoch": 25.33, + "learning_rate": 3.7338748433188174e-05, + "loss": 2.291, + "step": 5113000 + }, + { + "epoch": 25.33, + "learning_rate": 3.7337509846762084e-05, + "loss": 2.2564, + "step": 5113500 + }, + { + "epoch": 25.34, + "learning_rate": 3.7336271260336e-05, + "loss": 2.2872, + "step": 5114000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733503267390992e-05, + "loss": 2.2867, + "step": 5114500 + }, + { + "epoch": 25.34, + "learning_rate": 3.7333794087483835e-05, + "loss": 2.243, + "step": 5115000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733255797823061e-05, + "loss": 2.2655, + "step": 5115500 + }, + { + "epoch": 25.35, + "learning_rate": 3.733131939180453e-05, + "loss": 2.2926, + "step": 5116000 + }, + { + "epoch": 25.35, + "learning_rate": 3.733008080537844e-05, + "loss": 2.2715, + "step": 5116500 + }, + { + "epoch": 25.35, + "learning_rate": 3.7328842218952354e-05, + "loss": 2.2757, + "step": 5117000 + }, + { + "epoch": 25.35, + "learning_rate": 3.732760363252627e-05, + "loss": 2.2806, + "step": 5117500 + }, + { + "epoch": 25.36, + "learning_rate": 3.732636752327304e-05, + "loss": 2.2805, + "step": 5118000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732513141401981e-05, + "loss": 2.2542, + "step": 5118500 + }, + { + "epoch": 25.36, + "learning_rate": 3.7323892827593726e-05, + "loss": 2.277, + "step": 5119000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732265424116764e-05, + "loss": 2.2745, + "step": 5119500 + }, + { + "epoch": 25.37, + "learning_rate": 3.732141565474156e-05, + "loss": 2.2858, + "step": 5120000 + }, + { + "epoch": 25.37, + "learning_rate": 3.7320177068315477e-05, + "loss": 2.2666, + "step": 5120500 + }, + { + "epoch": 25.37, + "learning_rate": 3.7318938481889393e-05, + "loss": 2.2903, + "step": 5121000 + }, + { + "epoch": 25.37, + "learning_rate": 3.731769989546331e-05, + "loss": 2.2642, + "step": 5121500 + }, + { + "epoch": 25.38, + "learning_rate": 3.731646130903723e-05, + "loss": 2.2661, + "step": 5122000 + }, + { + "epoch": 25.38, + "learning_rate": 3.7315222722611144e-05, + "loss": 2.2305, + "step": 5122500 + }, + { + "epoch": 25.38, + "learning_rate": 3.7313984136185054e-05, + "loss": 2.2755, + "step": 5123000 + }, + { + "epoch": 25.38, + "learning_rate": 3.731274554975897e-05, + "loss": 2.2832, + "step": 5123500 + }, + { + "epoch": 25.39, + "learning_rate": 3.731150696333289e-05, + "loss": 2.2592, + "step": 5124000 + }, + { + "epoch": 25.39, + "learning_rate": 3.7310268376906805e-05, + "loss": 2.2704, + "step": 5124500 + }, + { + "epoch": 25.39, + "learning_rate": 3.7309032267653574e-05, + "loss": 2.2827, + "step": 5125000 + }, + { + "epoch": 25.39, + "learning_rate": 3.730779368122749e-05, + "loss": 2.2715, + "step": 5125500 + }, + { + "epoch": 25.4, + "learning_rate": 3.73065550948014e-05, + "loss": 2.2773, + "step": 5126000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730531650837532e-05, + "loss": 2.2667, + "step": 5126500 + }, + { + "epoch": 25.4, + "learning_rate": 3.7304080399122094e-05, + "loss": 2.3061, + "step": 5127000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730284428986886e-05, + "loss": 2.2737, + "step": 5127500 + }, + { + "epoch": 25.41, + "learning_rate": 3.730160570344278e-05, + "loss": 2.2715, + "step": 5128000 + }, + { + "epoch": 25.41, + "learning_rate": 3.7300367117016696e-05, + "loss": 2.2898, + "step": 5128500 + }, + { + "epoch": 25.41, + "learning_rate": 3.729912853059061e-05, + "loss": 2.2592, + "step": 5129000 + }, + { + "epoch": 25.41, + "learning_rate": 3.729788994416453e-05, + "loss": 2.2816, + "step": 5129500 + }, + { + "epoch": 25.42, + "learning_rate": 3.729665135773844e-05, + "loss": 2.2692, + "step": 5130000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729541277131236e-05, + "loss": 2.2715, + "step": 5130500 + }, + { + "epoch": 25.42, + "learning_rate": 3.7294174184886274e-05, + "loss": 2.28, + "step": 5131000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729293807563304e-05, + "loss": 2.2883, + "step": 5131500 + }, + { + "epoch": 25.43, + "learning_rate": 3.729169948920696e-05, + "loss": 2.2569, + "step": 5132000 + }, + { + "epoch": 25.43, + "learning_rate": 3.729046337995373e-05, + "loss": 2.2704, + "step": 5132500 + }, + { + "epoch": 25.43, + "learning_rate": 3.7289224793527646e-05, + "loss": 2.279, + "step": 5133000 + }, + { + "epoch": 25.43, + "learning_rate": 3.728798620710156e-05, + "loss": 2.2866, + "step": 5133500 + }, + { + "epoch": 25.44, + "learning_rate": 3.728674762067548e-05, + "loss": 2.2773, + "step": 5134000 + }, + { + "epoch": 25.44, + "learning_rate": 3.7285509034249396e-05, + "loss": 2.3014, + "step": 5134500 + }, + { + "epoch": 25.44, + "learning_rate": 3.728427044782331e-05, + "loss": 2.269, + "step": 5135000 + }, + { + "epoch": 25.44, + "learning_rate": 3.728303186139723e-05, + "loss": 2.2995, + "step": 5135500 + }, + { + "epoch": 25.45, + "learning_rate": 3.728179327497115e-05, + "loss": 2.2661, + "step": 5136000 + }, + { + "epoch": 25.45, + "learning_rate": 3.7280554688545064e-05, + "loss": 2.2761, + "step": 5136500 + }, + { + "epoch": 25.45, + "learning_rate": 3.7279316102118974e-05, + "loss": 2.2874, + "step": 5137000 + }, + { + "epoch": 25.45, + "learning_rate": 3.727807751569289e-05, + "loss": 2.2651, + "step": 5137500 + }, + { + "epoch": 25.46, + "learning_rate": 3.727683892926681e-05, + "loss": 2.2692, + "step": 5138000 + }, + { + "epoch": 25.46, + "learning_rate": 3.7275600342840725e-05, + "loss": 2.2918, + "step": 5138500 + }, + { + "epoch": 25.46, + "learning_rate": 3.7274364233587494e-05, + "loss": 2.288, + "step": 5139000 + }, + { + "epoch": 25.46, + "learning_rate": 3.727312564716141e-05, + "loss": 2.2863, + "step": 5139500 + }, + { + "epoch": 25.47, + "learning_rate": 3.727188706073533e-05, + "loss": 2.2988, + "step": 5140000 + }, + { + "epoch": 25.47, + "learning_rate": 3.7270648474309245e-05, + "loss": 2.2731, + "step": 5140500 + }, + { + "epoch": 25.47, + "learning_rate": 3.726940988788316e-05, + "loss": 2.2773, + "step": 5141000 + }, + { + "epoch": 25.47, + "learning_rate": 3.726817130145707e-05, + "loss": 2.2942, + "step": 5141500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726693519220385e-05, + "loss": 2.2827, + "step": 5142000 + }, + { + "epoch": 25.48, + "learning_rate": 3.7265696605777764e-05, + "loss": 2.2954, + "step": 5142500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726445801935168e-05, + "loss": 2.2909, + "step": 5143000 + }, + { + "epoch": 25.48, + "learning_rate": 3.726322191009844e-05, + "loss": 2.3019, + "step": 5143500 + }, + { + "epoch": 25.49, + "learning_rate": 3.726198332367236e-05, + "loss": 2.2943, + "step": 5144000 + }, + { + "epoch": 25.49, + "learning_rate": 3.726074473724628e-05, + "loss": 2.2736, + "step": 5144500 + }, + { + "epoch": 25.49, + "learning_rate": 3.7259506150820194e-05, + "loss": 2.2876, + "step": 5145000 + }, + { + "epoch": 25.49, + "learning_rate": 3.725826756439411e-05, + "loss": 2.2856, + "step": 5145500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725702897796803e-05, + "loss": 2.272, + "step": 5146000 + }, + { + "epoch": 25.5, + "learning_rate": 3.7255790391541945e-05, + "loss": 2.2921, + "step": 5146500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725455180511586e-05, + "loss": 2.2833, + "step": 5147000 + }, + { + "epoch": 25.5, + "learning_rate": 3.725331321868978e-05, + "loss": 2.2587, + "step": 5147500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725207463226369e-05, + "loss": 2.2883, + "step": 5148000 + }, + { + "epoch": 25.51, + "learning_rate": 3.7250836045837606e-05, + "loss": 2.2676, + "step": 5148500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724959993658438e-05, + "loss": 2.2797, + "step": 5149000 + }, + { + "epoch": 25.51, + "learning_rate": 3.72483613501583e-05, + "loss": 2.2961, + "step": 5149500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724712524090506e-05, + "loss": 2.2984, + "step": 5150000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724588665447898e-05, + "loss": 2.2816, + "step": 5150500 + }, + { + "epoch": 25.52, + "learning_rate": 3.7244648068052894e-05, + "loss": 2.2632, + "step": 5151000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724340948162681e-05, + "loss": 2.2847, + "step": 5151500 + }, + { + "epoch": 25.52, + "learning_rate": 3.724217089520073e-05, + "loss": 2.2749, + "step": 5152000 + }, + { + "epoch": 25.53, + "learning_rate": 3.7240932308774645e-05, + "loss": 2.3042, + "step": 5152500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723969372234856e-05, + "loss": 2.2551, + "step": 5153000 + }, + { + "epoch": 25.53, + "learning_rate": 3.723845513592248e-05, + "loss": 2.2692, + "step": 5153500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723721654949639e-05, + "loss": 2.2742, + "step": 5154000 + }, + { + "epoch": 25.54, + "learning_rate": 3.7235980440243164e-05, + "loss": 2.2607, + "step": 5154500 + }, + { + "epoch": 25.54, + "learning_rate": 3.7234744330989927e-05, + "loss": 2.3025, + "step": 5155000 + }, + { + "epoch": 25.54, + "learning_rate": 3.7233505744563843e-05, + "loss": 2.2741, + "step": 5155500 + }, + { + "epoch": 25.54, + "learning_rate": 3.723226715813776e-05, + "loss": 2.2887, + "step": 5156000 + }, + { + "epoch": 25.55, + "learning_rate": 3.723102857171168e-05, + "loss": 2.2849, + "step": 5156500 + }, + { + "epoch": 25.55, + "learning_rate": 3.7229789985285594e-05, + "loss": 2.2911, + "step": 5157000 + }, + { + "epoch": 25.55, + "learning_rate": 3.722855139885951e-05, + "loss": 2.266, + "step": 5157500 + }, + { + "epoch": 25.55, + "learning_rate": 3.722731281243343e-05, + "loss": 2.2631, + "step": 5158000 + }, + { + "epoch": 25.56, + "learning_rate": 3.7226074226007345e-05, + "loss": 2.2779, + "step": 5158500 + }, + { + "epoch": 25.56, + "learning_rate": 3.722483563958126e-05, + "loss": 2.2959, + "step": 5159000 + }, + { + "epoch": 25.56, + "learning_rate": 3.722359705315518e-05, + "loss": 2.306, + "step": 5159500 + }, + { + "epoch": 25.56, + "learning_rate": 3.7222358466729096e-05, + "loss": 2.2844, + "step": 5160000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7221119880303006e-05, + "loss": 2.2897, + "step": 5160500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721988377104978e-05, + "loss": 2.2778, + "step": 5161000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7218647661796544e-05, + "loss": 2.2916, + "step": 5161500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721740907537046e-05, + "loss": 2.2833, + "step": 5162000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721617048894438e-05, + "loss": 2.2951, + "step": 5162500 + }, + { + "epoch": 25.58, + "learning_rate": 3.7214931902518294e-05, + "loss": 2.273, + "step": 5163000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721369331609221e-05, + "loss": 2.2695, + "step": 5163500 + }, + { + "epoch": 25.58, + "learning_rate": 3.721245720683898e-05, + "loss": 2.2833, + "step": 5164000 + }, + { + "epoch": 25.59, + "learning_rate": 3.72112186204129e-05, + "loss": 2.2957, + "step": 5164500 + }, + { + "epoch": 25.59, + "learning_rate": 3.7209980033986814e-05, + "loss": 2.2693, + "step": 5165000 + }, + { + "epoch": 25.59, + "learning_rate": 3.720874144756073e-05, + "loss": 2.2783, + "step": 5165500 + }, + { + "epoch": 25.59, + "learning_rate": 3.720750286113465e-05, + "loss": 2.2865, + "step": 5166000 + }, + { + "epoch": 25.6, + "learning_rate": 3.7206264274708565e-05, + "loss": 2.2855, + "step": 5166500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720502568828248e-05, + "loss": 2.2972, + "step": 5167000 + }, + { + "epoch": 25.6, + "learning_rate": 3.72037871018564e-05, + "loss": 2.2696, + "step": 5167500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720255099260316e-05, + "loss": 2.2739, + "step": 5168000 + }, + { + "epoch": 25.61, + "learning_rate": 3.720131488334993e-05, + "loss": 2.2988, + "step": 5168500 + }, + { + "epoch": 25.61, + "learning_rate": 3.7200078774096705e-05, + "loss": 2.2543, + "step": 5169000 + }, + { + "epoch": 25.61, + "learning_rate": 3.719884018767062e-05, + "loss": 2.2768, + "step": 5169500 + }, + { + "epoch": 25.61, + "learning_rate": 3.719760160124454e-05, + "loss": 2.2944, + "step": 5170000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719636301481845e-05, + "loss": 2.2929, + "step": 5170500 + }, + { + "epoch": 25.62, + "learning_rate": 3.7195124428392366e-05, + "loss": 2.273, + "step": 5171000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719388584196628e-05, + "loss": 2.2615, + "step": 5171500 + }, + { + "epoch": 25.62, + "learning_rate": 3.71926472555402e-05, + "loss": 2.2707, + "step": 5172000 + }, + { + "epoch": 25.63, + "learning_rate": 3.719140866911412e-05, + "loss": 2.288, + "step": 5172500 + }, + { + "epoch": 25.63, + "learning_rate": 3.7190170082688034e-05, + "loss": 2.2828, + "step": 5173000 + }, + { + "epoch": 25.63, + "learning_rate": 3.718893149626195e-05, + "loss": 2.2858, + "step": 5173500 + }, + { + "epoch": 25.63, + "learning_rate": 3.718769290983586e-05, + "loss": 2.2594, + "step": 5174000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718645432340978e-05, + "loss": 2.2852, + "step": 5174500 + }, + { + "epoch": 25.64, + "learning_rate": 3.7185215736983695e-05, + "loss": 2.2674, + "step": 5175000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718398210490332e-05, + "loss": 2.2819, + "step": 5175500 + }, + { + "epoch": 25.64, + "learning_rate": 3.718274351847724e-05, + "loss": 2.2753, + "step": 5176000 + }, + { + "epoch": 25.65, + "learning_rate": 3.718150493205115e-05, + "loss": 2.2723, + "step": 5176500 + }, + { + "epoch": 25.65, + "learning_rate": 3.7180266345625066e-05, + "loss": 2.2838, + "step": 5177000 + }, + { + "epoch": 25.65, + "learning_rate": 3.717902775919898e-05, + "loss": 2.2957, + "step": 5177500 + }, + { + "epoch": 25.65, + "learning_rate": 3.717779164994575e-05, + "loss": 2.2762, + "step": 5178000 + }, + { + "epoch": 25.66, + "learning_rate": 3.717655306351967e-05, + "loss": 2.2829, + "step": 5178500 + }, + { + "epoch": 25.66, + "learning_rate": 3.7175314477093586e-05, + "loss": 2.2563, + "step": 5179000 + }, + { + "epoch": 25.66, + "learning_rate": 3.71740758906675e-05, + "loss": 2.2887, + "step": 5179500 + }, + { + "epoch": 25.66, + "learning_rate": 3.717283730424141e-05, + "loss": 2.283, + "step": 5180000 + }, + { + "epoch": 25.67, + "learning_rate": 3.717160119498819e-05, + "loss": 2.2762, + "step": 5180500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7170362608562105e-05, + "loss": 2.2929, + "step": 5181000 + }, + { + "epoch": 25.67, + "learning_rate": 3.716912649930887e-05, + "loss": 2.2826, + "step": 5181500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7167887912882784e-05, + "loss": 2.3166, + "step": 5182000 + }, + { + "epoch": 25.68, + "learning_rate": 3.71666493264567e-05, + "loss": 2.2852, + "step": 5182500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716541074003062e-05, + "loss": 2.2891, + "step": 5183000 + }, + { + "epoch": 25.68, + "learning_rate": 3.7164172153604535e-05, + "loss": 2.2888, + "step": 5183500 + }, + { + "epoch": 25.68, + "learning_rate": 3.7162936044351304e-05, + "loss": 2.2808, + "step": 5184000 + }, + { + "epoch": 25.69, + "learning_rate": 3.716169745792522e-05, + "loss": 2.2693, + "step": 5184500 + }, + { + "epoch": 25.69, + "learning_rate": 3.716046134867199e-05, + "loss": 2.2618, + "step": 5185000 + }, + { + "epoch": 25.69, + "learning_rate": 3.7159222762245906e-05, + "loss": 2.2966, + "step": 5185500 + }, + { + "epoch": 25.69, + "learning_rate": 3.7157984175819823e-05, + "loss": 2.2962, + "step": 5186000 + }, + { + "epoch": 25.7, + "learning_rate": 3.715674558939374e-05, + "loss": 2.2637, + "step": 5186500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715550700296766e-05, + "loss": 2.3125, + "step": 5187000 + }, + { + "epoch": 25.7, + "learning_rate": 3.715427089371442e-05, + "loss": 2.2799, + "step": 5187500 + }, + { + "epoch": 25.7, + "learning_rate": 3.7153032307288336e-05, + "loss": 2.3013, + "step": 5188000 + }, + { + "epoch": 25.71, + "learning_rate": 3.715179372086225e-05, + "loss": 2.2987, + "step": 5188500 + }, + { + "epoch": 25.71, + "learning_rate": 3.715055513443617e-05, + "loss": 2.3231, + "step": 5189000 + }, + { + "epoch": 25.71, + "learning_rate": 3.714931654801009e-05, + "loss": 2.2836, + "step": 5189500 + }, + { + "epoch": 25.71, + "learning_rate": 3.7148077961584004e-05, + "loss": 2.2828, + "step": 5190000 + }, + { + "epoch": 25.72, + "learning_rate": 3.7146844329503625e-05, + "loss": 2.2943, + "step": 5190500 + }, + { + "epoch": 25.72, + "learning_rate": 3.714560574307754e-05, + "loss": 2.2569, + "step": 5191000 + }, + { + "epoch": 25.72, + "learning_rate": 3.714436715665146e-05, + "loss": 2.2996, + "step": 5191500 + }, + { + "epoch": 25.72, + "learning_rate": 3.7143128570225375e-05, + "loss": 2.2977, + "step": 5192000 + }, + { + "epoch": 25.73, + "learning_rate": 3.714188998379929e-05, + "loss": 2.2883, + "step": 5192500 + }, + { + "epoch": 25.73, + "learning_rate": 3.71406513973732e-05, + "loss": 2.2551, + "step": 5193000 + }, + { + "epoch": 25.73, + "learning_rate": 3.713941281094712e-05, + "loss": 2.2708, + "step": 5193500 + }, + { + "epoch": 25.73, + "learning_rate": 3.7138174224521036e-05, + "loss": 2.2823, + "step": 5194000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713693563809495e-05, + "loss": 2.274, + "step": 5194500 + }, + { + "epoch": 25.74, + "learning_rate": 3.713569705166887e-05, + "loss": 2.2976, + "step": 5195000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713445846524279e-05, + "loss": 2.2881, + "step": 5195500 + }, + { + "epoch": 25.74, + "learning_rate": 3.7133219878816704e-05, + "loss": 2.2842, + "step": 5196000 + }, + { + "epoch": 25.75, + "learning_rate": 3.713198129239062e-05, + "loss": 2.2757, + "step": 5196500 + }, + { + "epoch": 25.75, + "learning_rate": 3.713074270596454e-05, + "loss": 2.2781, + "step": 5197000 + }, + { + "epoch": 25.75, + "learning_rate": 3.7129504119538455e-05, + "loss": 2.2648, + "step": 5197500 + }, + { + "epoch": 25.75, + "learning_rate": 3.712826553311237e-05, + "loss": 2.2698, + "step": 5198000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712702942385914e-05, + "loss": 2.2989, + "step": 5198500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712579083743306e-05, + "loss": 2.2976, + "step": 5199000 + }, + { + "epoch": 25.76, + "learning_rate": 3.7124552251006974e-05, + "loss": 2.2934, + "step": 5199500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712331366458089e-05, + "loss": 2.2713, + "step": 5200000 + }, + { + "epoch": 25.77, + "learning_rate": 3.7122077555327653e-05, + "loss": 2.3012, + "step": 5200500 + }, + { + "epoch": 25.77, + "learning_rate": 3.712083896890157e-05, + "loss": 2.2833, + "step": 5201000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711960038247549e-05, + "loss": 2.2911, + "step": 5201500 + }, + { + "epoch": 25.77, + "learning_rate": 3.7118361796049404e-05, + "loss": 2.2916, + "step": 5202000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711712320962332e-05, + "loss": 2.2596, + "step": 5202500 + }, + { + "epoch": 25.78, + "learning_rate": 3.711588710037009e-05, + "loss": 2.2809, + "step": 5203000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711464851394401e-05, + "loss": 2.3006, + "step": 5203500 + }, + { + "epoch": 25.78, + "learning_rate": 3.7113409927517924e-05, + "loss": 2.2742, + "step": 5204000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711217134109184e-05, + "loss": 2.2681, + "step": 5204500 + }, + { + "epoch": 25.79, + "learning_rate": 3.711093275466576e-05, + "loss": 2.3087, + "step": 5205000 + }, + { + "epoch": 25.79, + "learning_rate": 3.7109694168239675e-05, + "loss": 2.3044, + "step": 5205500 + }, + { + "epoch": 25.79, + "learning_rate": 3.710845558181359e-05, + "loss": 2.277, + "step": 5206000 + }, + { + "epoch": 25.79, + "learning_rate": 3.710721699538751e-05, + "loss": 2.2873, + "step": 5206500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710598088613427e-05, + "loss": 2.2831, + "step": 5207000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710474229970819e-05, + "loss": 2.2702, + "step": 5207500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710350619045496e-05, + "loss": 2.2777, + "step": 5208000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710226760402887e-05, + "loss": 2.2703, + "step": 5208500 + }, + { + "epoch": 25.81, + "learning_rate": 3.710102901760279e-05, + "loss": 2.2656, + "step": 5209000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709979043117671e-05, + "loss": 2.3058, + "step": 5209500 + }, + { + "epoch": 25.81, + "learning_rate": 3.7098551844750624e-05, + "loss": 2.3062, + "step": 5210000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709731573549739e-05, + "loss": 2.2778, + "step": 5210500 + }, + { + "epoch": 25.82, + "learning_rate": 3.709607714907131e-05, + "loss": 2.2868, + "step": 5211000 + }, + { + "epoch": 25.82, + "learning_rate": 3.7094838562645227e-05, + "loss": 2.2837, + "step": 5211500 + }, + { + "epoch": 25.82, + "learning_rate": 3.709359997621914e-05, + "loss": 2.2617, + "step": 5212000 + }, + { + "epoch": 25.82, + "learning_rate": 3.7092361389793054e-05, + "loss": 2.2789, + "step": 5212500 + }, + { + "epoch": 25.83, + "learning_rate": 3.709112528053983e-05, + "loss": 2.3062, + "step": 5213000 + }, + { + "epoch": 25.83, + "learning_rate": 3.7089886694113746e-05, + "loss": 2.2737, + "step": 5213500 + }, + { + "epoch": 25.83, + "learning_rate": 3.708864810768766e-05, + "loss": 2.2862, + "step": 5214000 + }, + { + "epoch": 25.83, + "learning_rate": 3.708740952126158e-05, + "loss": 2.2864, + "step": 5214500 + }, + { + "epoch": 25.84, + "learning_rate": 3.708617093483549e-05, + "loss": 2.3172, + "step": 5215000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708493234840941e-05, + "loss": 2.2644, + "step": 5215500 + }, + { + "epoch": 25.84, + "learning_rate": 3.7083693761983324e-05, + "loss": 2.2906, + "step": 5216000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708245765273009e-05, + "loss": 2.277, + "step": 5216500 + }, + { + "epoch": 25.85, + "learning_rate": 3.708121906630401e-05, + "loss": 2.2867, + "step": 5217000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707998047987793e-05, + "loss": 2.2931, + "step": 5217500 + }, + { + "epoch": 25.85, + "learning_rate": 3.7078744370624695e-05, + "loss": 2.2907, + "step": 5218000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707750578419861e-05, + "loss": 2.2581, + "step": 5218500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707626719777253e-05, + "loss": 2.3109, + "step": 5219000 + }, + { + "epoch": 25.86, + "learning_rate": 3.7075028611346446e-05, + "loss": 2.293, + "step": 5219500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707379002492036e-05, + "loss": 2.311, + "step": 5220000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707255143849428e-05, + "loss": 2.2732, + "step": 5220500 + }, + { + "epoch": 25.87, + "learning_rate": 3.707131285206819e-05, + "loss": 2.2977, + "step": 5221000 + }, + { + "epoch": 25.87, + "learning_rate": 3.707007426564211e-05, + "loss": 2.2984, + "step": 5221500 + }, + { + "epoch": 25.87, + "learning_rate": 3.706883815638888e-05, + "loss": 2.2718, + "step": 5222000 + }, + { + "epoch": 25.87, + "learning_rate": 3.7067602047135645e-05, + "loss": 2.2732, + "step": 5222500 + }, + { + "epoch": 25.88, + "learning_rate": 3.706636346070956e-05, + "loss": 2.2941, + "step": 5223000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706512487428348e-05, + "loss": 2.2739, + "step": 5223500 + }, + { + "epoch": 25.88, + "learning_rate": 3.7063886287857396e-05, + "loss": 2.265, + "step": 5224000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706264770143131e-05, + "loss": 2.2956, + "step": 5224500 + }, + { + "epoch": 25.89, + "learning_rate": 3.706140911500523e-05, + "loss": 2.2913, + "step": 5225000 + }, + { + "epoch": 25.89, + "learning_rate": 3.7060170528579146e-05, + "loss": 2.2667, + "step": 5225500 + }, + { + "epoch": 25.89, + "learning_rate": 3.705893194215306e-05, + "loss": 2.3244, + "step": 5226000 + }, + { + "epoch": 25.89, + "learning_rate": 3.705769335572698e-05, + "loss": 2.2802, + "step": 5226500 + }, + { + "epoch": 25.9, + "learning_rate": 3.70564547693009e-05, + "loss": 2.2817, + "step": 5227000 + }, + { + "epoch": 25.9, + "learning_rate": 3.705521618287481e-05, + "loss": 2.3061, + "step": 5227500 + }, + { + "epoch": 25.9, + "learning_rate": 3.7053977596448724e-05, + "loss": 2.2814, + "step": 5228000 + }, + { + "epoch": 25.9, + "learning_rate": 3.705273901002264e-05, + "loss": 2.2876, + "step": 5228500 + }, + { + "epoch": 25.91, + "learning_rate": 3.705150042359656e-05, + "loss": 2.2637, + "step": 5229000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7050261837170475e-05, + "loss": 2.2767, + "step": 5229500 + }, + { + "epoch": 25.91, + "learning_rate": 3.704902325074439e-05, + "loss": 2.2874, + "step": 5230000 + }, + { + "epoch": 25.91, + "learning_rate": 3.704778466431831e-05, + "loss": 2.2886, + "step": 5230500 + }, + { + "epoch": 25.92, + "learning_rate": 3.7046546077892226e-05, + "loss": 2.2975, + "step": 5231000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704530749146614e-05, + "loss": 2.2706, + "step": 5231500 + }, + { + "epoch": 25.92, + "learning_rate": 3.7044071382212905e-05, + "loss": 2.2877, + "step": 5232000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704283279578682e-05, + "loss": 2.3207, + "step": 5232500 + }, + { + "epoch": 25.93, + "learning_rate": 3.704159420936074e-05, + "loss": 2.2841, + "step": 5233000 + }, + { + "epoch": 25.93, + "learning_rate": 3.7040355622934656e-05, + "loss": 2.2769, + "step": 5233500 + }, + { + "epoch": 25.93, + "learning_rate": 3.7039119513681424e-05, + "loss": 2.2918, + "step": 5234000 + }, + { + "epoch": 25.93, + "learning_rate": 3.703788092725534e-05, + "loss": 2.288, + "step": 5234500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703664234082926e-05, + "loss": 2.2764, + "step": 5235000 + }, + { + "epoch": 25.94, + "learning_rate": 3.7035403754403175e-05, + "loss": 2.283, + "step": 5235500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703416764514995e-05, + "loss": 2.2929, + "step": 5236000 + }, + { + "epoch": 25.94, + "learning_rate": 3.703292905872386e-05, + "loss": 2.2911, + "step": 5236500 + }, + { + "epoch": 25.95, + "learning_rate": 3.703169047229778e-05, + "loss": 2.2815, + "step": 5237000 + }, + { + "epoch": 25.95, + "learning_rate": 3.7030451885871695e-05, + "loss": 2.3038, + "step": 5237500 + }, + { + "epoch": 25.95, + "learning_rate": 3.702921329944561e-05, + "loss": 2.2901, + "step": 5238000 + }, + { + "epoch": 25.95, + "learning_rate": 3.702797471301953e-05, + "loss": 2.3041, + "step": 5238500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702673612659344e-05, + "loss": 2.2909, + "step": 5239000 + }, + { + "epoch": 25.96, + "learning_rate": 3.7025497540167356e-05, + "loss": 2.3005, + "step": 5239500 + }, + { + "epoch": 25.96, + "learning_rate": 3.7024261430914125e-05, + "loss": 2.3043, + "step": 5240000 + }, + { + "epoch": 25.96, + "learning_rate": 3.702302284448804e-05, + "loss": 2.3069, + "step": 5240500 + }, + { + "epoch": 25.97, + "learning_rate": 3.702178425806196e-05, + "loss": 2.2887, + "step": 5241000 + }, + { + "epoch": 25.97, + "learning_rate": 3.7020545671635875e-05, + "loss": 2.2661, + "step": 5241500 + }, + { + "epoch": 25.97, + "learning_rate": 3.701930708520979e-05, + "loss": 2.2777, + "step": 5242000 + }, + { + "epoch": 25.97, + "learning_rate": 3.701806849878371e-05, + "loss": 2.2992, + "step": 5242500 + }, + { + "epoch": 25.98, + "learning_rate": 3.701683238953048e-05, + "loss": 2.2748, + "step": 5243000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701559628027725e-05, + "loss": 2.2756, + "step": 5243500 + }, + { + "epoch": 25.98, + "learning_rate": 3.7014357693851164e-05, + "loss": 2.2784, + "step": 5244000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701311910742508e-05, + "loss": 2.307, + "step": 5244500 + }, + { + "epoch": 25.99, + "learning_rate": 3.7011880520999e-05, + "loss": 2.2752, + "step": 5245000 + }, + { + "epoch": 25.99, + "learning_rate": 3.7010641934572914e-05, + "loss": 2.2948, + "step": 5245500 + }, + { + "epoch": 25.99, + "learning_rate": 3.7009403348146825e-05, + "loss": 2.3109, + "step": 5246000 + }, + { + "epoch": 25.99, + "learning_rate": 3.700816476172074e-05, + "loss": 2.2798, + "step": 5246500 + }, + { + "epoch": 26.0, + "learning_rate": 3.700692617529466e-05, + "loss": 2.3082, + "step": 5247000 + }, + { + "epoch": 26.0, + "learning_rate": 3.7005687588868575e-05, + "loss": 2.3089, + "step": 5247500 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6544683759370017, + "eval_accuracy_mlm": 0.6095992095473112, + "eval_accuracy_nsp": 0.8660647398209124, + "eval_loss": 2.3457705974578857, + "eval_runtime": 146.2347, + "eval_samples_per_second": 1743.491, + "eval_steps_per_second": 72.65, + "step": 5247918 + }, + { + "epoch": 26.0, + "learning_rate": 3.700444900244249e-05, + "loss": 2.275, + "step": 5248000 + }, + { + "epoch": 26.0, + "learning_rate": 3.700321041601641e-05, + "loss": 2.2462, + "step": 5248500 + }, + { + "epoch": 26.01, + "learning_rate": 3.700197430676318e-05, + "loss": 2.2828, + "step": 5249000 + }, + { + "epoch": 26.01, + "learning_rate": 3.7000735720337095e-05, + "loss": 2.2497, + "step": 5249500 + }, + { + "epoch": 26.01, + "learning_rate": 3.699949713391101e-05, + "loss": 2.2561, + "step": 5250000 + }, + { + "epoch": 26.01, + "learning_rate": 3.699825854748493e-05, + "loss": 2.2692, + "step": 5250500 + }, + { + "epoch": 26.02, + "learning_rate": 3.6997019961058846e-05, + "loss": 2.2704, + "step": 5251000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699578137463276e-05, + "loss": 2.2654, + "step": 5251500 + }, + { + "epoch": 26.02, + "learning_rate": 3.699454278820668e-05, + "loss": 2.2692, + "step": 5252000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699330420178059e-05, + "loss": 2.2392, + "step": 5252500 + }, + { + "epoch": 26.03, + "learning_rate": 3.699206561535451e-05, + "loss": 2.2405, + "step": 5253000 + }, + { + "epoch": 26.03, + "learning_rate": 3.6990831983274134e-05, + "loss": 2.2633, + "step": 5253500 + }, + { + "epoch": 26.03, + "learning_rate": 3.6989595874020896e-05, + "loss": 2.2523, + "step": 5254000 + }, + { + "epoch": 26.03, + "learning_rate": 3.698835728759481e-05, + "loss": 2.2422, + "step": 5254500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698711870116873e-05, + "loss": 2.2852, + "step": 5255000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698588011474265e-05, + "loss": 2.2703, + "step": 5255500 + }, + { + "epoch": 26.04, + "learning_rate": 3.6984641528316564e-05, + "loss": 2.2614, + "step": 5256000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698340541906333e-05, + "loss": 2.2694, + "step": 5256500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698216683263725e-05, + "loss": 2.2442, + "step": 5257000 + }, + { + "epoch": 26.05, + "learning_rate": 3.698092824621117e-05, + "loss": 2.263, + "step": 5257500 + }, + { + "epoch": 26.05, + "learning_rate": 3.6979689659785084e-05, + "loss": 2.2511, + "step": 5258000 + }, + { + "epoch": 26.05, + "learning_rate": 3.6978451073359e-05, + "loss": 2.2405, + "step": 5258500 + }, + { + "epoch": 26.05, + "learning_rate": 3.697721248693292e-05, + "loss": 2.2604, + "step": 5259000 + }, + { + "epoch": 26.06, + "learning_rate": 3.6975973900506834e-05, + "loss": 2.2556, + "step": 5259500 + }, + { + "epoch": 26.06, + "learning_rate": 3.697473531408075e-05, + "loss": 2.2919, + "step": 5260000 + }, + { + "epoch": 26.06, + "learning_rate": 3.697349672765467e-05, + "loss": 2.2512, + "step": 5260500 + }, + { + "epoch": 26.06, + "learning_rate": 3.6972258141228585e-05, + "loss": 2.2531, + "step": 5261000 + }, + { + "epoch": 26.07, + "learning_rate": 3.697102203197535e-05, + "loss": 2.2625, + "step": 5261500 + }, + { + "epoch": 26.07, + "learning_rate": 3.6969783445549264e-05, + "loss": 2.2725, + "step": 5262000 + }, + { + "epoch": 26.07, + "learning_rate": 3.696854733629603e-05, + "loss": 2.2693, + "step": 5262500 + }, + { + "epoch": 26.07, + "learning_rate": 3.696730874986995e-05, + "loss": 2.2746, + "step": 5263000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696607016344387e-05, + "loss": 2.2666, + "step": 5263500 + }, + { + "epoch": 26.08, + "learning_rate": 3.6964831577017784e-05, + "loss": 2.271, + "step": 5264000 + }, + { + "epoch": 26.08, + "learning_rate": 3.69635929905917e-05, + "loss": 2.2448, + "step": 5264500 + }, + { + "epoch": 26.08, + "learning_rate": 3.696235440416562e-05, + "loss": 2.283, + "step": 5265000 + }, + { + "epoch": 26.09, + "learning_rate": 3.6961118294912386e-05, + "loss": 2.2575, + "step": 5265500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695988218565915e-05, + "loss": 2.2743, + "step": 5266000 + }, + { + "epoch": 26.09, + "learning_rate": 3.6958643599233065e-05, + "loss": 2.2608, + "step": 5266500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695740501280698e-05, + "loss": 2.2905, + "step": 5267000 + }, + { + "epoch": 26.1, + "learning_rate": 3.69561664263809e-05, + "loss": 2.2947, + "step": 5267500 + }, + { + "epoch": 26.1, + "learning_rate": 3.6954927839954816e-05, + "loss": 2.2509, + "step": 5268000 + }, + { + "epoch": 26.1, + "learning_rate": 3.695368925352873e-05, + "loss": 2.2685, + "step": 5268500 + }, + { + "epoch": 26.1, + "learning_rate": 3.695245066710265e-05, + "loss": 2.2794, + "step": 5269000 + }, + { + "epoch": 26.11, + "learning_rate": 3.695121208067657e-05, + "loss": 2.2516, + "step": 5269500 + }, + { + "epoch": 26.11, + "learning_rate": 3.6949973494250484e-05, + "loss": 2.2744, + "step": 5270000 + }, + { + "epoch": 26.11, + "learning_rate": 3.69487349078244e-05, + "loss": 2.2572, + "step": 5270500 + }, + { + "epoch": 26.11, + "learning_rate": 3.694749632139832e-05, + "loss": 2.2396, + "step": 5271000 + }, + { + "epoch": 26.12, + "learning_rate": 3.6946257734972235e-05, + "loss": 2.2823, + "step": 5271500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694501914854615e-05, + "loss": 2.2637, + "step": 5272000 + }, + { + "epoch": 26.12, + "learning_rate": 3.694378056212007e-05, + "loss": 2.2612, + "step": 5272500 + }, + { + "epoch": 26.12, + "learning_rate": 3.6942541975693985e-05, + "loss": 2.2399, + "step": 5273000 + }, + { + "epoch": 26.13, + "learning_rate": 3.69413033892679e-05, + "loss": 2.2862, + "step": 5273500 + }, + { + "epoch": 26.13, + "learning_rate": 3.6940067280014664e-05, + "loss": 2.284, + "step": 5274000 + }, + { + "epoch": 26.13, + "learning_rate": 3.693882869358858e-05, + "loss": 2.2642, + "step": 5274500 + }, + { + "epoch": 26.13, + "learning_rate": 3.69375901071625e-05, + "loss": 2.2629, + "step": 5275000 + }, + { + "epoch": 26.14, + "learning_rate": 3.6936351520736415e-05, + "loss": 2.2659, + "step": 5275500 + }, + { + "epoch": 26.14, + "learning_rate": 3.693511293431033e-05, + "loss": 2.2365, + "step": 5276000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693387434788425e-05, + "loss": 2.2769, + "step": 5276500 + }, + { + "epoch": 26.14, + "learning_rate": 3.693263823863102e-05, + "loss": 2.2433, + "step": 5277000 + }, + { + "epoch": 26.15, + "learning_rate": 3.6931399652204935e-05, + "loss": 2.2665, + "step": 5277500 + }, + { + "epoch": 26.15, + "learning_rate": 3.693016106577885e-05, + "loss": 2.2601, + "step": 5278000 + }, + { + "epoch": 26.15, + "learning_rate": 3.692892247935277e-05, + "loss": 2.3155, + "step": 5278500 + }, + { + "epoch": 26.15, + "learning_rate": 3.6927683892926685e-05, + "loss": 2.2518, + "step": 5279000 + }, + { + "epoch": 26.16, + "learning_rate": 3.69264453065006e-05, + "loss": 2.2622, + "step": 5279500 + }, + { + "epoch": 26.16, + "learning_rate": 3.692520672007452e-05, + "loss": 2.2623, + "step": 5280000 + }, + { + "epoch": 26.16, + "learning_rate": 3.692397061082128e-05, + "loss": 2.2732, + "step": 5280500 + }, + { + "epoch": 26.16, + "learning_rate": 3.69227320243952e-05, + "loss": 2.2425, + "step": 5281000 + }, + { + "epoch": 26.17, + "learning_rate": 3.6921493437969115e-05, + "loss": 2.2449, + "step": 5281500 + }, + { + "epoch": 26.17, + "learning_rate": 3.692025485154303e-05, + "loss": 2.2487, + "step": 5282000 + }, + { + "epoch": 26.17, + "learning_rate": 3.691901626511695e-05, + "loss": 2.2494, + "step": 5282500 + }, + { + "epoch": 26.17, + "learning_rate": 3.6917777678690866e-05, + "loss": 2.2683, + "step": 5283000 + }, + { + "epoch": 26.18, + "learning_rate": 3.6916539092264776e-05, + "loss": 2.2414, + "step": 5283500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691530050583869e-05, + "loss": 2.2555, + "step": 5284000 + }, + { + "epoch": 26.18, + "learning_rate": 3.691406439658547e-05, + "loss": 2.246, + "step": 5284500 + }, + { + "epoch": 26.18, + "learning_rate": 3.6912825810159386e-05, + "loss": 2.2715, + "step": 5285000 + }, + { + "epoch": 26.19, + "learning_rate": 3.69115872237333e-05, + "loss": 2.2586, + "step": 5285500 + }, + { + "epoch": 26.19, + "learning_rate": 3.691034863730722e-05, + "loss": 2.2493, + "step": 5286000 + }, + { + "epoch": 26.19, + "learning_rate": 3.690911500522683e-05, + "loss": 2.2628, + "step": 5286500 + }, + { + "epoch": 26.19, + "learning_rate": 3.690787641880075e-05, + "loss": 2.2646, + "step": 5287000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690663783237467e-05, + "loss": 2.245, + "step": 5287500 + }, + { + "epoch": 26.2, + "learning_rate": 3.6905399245948584e-05, + "loss": 2.2657, + "step": 5288000 + }, + { + "epoch": 26.2, + "learning_rate": 3.69041606595225e-05, + "loss": 2.2547, + "step": 5288500 + }, + { + "epoch": 26.2, + "learning_rate": 3.690292207309642e-05, + "loss": 2.2863, + "step": 5289000 + }, + { + "epoch": 26.21, + "learning_rate": 3.6901683486670335e-05, + "loss": 2.2627, + "step": 5289500 + }, + { + "epoch": 26.21, + "learning_rate": 3.690044490024425e-05, + "loss": 2.2421, + "step": 5290000 + }, + { + "epoch": 26.21, + "learning_rate": 3.689920631381817e-05, + "loss": 2.2575, + "step": 5290500 + }, + { + "epoch": 26.21, + "learning_rate": 3.6897967727392086e-05, + "loss": 2.2554, + "step": 5291000 + }, + { + "epoch": 26.22, + "learning_rate": 3.6896729140966e-05, + "loss": 2.2682, + "step": 5291500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689549055453992e-05, + "loss": 2.257, + "step": 5292000 + }, + { + "epoch": 26.22, + "learning_rate": 3.6894256922459534e-05, + "loss": 2.2817, + "step": 5292500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689301833603345e-05, + "loss": 2.2867, + "step": 5293000 + }, + { + "epoch": 26.23, + "learning_rate": 3.689177974960737e-05, + "loss": 2.2552, + "step": 5293500 + }, + { + "epoch": 26.23, + "learning_rate": 3.6890541163181284e-05, + "loss": 2.2692, + "step": 5294000 + }, + { + "epoch": 26.23, + "learning_rate": 3.68893025767552e-05, + "loss": 2.2514, + "step": 5294500 + }, + { + "epoch": 26.23, + "learning_rate": 3.688806399032912e-05, + "loss": 2.2587, + "step": 5295000 + }, + { + "epoch": 26.24, + "learning_rate": 3.6886825403903035e-05, + "loss": 2.2817, + "step": 5295500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688558681747695e-05, + "loss": 2.2631, + "step": 5296000 + }, + { + "epoch": 26.24, + "learning_rate": 3.688434823105087e-05, + "loss": 2.2627, + "step": 5296500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688311212179764e-05, + "loss": 2.2805, + "step": 5297000 + }, + { + "epoch": 26.25, + "learning_rate": 3.6881873535371555e-05, + "loss": 2.2687, + "step": 5297500 + }, + { + "epoch": 26.25, + "learning_rate": 3.688063494894547e-05, + "loss": 2.2834, + "step": 5298000 + }, + { + "epoch": 26.25, + "learning_rate": 3.687939636251939e-05, + "loss": 2.2436, + "step": 5298500 + }, + { + "epoch": 26.25, + "learning_rate": 3.687816025326615e-05, + "loss": 2.2389, + "step": 5299000 + }, + { + "epoch": 26.26, + "learning_rate": 3.687692166684007e-05, + "loss": 2.2489, + "step": 5299500 + }, + { + "epoch": 26.26, + "learning_rate": 3.6875683080413984e-05, + "loss": 2.2658, + "step": 5300000 + }, + { + "epoch": 26.26, + "learning_rate": 3.68744444939879e-05, + "loss": 2.2578, + "step": 5300500 + }, + { + "epoch": 26.26, + "learning_rate": 3.687320590756182e-05, + "loss": 2.2667, + "step": 5301000 + }, + { + "epoch": 26.27, + "learning_rate": 3.6871967321135735e-05, + "loss": 2.2481, + "step": 5301500 + }, + { + "epoch": 26.27, + "learning_rate": 3.687072873470965e-05, + "loss": 2.2475, + "step": 5302000 + }, + { + "epoch": 26.27, + "learning_rate": 3.686949014828357e-05, + "loss": 2.2658, + "step": 5302500 + }, + { + "epoch": 26.27, + "learning_rate": 3.6868251561857486e-05, + "loss": 2.2964, + "step": 5303000 + }, + { + "epoch": 26.28, + "learning_rate": 3.68670129754314e-05, + "loss": 2.2901, + "step": 5303500 + }, + { + "epoch": 26.28, + "learning_rate": 3.686577438900532e-05, + "loss": 2.2668, + "step": 5304000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686453827975209e-05, + "loss": 2.289, + "step": 5304500 + }, + { + "epoch": 26.28, + "learning_rate": 3.6863299693326006e-05, + "loss": 2.2806, + "step": 5305000 + }, + { + "epoch": 26.29, + "learning_rate": 3.686206110689992e-05, + "loss": 2.2532, + "step": 5305500 + }, + { + "epoch": 26.29, + "learning_rate": 3.686082252047384e-05, + "loss": 2.2487, + "step": 5306000 + }, + { + "epoch": 26.29, + "learning_rate": 3.6859583934047756e-05, + "loss": 2.2815, + "step": 5306500 + }, + { + "epoch": 26.29, + "learning_rate": 3.685834534762167e-05, + "loss": 2.2663, + "step": 5307000 + }, + { + "epoch": 26.3, + "learning_rate": 3.6857109238368435e-05, + "loss": 2.2659, + "step": 5307500 + }, + { + "epoch": 26.3, + "learning_rate": 3.685587065194235e-05, + "loss": 2.2551, + "step": 5308000 + }, + { + "epoch": 26.3, + "learning_rate": 3.685463206551627e-05, + "loss": 2.2631, + "step": 5308500 + }, + { + "epoch": 26.3, + "learning_rate": 3.6853393479090186e-05, + "loss": 2.2611, + "step": 5309000 + }, + { + "epoch": 26.31, + "learning_rate": 3.68521548926641e-05, + "loss": 2.2766, + "step": 5309500 + }, + { + "epoch": 26.31, + "learning_rate": 3.685091630623802e-05, + "loss": 2.2725, + "step": 5310000 + }, + { + "epoch": 26.31, + "learning_rate": 3.684967771981194e-05, + "loss": 2.2788, + "step": 5310500 + }, + { + "epoch": 26.31, + "learning_rate": 3.6848439133385854e-05, + "loss": 2.2321, + "step": 5311000 + }, + { + "epoch": 26.32, + "learning_rate": 3.684720302413262e-05, + "loss": 2.2733, + "step": 5311500 + }, + { + "epoch": 26.32, + "learning_rate": 3.684596443770654e-05, + "loss": 2.2867, + "step": 5312000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6844725851280456e-05, + "loss": 2.2839, + "step": 5312500 + }, + { + "epoch": 26.32, + "learning_rate": 3.6843487264854373e-05, + "loss": 2.2721, + "step": 5313000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6842251155601135e-05, + "loss": 2.2814, + "step": 5313500 + }, + { + "epoch": 26.33, + "learning_rate": 3.6841015046347904e-05, + "loss": 2.2994, + "step": 5314000 + }, + { + "epoch": 26.33, + "learning_rate": 3.683977645992182e-05, + "loss": 2.2346, + "step": 5314500 + }, + { + "epoch": 26.33, + "learning_rate": 3.683853787349574e-05, + "loss": 2.272, + "step": 5315000 + }, + { + "epoch": 26.33, + "learning_rate": 3.6837299287069655e-05, + "loss": 2.2787, + "step": 5315500 + }, + { + "epoch": 26.34, + "learning_rate": 3.683606070064357e-05, + "loss": 2.2815, + "step": 5316000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683482211421749e-05, + "loss": 2.2951, + "step": 5316500 + }, + { + "epoch": 26.34, + "learning_rate": 3.6833583527791406e-05, + "loss": 2.2583, + "step": 5317000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683234741853817e-05, + "loss": 2.2752, + "step": 5317500 + }, + { + "epoch": 26.35, + "learning_rate": 3.6831111309284943e-05, + "loss": 2.245, + "step": 5318000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682987520003171e-05, + "loss": 2.2642, + "step": 5318500 + }, + { + "epoch": 26.35, + "learning_rate": 3.6828639090778474e-05, + "loss": 2.2769, + "step": 5319000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682740050435239e-05, + "loss": 2.2546, + "step": 5319500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682616191792631e-05, + "loss": 2.2726, + "step": 5320000 + }, + { + "epoch": 26.36, + "learning_rate": 3.6824923331500225e-05, + "loss": 2.247, + "step": 5320500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682368474507414e-05, + "loss": 2.2547, + "step": 5321000 + }, + { + "epoch": 26.36, + "learning_rate": 3.682244615864806e-05, + "loss": 2.2627, + "step": 5321500 + }, + { + "epoch": 26.37, + "learning_rate": 3.6821207572221976e-05, + "loss": 2.2813, + "step": 5322000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681996898579589e-05, + "loss": 2.2566, + "step": 5322500 + }, + { + "epoch": 26.37, + "learning_rate": 3.681873039936981e-05, + "loss": 2.2719, + "step": 5323000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681749181294373e-05, + "loss": 2.2736, + "step": 5323500 + }, + { + "epoch": 26.38, + "learning_rate": 3.6816253226517644e-05, + "loss": 2.2618, + "step": 5324000 + }, + { + "epoch": 26.38, + "learning_rate": 3.681501464009156e-05, + "loss": 2.2542, + "step": 5324500 + }, + { + "epoch": 26.38, + "learning_rate": 3.681377605366547e-05, + "loss": 2.262, + "step": 5325000 + }, + { + "epoch": 26.38, + "learning_rate": 3.6812539944412246e-05, + "loss": 2.2745, + "step": 5325500 + }, + { + "epoch": 26.39, + "learning_rate": 3.681130135798616e-05, + "loss": 2.2565, + "step": 5326000 + }, + { + "epoch": 26.39, + "learning_rate": 3.681006277156008e-05, + "loss": 2.2563, + "step": 5326500 + }, + { + "epoch": 26.39, + "learning_rate": 3.6808824185134e-05, + "loss": 2.2395, + "step": 5327000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6807585598707914e-05, + "loss": 2.2644, + "step": 5327500 + }, + { + "epoch": 26.4, + "learning_rate": 3.6806347012281824e-05, + "loss": 2.2437, + "step": 5328000 + }, + { + "epoch": 26.4, + "learning_rate": 3.680510842585574e-05, + "loss": 2.2721, + "step": 5328500 + }, + { + "epoch": 26.4, + "learning_rate": 3.680386983942966e-05, + "loss": 2.273, + "step": 5329000 + }, + { + "epoch": 26.4, + "learning_rate": 3.6802631253003575e-05, + "loss": 2.2778, + "step": 5329500 + }, + { + "epoch": 26.41, + "learning_rate": 3.680139266657749e-05, + "loss": 2.2782, + "step": 5330000 + }, + { + "epoch": 26.41, + "learning_rate": 3.680015408015141e-05, + "loss": 2.2676, + "step": 5330500 + }, + { + "epoch": 26.41, + "learning_rate": 3.679891549372532e-05, + "loss": 2.2632, + "step": 5331000 + }, + { + "epoch": 26.41, + "learning_rate": 3.6797676907299236e-05, + "loss": 2.2902, + "step": 5331500 + }, + { + "epoch": 26.42, + "learning_rate": 3.679643832087315e-05, + "loss": 2.2698, + "step": 5332000 + }, + { + "epoch": 26.42, + "learning_rate": 3.679519973444707e-05, + "loss": 2.2682, + "step": 5332500 + }, + { + "epoch": 26.42, + "learning_rate": 3.6793961148020987e-05, + "loss": 2.2695, + "step": 5333000 + }, + { + "epoch": 26.42, + "learning_rate": 3.6792722561594904e-05, + "loss": 2.2699, + "step": 5333500 + }, + { + "epoch": 26.43, + "learning_rate": 3.679148645234167e-05, + "loss": 2.2628, + "step": 5334000 + }, + { + "epoch": 26.43, + "learning_rate": 3.679025034308844e-05, + "loss": 2.2823, + "step": 5334500 + }, + { + "epoch": 26.43, + "learning_rate": 3.678901175666236e-05, + "loss": 2.2675, + "step": 5335000 + }, + { + "epoch": 26.43, + "learning_rate": 3.6787773170236275e-05, + "loss": 2.2469, + "step": 5335500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678653458381019e-05, + "loss": 2.2607, + "step": 5336000 + }, + { + "epoch": 26.44, + "learning_rate": 3.678529599738411e-05, + "loss": 2.2687, + "step": 5336500 + }, + { + "epoch": 26.44, + "learning_rate": 3.6784057410958026e-05, + "loss": 2.2789, + "step": 5337000 + }, + { + "epoch": 26.44, + "learning_rate": 3.678281882453194e-05, + "loss": 2.2953, + "step": 5337500 + }, + { + "epoch": 26.45, + "learning_rate": 3.678158023810585e-05, + "loss": 2.2404, + "step": 5338000 + }, + { + "epoch": 26.45, + "learning_rate": 3.678034165167977e-05, + "loss": 2.282, + "step": 5338500 + }, + { + "epoch": 26.45, + "learning_rate": 3.677910306525369e-05, + "loss": 2.2767, + "step": 5339000 + }, + { + "epoch": 26.45, + "learning_rate": 3.6777864478827604e-05, + "loss": 2.2794, + "step": 5339500 + }, + { + "epoch": 26.46, + "learning_rate": 3.677662589240152e-05, + "loss": 2.2838, + "step": 5340000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677538730597544e-05, + "loss": 2.2583, + "step": 5340500 + }, + { + "epoch": 26.46, + "learning_rate": 3.6774151196722206e-05, + "loss": 2.2644, + "step": 5341000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677291261029612e-05, + "loss": 2.2895, + "step": 5341500 + }, + { + "epoch": 26.47, + "learning_rate": 3.677167402387004e-05, + "loss": 2.2884, + "step": 5342000 + }, + { + "epoch": 26.47, + "learning_rate": 3.677043543744396e-05, + "loss": 2.2567, + "step": 5342500 + }, + { + "epoch": 26.47, + "learning_rate": 3.6769196851017874e-05, + "loss": 2.2824, + "step": 5343000 + }, + { + "epoch": 26.47, + "learning_rate": 3.676796074176464e-05, + "loss": 2.2754, + "step": 5343500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676672215533856e-05, + "loss": 2.2883, + "step": 5344000 + }, + { + "epoch": 26.48, + "learning_rate": 3.676548604608532e-05, + "loss": 2.2732, + "step": 5344500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676424745965924e-05, + "loss": 2.2879, + "step": 5345000 + }, + { + "epoch": 26.48, + "learning_rate": 3.6763008873233156e-05, + "loss": 2.2564, + "step": 5345500 + }, + { + "epoch": 26.49, + "learning_rate": 3.676177028680707e-05, + "loss": 2.2885, + "step": 5346000 + }, + { + "epoch": 26.49, + "learning_rate": 3.676053170038099e-05, + "loss": 2.2796, + "step": 5346500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6759293113954906e-05, + "loss": 2.2713, + "step": 5347000 + }, + { + "epoch": 26.49, + "learning_rate": 3.6758057004701675e-05, + "loss": 2.2911, + "step": 5347500 + }, + { + "epoch": 26.5, + "learning_rate": 3.6756823372621296e-05, + "loss": 2.2539, + "step": 5348000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675558478619521e-05, + "loss": 2.2742, + "step": 5348500 + }, + { + "epoch": 26.5, + "learning_rate": 3.675434619976913e-05, + "loss": 2.246, + "step": 5349000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675310761334305e-05, + "loss": 2.2775, + "step": 5349500 + }, + { + "epoch": 26.51, + "learning_rate": 3.6751871504089816e-05, + "loss": 2.2543, + "step": 5350000 + }, + { + "epoch": 26.51, + "learning_rate": 3.675063291766373e-05, + "loss": 2.2928, + "step": 5350500 + }, + { + "epoch": 26.51, + "learning_rate": 3.674939433123765e-05, + "loss": 2.2686, + "step": 5351000 + }, + { + "epoch": 26.51, + "learning_rate": 3.6748155744811566e-05, + "loss": 2.2733, + "step": 5351500 + }, + { + "epoch": 26.52, + "learning_rate": 3.674691715838548e-05, + "loss": 2.2779, + "step": 5352000 + }, + { + "epoch": 26.52, + "learning_rate": 3.6745678571959393e-05, + "loss": 2.2912, + "step": 5352500 + }, + { + "epoch": 26.52, + "learning_rate": 3.674444246270616e-05, + "loss": 2.2687, + "step": 5353000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674320387628008e-05, + "loss": 2.2662, + "step": 5353500 + }, + { + "epoch": 26.53, + "learning_rate": 3.674196776702685e-05, + "loss": 2.2396, + "step": 5354000 + }, + { + "epoch": 26.53, + "learning_rate": 3.6740729180600765e-05, + "loss": 2.2742, + "step": 5354500 + }, + { + "epoch": 26.53, + "learning_rate": 3.673949059417468e-05, + "loss": 2.2846, + "step": 5355000 + }, + { + "epoch": 26.53, + "learning_rate": 3.67382520077486e-05, + "loss": 2.2667, + "step": 5355500 + }, + { + "epoch": 26.54, + "learning_rate": 3.6737013421322516e-05, + "loss": 2.2765, + "step": 5356000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673577483489643e-05, + "loss": 2.251, + "step": 5356500 + }, + { + "epoch": 26.54, + "learning_rate": 3.673453624847035e-05, + "loss": 2.2586, + "step": 5357000 + }, + { + "epoch": 26.54, + "learning_rate": 3.6733297662044266e-05, + "loss": 2.2858, + "step": 5357500 + }, + { + "epoch": 26.55, + "learning_rate": 3.673205907561818e-05, + "loss": 2.2807, + "step": 5358000 + }, + { + "epoch": 26.55, + "learning_rate": 3.6730822966364945e-05, + "loss": 2.2795, + "step": 5358500 + }, + { + "epoch": 26.55, + "learning_rate": 3.672958437993886e-05, + "loss": 2.2698, + "step": 5359000 + }, + { + "epoch": 26.55, + "learning_rate": 3.672834579351278e-05, + "loss": 2.2692, + "step": 5359500 + }, + { + "epoch": 26.56, + "learning_rate": 3.6727107207086696e-05, + "loss": 2.2697, + "step": 5360000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672586862066061e-05, + "loss": 2.2799, + "step": 5360500 + }, + { + "epoch": 26.56, + "learning_rate": 3.672463003423453e-05, + "loss": 2.2751, + "step": 5361000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672339144780845e-05, + "loss": 2.2689, + "step": 5361500 + }, + { + "epoch": 26.57, + "learning_rate": 3.6722152861382364e-05, + "loss": 2.2778, + "step": 5362000 + }, + { + "epoch": 26.57, + "learning_rate": 3.672091675212913e-05, + "loss": 2.2961, + "step": 5362500 + }, + { + "epoch": 26.57, + "learning_rate": 3.671967816570305e-05, + "loss": 2.2765, + "step": 5363000 + }, + { + "epoch": 26.57, + "learning_rate": 3.6718439579276967e-05, + "loss": 2.2756, + "step": 5363500 + }, + { + "epoch": 26.58, + "learning_rate": 3.6717200992850884e-05, + "loss": 2.3013, + "step": 5364000 + }, + { + "epoch": 26.58, + "learning_rate": 3.67159624064248e-05, + "loss": 2.2892, + "step": 5364500 + }, + { + "epoch": 26.58, + "learning_rate": 3.671473125151727e-05, + "loss": 2.3011, + "step": 5365000 + }, + { + "epoch": 26.58, + "learning_rate": 3.671349266509119e-05, + "loss": 2.3148, + "step": 5365500 + }, + { + "epoch": 26.59, + "learning_rate": 3.671225407866511e-05, + "loss": 2.2778, + "step": 5366000 + }, + { + "epoch": 26.59, + "learning_rate": 3.671101549223902e-05, + "loss": 2.2993, + "step": 5366500 + }, + { + "epoch": 26.59, + "learning_rate": 3.6709776905812934e-05, + "loss": 2.2779, + "step": 5367000 + }, + { + "epoch": 26.59, + "learning_rate": 3.670853831938685e-05, + "loss": 2.2696, + "step": 5367500 + }, + { + "epoch": 26.59, + "learning_rate": 3.670729973296077e-05, + "loss": 2.2576, + "step": 5368000 + }, + { + "epoch": 26.6, + "learning_rate": 3.6706061146534685e-05, + "loss": 2.2611, + "step": 5368500 + }, + { + "epoch": 26.6, + "learning_rate": 3.6704825037281454e-05, + "loss": 2.2756, + "step": 5369000 + }, + { + "epoch": 26.6, + "learning_rate": 3.670358645085537e-05, + "loss": 2.2703, + "step": 5369500 + }, + { + "epoch": 26.6, + "learning_rate": 3.670234786442929e-05, + "loss": 2.2862, + "step": 5370000 + }, + { + "epoch": 26.61, + "learning_rate": 3.6701109278003204e-05, + "loss": 2.3082, + "step": 5370500 + }, + { + "epoch": 26.61, + "learning_rate": 3.669987069157712e-05, + "loss": 2.2528, + "step": 5371000 + }, + { + "epoch": 26.61, + "learning_rate": 3.669863210515104e-05, + "loss": 2.2901, + "step": 5371500 + }, + { + "epoch": 26.61, + "learning_rate": 3.6697393518724955e-05, + "loss": 2.2658, + "step": 5372000 + }, + { + "epoch": 26.62, + "learning_rate": 3.6696154932298865e-05, + "loss": 2.2874, + "step": 5372500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669491634587278e-05, + "loss": 2.2565, + "step": 5373000 + }, + { + "epoch": 26.62, + "learning_rate": 3.66936777594467e-05, + "loss": 2.2681, + "step": 5373500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669244412736632e-05, + "loss": 2.2731, + "step": 5374000 + }, + { + "epoch": 26.63, + "learning_rate": 3.669120554094024e-05, + "loss": 2.3088, + "step": 5374500 + }, + { + "epoch": 26.63, + "learning_rate": 3.6689966954514154e-05, + "loss": 2.2603, + "step": 5375000 + }, + { + "epoch": 26.63, + "learning_rate": 3.668872836808807e-05, + "loss": 2.2773, + "step": 5375500 + }, + { + "epoch": 26.63, + "learning_rate": 3.668748978166199e-05, + "loss": 2.2609, + "step": 5376000 + }, + { + "epoch": 26.64, + "learning_rate": 3.6686251195235904e-05, + "loss": 2.2708, + "step": 5376500 + }, + { + "epoch": 26.64, + "learning_rate": 3.668501260880982e-05, + "loss": 2.2872, + "step": 5377000 + }, + { + "epoch": 26.64, + "learning_rate": 3.668377402238374e-05, + "loss": 2.2752, + "step": 5377500 + }, + { + "epoch": 26.64, + "learning_rate": 3.6682535435957655e-05, + "loss": 2.2825, + "step": 5378000 + }, + { + "epoch": 26.65, + "learning_rate": 3.6681296849531565e-05, + "loss": 2.2562, + "step": 5378500 + }, + { + "epoch": 26.65, + "learning_rate": 3.668005826310548e-05, + "loss": 2.2711, + "step": 5379000 + }, + { + "epoch": 26.65, + "learning_rate": 3.667882215385226e-05, + "loss": 2.2773, + "step": 5379500 + }, + { + "epoch": 26.65, + "learning_rate": 3.667758356742617e-05, + "loss": 2.2731, + "step": 5380000 + }, + { + "epoch": 26.66, + "learning_rate": 3.6676344981000085e-05, + "loss": 2.253, + "step": 5380500 + }, + { + "epoch": 26.66, + "learning_rate": 3.6675106394574e-05, + "loss": 2.2824, + "step": 5381000 + }, + { + "epoch": 26.66, + "learning_rate": 3.667387028532077e-05, + "loss": 2.2933, + "step": 5381500 + }, + { + "epoch": 26.66, + "learning_rate": 3.667263417606754e-05, + "loss": 2.2788, + "step": 5382000 + }, + { + "epoch": 26.67, + "learning_rate": 3.6671395589641456e-05, + "loss": 2.2608, + "step": 5382500 + }, + { + "epoch": 26.67, + "learning_rate": 3.667015700321537e-05, + "loss": 2.2762, + "step": 5383000 + }, + { + "epoch": 26.67, + "learning_rate": 3.666891841678929e-05, + "loss": 2.2692, + "step": 5383500 + }, + { + "epoch": 26.67, + "learning_rate": 3.666767983036321e-05, + "loss": 2.2757, + "step": 5384000 + }, + { + "epoch": 26.68, + "learning_rate": 3.666644372110997e-05, + "loss": 2.2969, + "step": 5384500 + }, + { + "epoch": 26.68, + "learning_rate": 3.6665205134683886e-05, + "loss": 2.2756, + "step": 5385000 + }, + { + "epoch": 26.68, + "learning_rate": 3.66639665482578e-05, + "loss": 2.3052, + "step": 5385500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666272796183172e-05, + "loss": 2.2807, + "step": 5386000 + }, + { + "epoch": 26.69, + "learning_rate": 3.666148937540564e-05, + "loss": 2.2769, + "step": 5386500 + }, + { + "epoch": 26.69, + "learning_rate": 3.6660250788979554e-05, + "loss": 2.2956, + "step": 5387000 + }, + { + "epoch": 26.69, + "learning_rate": 3.665901220255347e-05, + "loss": 2.2773, + "step": 5387500 + }, + { + "epoch": 26.69, + "learning_rate": 3.665777361612739e-05, + "loss": 2.2996, + "step": 5388000 + }, + { + "epoch": 26.7, + "learning_rate": 3.6656535029701305e-05, + "loss": 2.2919, + "step": 5388500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665529644327522e-05, + "loss": 2.2895, + "step": 5389000 + }, + { + "epoch": 26.7, + "learning_rate": 3.665406033402199e-05, + "loss": 2.2703, + "step": 5389500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665282174759591e-05, + "loss": 2.2458, + "step": 5390000 + }, + { + "epoch": 26.71, + "learning_rate": 3.6651583161169824e-05, + "loss": 2.2799, + "step": 5390500 + }, + { + "epoch": 26.71, + "learning_rate": 3.665034457474374e-05, + "loss": 2.272, + "step": 5391000 + }, + { + "epoch": 26.71, + "learning_rate": 3.664910598831766e-05, + "loss": 2.2755, + "step": 5391500 + }, + { + "epoch": 26.71, + "learning_rate": 3.6647867401891575e-05, + "loss": 2.284, + "step": 5392000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664662881546549e-05, + "loss": 2.2968, + "step": 5392500 + }, + { + "epoch": 26.72, + "learning_rate": 3.664539022903941e-05, + "loss": 2.2897, + "step": 5393000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664415164261332e-05, + "loss": 2.2828, + "step": 5393500 + }, + { + "epoch": 26.72, + "learning_rate": 3.6642913056187236e-05, + "loss": 2.2667, + "step": 5394000 + }, + { + "epoch": 26.73, + "learning_rate": 3.6641676946934005e-05, + "loss": 2.2811, + "step": 5394500 + }, + { + "epoch": 26.73, + "learning_rate": 3.6640440837680774e-05, + "loss": 2.2899, + "step": 5395000 + }, + { + "epoch": 26.73, + "learning_rate": 3.663920225125469e-05, + "loss": 2.2946, + "step": 5395500 + }, + { + "epoch": 26.73, + "learning_rate": 3.663796366482861e-05, + "loss": 2.2743, + "step": 5396000 + }, + { + "epoch": 26.74, + "learning_rate": 3.6636725078402524e-05, + "loss": 2.2719, + "step": 5396500 + }, + { + "epoch": 26.74, + "learning_rate": 3.663548649197644e-05, + "loss": 2.2736, + "step": 5397000 + }, + { + "epoch": 26.74, + "learning_rate": 3.663424790555036e-05, + "loss": 2.2547, + "step": 5397500 + }, + { + "epoch": 26.74, + "learning_rate": 3.6633009319124275e-05, + "loss": 2.2961, + "step": 5398000 + }, + { + "epoch": 26.75, + "learning_rate": 3.663177073269819e-05, + "loss": 2.2816, + "step": 5398500 + }, + { + "epoch": 26.75, + "learning_rate": 3.663053214627211e-05, + "loss": 2.2523, + "step": 5399000 + }, + { + "epoch": 26.75, + "learning_rate": 3.662929603701887e-05, + "loss": 2.2643, + "step": 5399500 + }, + { + "epoch": 26.75, + "learning_rate": 3.662805745059279e-05, + "loss": 2.2791, + "step": 5400000 + }, + { + "epoch": 26.76, + "learning_rate": 3.6626818864166705e-05, + "loss": 2.2853, + "step": 5400500 + }, + { + "epoch": 26.76, + "learning_rate": 3.662558027774062e-05, + "loss": 2.282, + "step": 5401000 + }, + { + "epoch": 26.76, + "learning_rate": 3.662434169131454e-05, + "loss": 2.2665, + "step": 5401500 + }, + { + "epoch": 26.76, + "learning_rate": 3.662310558206131e-05, + "loss": 2.2855, + "step": 5402000 + }, + { + "epoch": 26.77, + "learning_rate": 3.6621866995635225e-05, + "loss": 2.2654, + "step": 5402500 + }, + { + "epoch": 26.77, + "learning_rate": 3.662062840920914e-05, + "loss": 2.2647, + "step": 5403000 + }, + { + "epoch": 26.77, + "learning_rate": 3.661938982278306e-05, + "loss": 2.2608, + "step": 5403500 + }, + { + "epoch": 26.77, + "learning_rate": 3.661815371352982e-05, + "loss": 2.2639, + "step": 5404000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661691512710374e-05, + "loss": 2.274, + "step": 5404500 + }, + { + "epoch": 26.78, + "learning_rate": 3.6615676540677654e-05, + "loss": 2.2579, + "step": 5405000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661443795425157e-05, + "loss": 2.2698, + "step": 5405500 + }, + { + "epoch": 26.78, + "learning_rate": 3.661319936782549e-05, + "loss": 2.2665, + "step": 5406000 + }, + { + "epoch": 26.79, + "learning_rate": 3.6611960781399405e-05, + "loss": 2.2833, + "step": 5406500 + }, + { + "epoch": 26.79, + "learning_rate": 3.6610724672146174e-05, + "loss": 2.2807, + "step": 5407000 + }, + { + "epoch": 26.79, + "learning_rate": 3.660948608572009e-05, + "loss": 2.2814, + "step": 5407500 + }, + { + "epoch": 26.79, + "learning_rate": 3.660824749929401e-05, + "loss": 2.2917, + "step": 5408000 + }, + { + "epoch": 26.8, + "learning_rate": 3.6607008912867925e-05, + "loss": 2.2824, + "step": 5408500 + }, + { + "epoch": 26.8, + "learning_rate": 3.660577032644184e-05, + "loss": 2.2577, + "step": 5409000 + }, + { + "epoch": 26.8, + "learning_rate": 3.660453174001576e-05, + "loss": 2.2902, + "step": 5409500 + }, + { + "epoch": 26.8, + "learning_rate": 3.6603293153589675e-05, + "loss": 2.2665, + "step": 5410000 + }, + { + "epoch": 26.81, + "learning_rate": 3.660205456716359e-05, + "loss": 2.2558, + "step": 5410500 + }, + { + "epoch": 26.81, + "learning_rate": 3.660081598073751e-05, + "loss": 2.291, + "step": 5411000 + }, + { + "epoch": 26.81, + "learning_rate": 3.6599577394311426e-05, + "loss": 2.2716, + "step": 5411500 + }, + { + "epoch": 26.81, + "learning_rate": 3.659833880788534e-05, + "loss": 2.286, + "step": 5412000 + }, + { + "epoch": 26.82, + "learning_rate": 3.6597102698632105e-05, + "loss": 2.2877, + "step": 5412500 + }, + { + "epoch": 26.82, + "learning_rate": 3.659586411220602e-05, + "loss": 2.272, + "step": 5413000 + }, + { + "epoch": 26.82, + "learning_rate": 3.659462552577994e-05, + "loss": 2.2849, + "step": 5413500 + }, + { + "epoch": 26.82, + "learning_rate": 3.6593386939353856e-05, + "loss": 2.2769, + "step": 5414000 + }, + { + "epoch": 26.83, + "learning_rate": 3.659214835292777e-05, + "loss": 2.2784, + "step": 5414500 + }, + { + "epoch": 26.83, + "learning_rate": 3.659090976650169e-05, + "loss": 2.2649, + "step": 5415000 + }, + { + "epoch": 26.83, + "learning_rate": 3.658967365724846e-05, + "loss": 2.2982, + "step": 5415500 + }, + { + "epoch": 26.83, + "learning_rate": 3.658843754799523e-05, + "loss": 2.2614, + "step": 5416000 + }, + { + "epoch": 26.84, + "learning_rate": 3.6587198961569144e-05, + "loss": 2.3042, + "step": 5416500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658596037514306e-05, + "loss": 2.3011, + "step": 5417000 + }, + { + "epoch": 26.84, + "learning_rate": 3.658472178871697e-05, + "loss": 2.2909, + "step": 5417500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658348320229089e-05, + "loss": 2.2738, + "step": 5418000 + }, + { + "epoch": 26.85, + "learning_rate": 3.6582244615864805e-05, + "loss": 2.2702, + "step": 5418500 + }, + { + "epoch": 26.85, + "learning_rate": 3.658100602943872e-05, + "loss": 2.2858, + "step": 5419000 + }, + { + "epoch": 26.85, + "learning_rate": 3.657976744301264e-05, + "loss": 2.2621, + "step": 5419500 + }, + { + "epoch": 26.85, + "learning_rate": 3.6578528856586556e-05, + "loss": 2.2872, + "step": 5420000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657729027016047e-05, + "loss": 2.2968, + "step": 5420500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657605416090724e-05, + "loss": 2.2733, + "step": 5421000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657481557448116e-05, + "loss": 2.2552, + "step": 5421500 + }, + { + "epoch": 26.86, + "learning_rate": 3.6573576988055076e-05, + "loss": 2.2804, + "step": 5422000 + }, + { + "epoch": 26.86, + "learning_rate": 3.657233840162899e-05, + "loss": 2.2929, + "step": 5422500 + }, + { + "epoch": 26.87, + "learning_rate": 3.657110229237576e-05, + "loss": 2.2556, + "step": 5423000 + }, + { + "epoch": 26.87, + "learning_rate": 3.656986370594968e-05, + "loss": 2.2761, + "step": 5423500 + }, + { + "epoch": 26.87, + "learning_rate": 3.656862511952359e-05, + "loss": 2.289, + "step": 5424000 + }, + { + "epoch": 26.87, + "learning_rate": 3.6567386533097505e-05, + "loss": 2.2732, + "step": 5424500 + }, + { + "epoch": 26.88, + "learning_rate": 3.6566150423844274e-05, + "loss": 2.3058, + "step": 5425000 + }, + { + "epoch": 26.88, + "learning_rate": 3.656491183741819e-05, + "loss": 2.2575, + "step": 5425500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656367325099211e-05, + "loss": 2.2856, + "step": 5426000 + }, + { + "epoch": 26.88, + "learning_rate": 3.6562434664566025e-05, + "loss": 2.2945, + "step": 5426500 + }, + { + "epoch": 26.89, + "learning_rate": 3.656119607813994e-05, + "loss": 2.2855, + "step": 5427000 + }, + { + "epoch": 26.89, + "learning_rate": 3.655995749171386e-05, + "loss": 2.2709, + "step": 5427500 + }, + { + "epoch": 26.89, + "learning_rate": 3.655872138246063e-05, + "loss": 2.2528, + "step": 5428000 + }, + { + "epoch": 26.89, + "learning_rate": 3.6557485273207397e-05, + "loss": 2.2848, + "step": 5428500 + }, + { + "epoch": 26.9, + "learning_rate": 3.6556246686781313e-05, + "loss": 2.2729, + "step": 5429000 + }, + { + "epoch": 26.9, + "learning_rate": 3.6555008100355224e-05, + "loss": 2.3043, + "step": 5429500 + }, + { + "epoch": 26.9, + "learning_rate": 3.655376951392914e-05, + "loss": 2.2512, + "step": 5430000 + }, + { + "epoch": 26.9, + "learning_rate": 3.655253092750306e-05, + "loss": 2.2953, + "step": 5430500 + }, + { + "epoch": 26.91, + "learning_rate": 3.6551292341076974e-05, + "loss": 2.2616, + "step": 5431000 + }, + { + "epoch": 26.91, + "learning_rate": 3.655005375465089e-05, + "loss": 2.2824, + "step": 5431500 + }, + { + "epoch": 26.91, + "learning_rate": 3.654881516822481e-05, + "loss": 2.2861, + "step": 5432000 + }, + { + "epoch": 26.91, + "learning_rate": 3.6547576581798725e-05, + "loss": 2.2852, + "step": 5432500 + }, + { + "epoch": 26.92, + "learning_rate": 3.654633799537264e-05, + "loss": 2.2819, + "step": 5433000 + }, + { + "epoch": 26.92, + "learning_rate": 3.654509940894656e-05, + "loss": 2.2683, + "step": 5433500 + }, + { + "epoch": 26.92, + "learning_rate": 3.6543860822520476e-05, + "loss": 2.2694, + "step": 5434000 + }, + { + "epoch": 26.92, + "learning_rate": 3.654262223609439e-05, + "loss": 2.2698, + "step": 5434500 + }, + { + "epoch": 26.93, + "learning_rate": 3.654138364966831e-05, + "loss": 2.2743, + "step": 5435000 + }, + { + "epoch": 26.93, + "learning_rate": 3.654014506324223e-05, + "loss": 2.2672, + "step": 5435500 + }, + { + "epoch": 26.93, + "learning_rate": 3.6538906476816144e-05, + "loss": 2.2739, + "step": 5436000 + }, + { + "epoch": 26.93, + "learning_rate": 3.653767036756291e-05, + "loss": 2.2586, + "step": 5436500 + }, + { + "epoch": 26.94, + "learning_rate": 3.653643178113683e-05, + "loss": 2.29, + "step": 5437000 + }, + { + "epoch": 26.94, + "learning_rate": 3.653519319471074e-05, + "loss": 2.2828, + "step": 5437500 + }, + { + "epoch": 26.94, + "learning_rate": 3.6533954608284656e-05, + "loss": 2.2699, + "step": 5438000 + }, + { + "epoch": 26.94, + "learning_rate": 3.6532716021858573e-05, + "loss": 2.2833, + "step": 5438500 + }, + { + "epoch": 26.95, + "learning_rate": 3.653147743543249e-05, + "loss": 2.2968, + "step": 5439000 + }, + { + "epoch": 26.95, + "learning_rate": 3.653023884900641e-05, + "loss": 2.2807, + "step": 5439500 + }, + { + "epoch": 26.95, + "learning_rate": 3.6529000262580324e-05, + "loss": 2.2711, + "step": 5440000 + }, + { + "epoch": 26.95, + "learning_rate": 3.652776167615424e-05, + "loss": 2.2559, + "step": 5440500 + }, + { + "epoch": 26.96, + "learning_rate": 3.652652308972815e-05, + "loss": 2.2752, + "step": 5441000 + }, + { + "epoch": 26.96, + "learning_rate": 3.652528450330207e-05, + "loss": 2.2879, + "step": 5441500 + }, + { + "epoch": 26.96, + "learning_rate": 3.6524050871221696e-05, + "loss": 2.2852, + "step": 5442000 + }, + { + "epoch": 26.96, + "learning_rate": 3.652281228479561e-05, + "loss": 2.2834, + "step": 5442500 + }, + { + "epoch": 26.97, + "learning_rate": 3.652157369836953e-05, + "loss": 2.2683, + "step": 5443000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6520335111943446e-05, + "loss": 2.2996, + "step": 5443500 + }, + { + "epoch": 26.97, + "learning_rate": 3.651909652551736e-05, + "loss": 2.2578, + "step": 5444000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6517857939091274e-05, + "loss": 2.2798, + "step": 5444500 + }, + { + "epoch": 26.98, + "learning_rate": 3.651661935266519e-05, + "loss": 2.287, + "step": 5445000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651538076623911e-05, + "loss": 2.2635, + "step": 5445500 + }, + { + "epoch": 26.98, + "learning_rate": 3.6514144656985876e-05, + "loss": 2.2767, + "step": 5446000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651290607055979e-05, + "loss": 2.2714, + "step": 5446500 + }, + { + "epoch": 26.99, + "learning_rate": 3.651166748413371e-05, + "loss": 2.2901, + "step": 5447000 + }, + { + "epoch": 26.99, + "learning_rate": 3.651042889770763e-05, + "loss": 2.2857, + "step": 5447500 + }, + { + "epoch": 26.99, + "learning_rate": 3.6509190311281544e-05, + "loss": 2.2839, + "step": 5448000 + }, + { + "epoch": 26.99, + "learning_rate": 3.650795172485546e-05, + "loss": 2.2809, + "step": 5448500 + }, + { + "epoch": 27.0, + "learning_rate": 3.650671313842938e-05, + "loss": 2.2747, + "step": 5449000 + }, + { + "epoch": 27.0, + "learning_rate": 3.6505474552003295e-05, + "loss": 2.291, + "step": 5449500 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.6562619972139041, + "eval_accuracy_mlm": 0.6113849397655421, + "eval_accuracy_nsp": 0.8681356610278516, + "eval_loss": 2.3292908668518066, + "eval_runtime": 146.2467, + "eval_samples_per_second": 1743.349, + "eval_steps_per_second": 72.644, + "step": 5449761 + }, + { + "epoch": 27.0, + "learning_rate": 3.650423596557721e-05, + "loss": 2.2555, + "step": 5450000 + }, + { + "epoch": 27.0, + "learning_rate": 3.650299985632398e-05, + "loss": 2.257, + "step": 5450500 + }, + { + "epoch": 27.01, + "learning_rate": 3.6501766224243594e-05, + "loss": 2.2329, + "step": 5451000 + }, + { + "epoch": 27.01, + "learning_rate": 3.650052763781751e-05, + "loss": 2.2794, + "step": 5451500 + }, + { + "epoch": 27.01, + "learning_rate": 3.649928905139143e-05, + "loss": 2.2362, + "step": 5452000 + }, + { + "epoch": 27.01, + "learning_rate": 3.6498050464965345e-05, + "loss": 2.2553, + "step": 5452500 + }, + { + "epoch": 27.02, + "learning_rate": 3.649681187853926e-05, + "loss": 2.2592, + "step": 5453000 + }, + { + "epoch": 27.02, + "learning_rate": 3.649557329211318e-05, + "loss": 2.2158, + "step": 5453500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6494334705687096e-05, + "loss": 2.2312, + "step": 5454000 + }, + { + "epoch": 27.02, + "learning_rate": 3.649309611926101e-05, + "loss": 2.2288, + "step": 5454500 + }, + { + "epoch": 27.03, + "learning_rate": 3.649185753283493e-05, + "loss": 2.2151, + "step": 5455000 + }, + { + "epoch": 27.03, + "learning_rate": 3.649061894640885e-05, + "loss": 2.2561, + "step": 5455500 + }, + { + "epoch": 27.03, + "learning_rate": 3.6489380359982764e-05, + "loss": 2.2237, + "step": 5456000 + }, + { + "epoch": 27.03, + "learning_rate": 3.648814177355668e-05, + "loss": 2.2344, + "step": 5456500 + }, + { + "epoch": 27.04, + "learning_rate": 3.64869031871306e-05, + "loss": 2.2628, + "step": 5457000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648566707787736e-05, + "loss": 2.2354, + "step": 5457500 + }, + { + "epoch": 27.04, + "learning_rate": 3.6484428491451276e-05, + "loss": 2.2272, + "step": 5458000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648318990502519e-05, + "loss": 2.2534, + "step": 5458500 + }, + { + "epoch": 27.05, + "learning_rate": 3.648195131859911e-05, + "loss": 2.2329, + "step": 5459000 + }, + { + "epoch": 27.05, + "learning_rate": 3.648071273217303e-05, + "loss": 2.2415, + "step": 5459500 + }, + { + "epoch": 27.05, + "learning_rate": 3.6479474145746944e-05, + "loss": 2.2537, + "step": 5460000 + }, + { + "epoch": 27.05, + "learning_rate": 3.647823803649371e-05, + "loss": 2.2636, + "step": 5460500 + }, + { + "epoch": 27.06, + "learning_rate": 3.647699945006763e-05, + "loss": 2.2439, + "step": 5461000 + }, + { + "epoch": 27.06, + "learning_rate": 3.647576086364155e-05, + "loss": 2.264, + "step": 5461500 + }, + { + "epoch": 27.06, + "learning_rate": 3.6474522277215464e-05, + "loss": 2.2518, + "step": 5462000 + }, + { + "epoch": 27.06, + "learning_rate": 3.6473286167962226e-05, + "loss": 2.2465, + "step": 5462500 + }, + { + "epoch": 27.07, + "learning_rate": 3.647204758153614e-05, + "loss": 2.246, + "step": 5463000 + }, + { + "epoch": 27.07, + "learning_rate": 3.647080899511006e-05, + "loss": 2.2395, + "step": 5463500 + }, + { + "epoch": 27.07, + "learning_rate": 3.6469570408683977e-05, + "loss": 2.2537, + "step": 5464000 + }, + { + "epoch": 27.07, + "learning_rate": 3.6468331822257893e-05, + "loss": 2.2474, + "step": 5464500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646709323583181e-05, + "loss": 2.2647, + "step": 5465000 + }, + { + "epoch": 27.08, + "learning_rate": 3.646585712657858e-05, + "loss": 2.2496, + "step": 5465500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646462101732535e-05, + "loss": 2.2495, + "step": 5466000 + }, + { + "epoch": 27.08, + "learning_rate": 3.6463382430899265e-05, + "loss": 2.2468, + "step": 5466500 + }, + { + "epoch": 27.09, + "learning_rate": 3.646214384447318e-05, + "loss": 2.2772, + "step": 5467000 + }, + { + "epoch": 27.09, + "learning_rate": 3.64609052580471e-05, + "loss": 2.2372, + "step": 5467500 + }, + { + "epoch": 27.09, + "learning_rate": 3.645966914879387e-05, + "loss": 2.2775, + "step": 5468000 + }, + { + "epoch": 27.09, + "learning_rate": 3.6458430562367785e-05, + "loss": 2.2419, + "step": 5468500 + }, + { + "epoch": 27.1, + "learning_rate": 3.64571919759417e-05, + "loss": 2.2398, + "step": 5469000 + }, + { + "epoch": 27.1, + "learning_rate": 3.645595586668847e-05, + "loss": 2.2505, + "step": 5469500 + }, + { + "epoch": 27.1, + "learning_rate": 3.645471728026239e-05, + "loss": 2.259, + "step": 5470000 + }, + { + "epoch": 27.1, + "learning_rate": 3.6453478693836304e-05, + "loss": 2.2549, + "step": 5470500 + }, + { + "epoch": 27.11, + "learning_rate": 3.645224010741022e-05, + "loss": 2.256, + "step": 5471000 + }, + { + "epoch": 27.11, + "learning_rate": 3.645100152098414e-05, + "loss": 2.2535, + "step": 5471500 + }, + { + "epoch": 27.11, + "learning_rate": 3.6449762934558055e-05, + "loss": 2.2698, + "step": 5472000 + }, + { + "epoch": 27.11, + "learning_rate": 3.6448524348131965e-05, + "loss": 2.2286, + "step": 5472500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644728576170588e-05, + "loss": 2.2582, + "step": 5473000 + }, + { + "epoch": 27.12, + "learning_rate": 3.64460471752798e-05, + "loss": 2.2486, + "step": 5473500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644481106602657e-05, + "loss": 2.261, + "step": 5474000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6443572479600485e-05, + "loss": 2.2363, + "step": 5474500 + }, + { + "epoch": 27.13, + "learning_rate": 3.64423338931744e-05, + "loss": 2.2753, + "step": 5475000 + }, + { + "epoch": 27.13, + "learning_rate": 3.644109530674832e-05, + "loss": 2.2233, + "step": 5475500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643985672032223e-05, + "loss": 2.2671, + "step": 5476000 + }, + { + "epoch": 27.13, + "learning_rate": 3.6438618133896146e-05, + "loss": 2.2589, + "step": 5476500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643737954747006e-05, + "loss": 2.2706, + "step": 5477000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643614096104398e-05, + "loss": 2.247, + "step": 5477500 + }, + { + "epoch": 27.14, + "learning_rate": 3.6434902374617896e-05, + "loss": 2.2405, + "step": 5478000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643366378819181e-05, + "loss": 2.2682, + "step": 5478500 + }, + { + "epoch": 27.14, + "learning_rate": 3.643242520176573e-05, + "loss": 2.2618, + "step": 5479000 + }, + { + "epoch": 27.15, + "learning_rate": 3.643118661533965e-05, + "loss": 2.2553, + "step": 5479500 + }, + { + "epoch": 27.15, + "learning_rate": 3.6429948028913564e-05, + "loss": 2.2585, + "step": 5480000 + }, + { + "epoch": 27.15, + "learning_rate": 3.642870944248748e-05, + "loss": 2.2591, + "step": 5480500 + }, + { + "epoch": 27.15, + "learning_rate": 3.642747333323425e-05, + "loss": 2.2141, + "step": 5481000 + }, + { + "epoch": 27.16, + "learning_rate": 3.642623474680816e-05, + "loss": 2.2618, + "step": 5481500 + }, + { + "epoch": 27.16, + "learning_rate": 3.642499616038208e-05, + "loss": 2.2731, + "step": 5482000 + }, + { + "epoch": 27.16, + "learning_rate": 3.6423757573955994e-05, + "loss": 2.2593, + "step": 5482500 + }, + { + "epoch": 27.16, + "learning_rate": 3.642252146470276e-05, + "loss": 2.2595, + "step": 5483000 + }, + { + "epoch": 27.17, + "learning_rate": 3.642128287827668e-05, + "loss": 2.2376, + "step": 5483500 + }, + { + "epoch": 27.17, + "learning_rate": 3.6420044291850597e-05, + "loss": 2.2437, + "step": 5484000 + }, + { + "epoch": 27.17, + "learning_rate": 3.6418805705424513e-05, + "loss": 2.2427, + "step": 5484500 + }, + { + "epoch": 27.17, + "learning_rate": 3.641756711899843e-05, + "loss": 2.2614, + "step": 5485000 + }, + { + "epoch": 27.18, + "learning_rate": 3.641633348691805e-05, + "loss": 2.2662, + "step": 5485500 + }, + { + "epoch": 27.18, + "learning_rate": 3.641509490049197e-05, + "loss": 2.232, + "step": 5486000 + }, + { + "epoch": 27.18, + "learning_rate": 3.6413856314065885e-05, + "loss": 2.2682, + "step": 5486500 + }, + { + "epoch": 27.18, + "learning_rate": 3.64126177276398e-05, + "loss": 2.2515, + "step": 5487000 + }, + { + "epoch": 27.19, + "learning_rate": 3.641138161838657e-05, + "loss": 2.2346, + "step": 5487500 + }, + { + "epoch": 27.19, + "learning_rate": 3.641014303196049e-05, + "loss": 2.2527, + "step": 5488000 + }, + { + "epoch": 27.19, + "learning_rate": 3.6408904445534405e-05, + "loss": 2.2485, + "step": 5488500 + }, + { + "epoch": 27.19, + "learning_rate": 3.640766585910832e-05, + "loss": 2.2585, + "step": 5489000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640642727268224e-05, + "loss": 2.2364, + "step": 5489500 + }, + { + "epoch": 27.2, + "learning_rate": 3.6405188686256155e-05, + "loss": 2.257, + "step": 5490000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640395009983007e-05, + "loss": 2.2554, + "step": 5490500 + }, + { + "epoch": 27.2, + "learning_rate": 3.640271151340399e-05, + "loss": 2.2422, + "step": 5491000 + }, + { + "epoch": 27.21, + "learning_rate": 3.6401472926977906e-05, + "loss": 2.2549, + "step": 5491500 + }, + { + "epoch": 27.21, + "learning_rate": 3.640023681772467e-05, + "loss": 2.2699, + "step": 5492000 + }, + { + "epoch": 27.21, + "learning_rate": 3.6398998231298585e-05, + "loss": 2.266, + "step": 5492500 + }, + { + "epoch": 27.21, + "learning_rate": 3.63977596448725e-05, + "loss": 2.2627, + "step": 5493000 + }, + { + "epoch": 27.22, + "learning_rate": 3.639652105844642e-05, + "loss": 2.2377, + "step": 5493500 + }, + { + "epoch": 27.22, + "learning_rate": 3.6395282472020336e-05, + "loss": 2.2849, + "step": 5494000 + }, + { + "epoch": 27.22, + "learning_rate": 3.6394046362767105e-05, + "loss": 2.2537, + "step": 5494500 + }, + { + "epoch": 27.22, + "learning_rate": 3.6392810253513873e-05, + "loss": 2.2768, + "step": 5495000 + }, + { + "epoch": 27.23, + "learning_rate": 3.639157166708779e-05, + "loss": 2.2535, + "step": 5495500 + }, + { + "epoch": 27.23, + "learning_rate": 3.63903330806617e-05, + "loss": 2.2404, + "step": 5496000 + }, + { + "epoch": 27.23, + "learning_rate": 3.638909697140847e-05, + "loss": 2.2792, + "step": 5496500 + }, + { + "epoch": 27.23, + "learning_rate": 3.6387858384982386e-05, + "loss": 2.2611, + "step": 5497000 + }, + { + "epoch": 27.24, + "learning_rate": 3.63866197985563e-05, + "loss": 2.2887, + "step": 5497500 + }, + { + "epoch": 27.24, + "learning_rate": 3.638538121213022e-05, + "loss": 2.2696, + "step": 5498000 + }, + { + "epoch": 27.24, + "learning_rate": 3.638414262570414e-05, + "loss": 2.2436, + "step": 5498500 + }, + { + "epoch": 27.24, + "learning_rate": 3.6382904039278054e-05, + "loss": 2.2483, + "step": 5499000 + }, + { + "epoch": 27.25, + "learning_rate": 3.638166545285197e-05, + "loss": 2.2444, + "step": 5499500 + }, + { + "epoch": 27.25, + "learning_rate": 3.638042686642589e-05, + "loss": 2.2416, + "step": 5500000 + }, + { + "epoch": 27.25, + "learning_rate": 3.637919075717266e-05, + "loss": 2.2547, + "step": 5500500 + }, + { + "epoch": 27.25, + "learning_rate": 3.6377952170746574e-05, + "loss": 2.2881, + "step": 5501000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637671358432049e-05, + "loss": 2.2535, + "step": 5501500 + }, + { + "epoch": 27.26, + "learning_rate": 3.637547747506725e-05, + "loss": 2.2535, + "step": 5502000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637423888864117e-05, + "loss": 2.2756, + "step": 5502500 + }, + { + "epoch": 27.26, + "learning_rate": 3.6373000302215086e-05, + "loss": 2.2454, + "step": 5503000 + }, + { + "epoch": 27.27, + "learning_rate": 3.6371761715789e-05, + "loss": 2.2552, + "step": 5503500 + }, + { + "epoch": 27.27, + "learning_rate": 3.637052312936292e-05, + "loss": 2.2726, + "step": 5504000 + }, + { + "epoch": 27.27, + "learning_rate": 3.636928454293684e-05, + "loss": 2.2531, + "step": 5504500 + }, + { + "epoch": 27.27, + "learning_rate": 3.6368045956510754e-05, + "loss": 2.2683, + "step": 5505000 + }, + { + "epoch": 27.28, + "learning_rate": 3.636680737008467e-05, + "loss": 2.2521, + "step": 5505500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636556878365859e-05, + "loss": 2.25, + "step": 5506000 + }, + { + "epoch": 27.28, + "learning_rate": 3.6364330197232505e-05, + "loss": 2.2742, + "step": 5506500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636309161080642e-05, + "loss": 2.261, + "step": 5507000 + }, + { + "epoch": 27.29, + "learning_rate": 3.636185302438034e-05, + "loss": 2.2644, + "step": 5507500 + }, + { + "epoch": 27.29, + "learning_rate": 3.636061691512711e-05, + "loss": 2.2741, + "step": 5508000 + }, + { + "epoch": 27.29, + "learning_rate": 3.6359378328701024e-05, + "loss": 2.2615, + "step": 5508500 + }, + { + "epoch": 27.29, + "learning_rate": 3.6358142219447787e-05, + "loss": 2.235, + "step": 5509000 + }, + { + "epoch": 27.3, + "learning_rate": 3.6356903633021703e-05, + "loss": 2.2733, + "step": 5509500 + }, + { + "epoch": 27.3, + "learning_rate": 3.635566504659562e-05, + "loss": 2.254, + "step": 5510000 + }, + { + "epoch": 27.3, + "learning_rate": 3.635442646016954e-05, + "loss": 2.2436, + "step": 5510500 + }, + { + "epoch": 27.3, + "learning_rate": 3.6353187873743454e-05, + "loss": 2.2736, + "step": 5511000 + }, + { + "epoch": 27.31, + "learning_rate": 3.635194928731737e-05, + "loss": 2.2374, + "step": 5511500 + }, + { + "epoch": 27.31, + "learning_rate": 3.635071070089129e-05, + "loss": 2.2574, + "step": 5512000 + }, + { + "epoch": 27.31, + "learning_rate": 3.6349472114465205e-05, + "loss": 2.253, + "step": 5512500 + }, + { + "epoch": 27.31, + "learning_rate": 3.634823352803912e-05, + "loss": 2.2539, + "step": 5513000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634699494161304e-05, + "loss": 2.2727, + "step": 5513500 + }, + { + "epoch": 27.32, + "learning_rate": 3.6345756355186956e-05, + "loss": 2.2654, + "step": 5514000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634451776876087e-05, + "loss": 2.2546, + "step": 5514500 + }, + { + "epoch": 27.32, + "learning_rate": 3.634327918233479e-05, + "loss": 2.2389, + "step": 5515000 + }, + { + "epoch": 27.33, + "learning_rate": 3.6342040595908707e-05, + "loss": 2.2643, + "step": 5515500 + }, + { + "epoch": 27.33, + "learning_rate": 3.6340802009482624e-05, + "loss": 2.2547, + "step": 5516000 + }, + { + "epoch": 27.33, + "learning_rate": 3.633956342305654e-05, + "loss": 2.2357, + "step": 5516500 + }, + { + "epoch": 27.33, + "learning_rate": 3.633832483663045e-05, + "loss": 2.2576, + "step": 5517000 + }, + { + "epoch": 27.34, + "learning_rate": 3.633708625020437e-05, + "loss": 2.2713, + "step": 5517500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633585261812399e-05, + "loss": 2.2306, + "step": 5518000 + }, + { + "epoch": 27.34, + "learning_rate": 3.6334614031697905e-05, + "loss": 2.2457, + "step": 5518500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633337544527182e-05, + "loss": 2.2603, + "step": 5519000 + }, + { + "epoch": 27.35, + "learning_rate": 3.633213685884574e-05, + "loss": 2.2669, + "step": 5519500 + }, + { + "epoch": 27.35, + "learning_rate": 3.6330898272419656e-05, + "loss": 2.2569, + "step": 5520000 + }, + { + "epoch": 27.35, + "learning_rate": 3.632965968599357e-05, + "loss": 2.2568, + "step": 5520500 + }, + { + "epoch": 27.35, + "learning_rate": 3.632842109956749e-05, + "loss": 2.2558, + "step": 5521000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632718251314141e-05, + "loss": 2.2727, + "step": 5521500 + }, + { + "epoch": 27.36, + "learning_rate": 3.6325943926715324e-05, + "loss": 2.2767, + "step": 5522000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632470534028924e-05, + "loss": 2.2559, + "step": 5522500 + }, + { + "epoch": 27.36, + "learning_rate": 3.632346675386315e-05, + "loss": 2.2577, + "step": 5523000 + }, + { + "epoch": 27.37, + "learning_rate": 3.632223064460992e-05, + "loss": 2.2861, + "step": 5523500 + }, + { + "epoch": 27.37, + "learning_rate": 3.6320992058183836e-05, + "loss": 2.256, + "step": 5524000 + }, + { + "epoch": 27.37, + "learning_rate": 3.6319753471757753e-05, + "loss": 2.2271, + "step": 5524500 + }, + { + "epoch": 27.37, + "learning_rate": 3.631851488533167e-05, + "loss": 2.261, + "step": 5525000 + }, + { + "epoch": 27.38, + "learning_rate": 3.631727629890559e-05, + "loss": 2.245, + "step": 5525500 + }, + { + "epoch": 27.38, + "learning_rate": 3.63160377124795e-05, + "loss": 2.2664, + "step": 5526000 + }, + { + "epoch": 27.38, + "learning_rate": 3.6314799126053414e-05, + "loss": 2.2493, + "step": 5526500 + }, + { + "epoch": 27.38, + "learning_rate": 3.631356301680019e-05, + "loss": 2.2382, + "step": 5527000 + }, + { + "epoch": 27.39, + "learning_rate": 3.631232443037411e-05, + "loss": 2.252, + "step": 5527500 + }, + { + "epoch": 27.39, + "learning_rate": 3.6311085843948024e-05, + "loss": 2.2755, + "step": 5528000 + }, + { + "epoch": 27.39, + "learning_rate": 3.630984725752194e-05, + "loss": 2.2891, + "step": 5528500 + }, + { + "epoch": 27.39, + "learning_rate": 3.630861114826871e-05, + "loss": 2.2629, + "step": 5529000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630737256184262e-05, + "loss": 2.2718, + "step": 5529500 + }, + { + "epoch": 27.4, + "learning_rate": 3.630613892976224e-05, + "loss": 2.2516, + "step": 5530000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630490034333616e-05, + "loss": 2.2641, + "step": 5530500 + }, + { + "epoch": 27.4, + "learning_rate": 3.6303661756910074e-05, + "loss": 2.2786, + "step": 5531000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630242317048399e-05, + "loss": 2.2593, + "step": 5531500 + }, + { + "epoch": 27.41, + "learning_rate": 3.630118458405791e-05, + "loss": 2.2493, + "step": 5532000 + }, + { + "epoch": 27.41, + "learning_rate": 3.6299945997631825e-05, + "loss": 2.2607, + "step": 5532500 + }, + { + "epoch": 27.41, + "learning_rate": 3.629870741120574e-05, + "loss": 2.2367, + "step": 5533000 + }, + { + "epoch": 27.41, + "learning_rate": 3.629746882477966e-05, + "loss": 2.2638, + "step": 5533500 + }, + { + "epoch": 27.42, + "learning_rate": 3.6296230238353576e-05, + "loss": 2.2509, + "step": 5534000 + }, + { + "epoch": 27.42, + "learning_rate": 3.629499165192749e-05, + "loss": 2.2706, + "step": 5534500 + }, + { + "epoch": 27.42, + "learning_rate": 3.6293755542674255e-05, + "loss": 2.2787, + "step": 5535000 + }, + { + "epoch": 27.42, + "learning_rate": 3.629251695624817e-05, + "loss": 2.254, + "step": 5535500 + }, + { + "epoch": 27.43, + "learning_rate": 3.629127836982209e-05, + "loss": 2.2543, + "step": 5536000 + }, + { + "epoch": 27.43, + "learning_rate": 3.629004226056886e-05, + "loss": 2.2467, + "step": 5536500 + }, + { + "epoch": 27.43, + "learning_rate": 3.6288803674142774e-05, + "loss": 2.2442, + "step": 5537000 + }, + { + "epoch": 27.43, + "learning_rate": 3.628756508771669e-05, + "loss": 2.2459, + "step": 5537500 + }, + { + "epoch": 27.44, + "learning_rate": 3.628632897846346e-05, + "loss": 2.285, + "step": 5538000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628509039203738e-05, + "loss": 2.2431, + "step": 5538500 + }, + { + "epoch": 27.44, + "learning_rate": 3.6283851805611294e-05, + "loss": 2.2626, + "step": 5539000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628261321918521e-05, + "loss": 2.2379, + "step": 5539500 + }, + { + "epoch": 27.45, + "learning_rate": 3.628137710993198e-05, + "loss": 2.2729, + "step": 5540000 + }, + { + "epoch": 27.45, + "learning_rate": 3.6280138523505897e-05, + "loss": 2.2661, + "step": 5540500 + }, + { + "epoch": 27.45, + "learning_rate": 3.6278899937079814e-05, + "loss": 2.2461, + "step": 5541000 + }, + { + "epoch": 27.45, + "learning_rate": 3.627766135065373e-05, + "loss": 2.2775, + "step": 5541500 + }, + { + "epoch": 27.46, + "learning_rate": 3.627642276422765e-05, + "loss": 2.2716, + "step": 5542000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627518417780156e-05, + "loss": 2.2505, + "step": 5542500 + }, + { + "epoch": 27.46, + "learning_rate": 3.6273945591375474e-05, + "loss": 2.2486, + "step": 5543000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627270700494939e-05, + "loss": 2.2634, + "step": 5543500 + }, + { + "epoch": 27.47, + "learning_rate": 3.627146841852331e-05, + "loss": 2.2735, + "step": 5544000 + }, + { + "epoch": 27.47, + "learning_rate": 3.6270229832097225e-05, + "loss": 2.2689, + "step": 5544500 + }, + { + "epoch": 27.47, + "learning_rate": 3.626899124567114e-05, + "loss": 2.2402, + "step": 5545000 + }, + { + "epoch": 27.47, + "learning_rate": 3.626775265924506e-05, + "loss": 2.2754, + "step": 5545500 + }, + { + "epoch": 27.48, + "learning_rate": 3.6266514072818976e-05, + "loss": 2.2596, + "step": 5546000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626527548639289e-05, + "loss": 2.2925, + "step": 5546500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626403689996681e-05, + "loss": 2.2827, + "step": 5547000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626279831354073e-05, + "loss": 2.2446, + "step": 5547500 + }, + { + "epoch": 27.49, + "learning_rate": 3.6261559727114644e-05, + "loss": 2.2501, + "step": 5548000 + }, + { + "epoch": 27.49, + "learning_rate": 3.626032114068856e-05, + "loss": 2.2617, + "step": 5548500 + }, + { + "epoch": 27.49, + "learning_rate": 3.625908255426248e-05, + "loss": 2.2591, + "step": 5549000 + }, + { + "epoch": 27.49, + "learning_rate": 3.6257843967836395e-05, + "loss": 2.2573, + "step": 5549500 + }, + { + "epoch": 27.5, + "learning_rate": 3.6256605381410305e-05, + "loss": 2.2601, + "step": 5550000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625536679498422e-05, + "loss": 2.2669, + "step": 5550500 + }, + { + "epoch": 27.5, + "learning_rate": 3.625412820855814e-05, + "loss": 2.2688, + "step": 5551000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625289209930491e-05, + "loss": 2.256, + "step": 5551500 + }, + { + "epoch": 27.51, + "learning_rate": 3.6251653512878824e-05, + "loss": 2.2706, + "step": 5552000 + }, + { + "epoch": 27.51, + "learning_rate": 3.625041492645274e-05, + "loss": 2.2595, + "step": 5552500 + }, + { + "epoch": 27.51, + "learning_rate": 3.624917634002666e-05, + "loss": 2.271, + "step": 5553000 + }, + { + "epoch": 27.51, + "learning_rate": 3.6247937753600575e-05, + "loss": 2.2814, + "step": 5553500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624669916717449e-05, + "loss": 2.277, + "step": 5554000 + }, + { + "epoch": 27.52, + "learning_rate": 3.62454605807484e-05, + "loss": 2.2732, + "step": 5554500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624422199432232e-05, + "loss": 2.2527, + "step": 5555000 + }, + { + "epoch": 27.52, + "learning_rate": 3.624298836224194e-05, + "loss": 2.2457, + "step": 5555500 + }, + { + "epoch": 27.53, + "learning_rate": 3.624174977581586e-05, + "loss": 2.2607, + "step": 5556000 + }, + { + "epoch": 27.53, + "learning_rate": 3.6240511189389774e-05, + "loss": 2.2696, + "step": 5556500 + }, + { + "epoch": 27.53, + "learning_rate": 3.623927260296369e-05, + "loss": 2.2735, + "step": 5557000 + }, + { + "epoch": 27.53, + "learning_rate": 3.623803401653761e-05, + "loss": 2.2682, + "step": 5557500 + }, + { + "epoch": 27.54, + "learning_rate": 3.6236795430111524e-05, + "loss": 2.2556, + "step": 5558000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623555684368544e-05, + "loss": 2.263, + "step": 5558500 + }, + { + "epoch": 27.54, + "learning_rate": 3.623432073443221e-05, + "loss": 2.2585, + "step": 5559000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623308214800613e-05, + "loss": 2.2543, + "step": 5559500 + }, + { + "epoch": 27.55, + "learning_rate": 3.6231843561580044e-05, + "loss": 2.2579, + "step": 5560000 + }, + { + "epoch": 27.55, + "learning_rate": 3.623060497515396e-05, + "loss": 2.2519, + "step": 5560500 + }, + { + "epoch": 27.55, + "learning_rate": 3.622936638872788e-05, + "loss": 2.2483, + "step": 5561000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6228127802301795e-05, + "loss": 2.2644, + "step": 5561500 + }, + { + "epoch": 27.56, + "learning_rate": 3.622688921587571e-05, + "loss": 2.2766, + "step": 5562000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622565062944963e-05, + "loss": 2.2594, + "step": 5562500 + }, + { + "epoch": 27.56, + "learning_rate": 3.6224412043023546e-05, + "loss": 2.2581, + "step": 5563000 + }, + { + "epoch": 27.56, + "learning_rate": 3.6223173456597456e-05, + "loss": 2.2555, + "step": 5563500 + }, + { + "epoch": 27.57, + "learning_rate": 3.6221937347344225e-05, + "loss": 2.2705, + "step": 5564000 + }, + { + "epoch": 27.57, + "learning_rate": 3.622069876091814e-05, + "loss": 2.2884, + "step": 5564500 + }, + { + "epoch": 27.57, + "learning_rate": 3.621946017449206e-05, + "loss": 2.2761, + "step": 5565000 + }, + { + "epoch": 27.57, + "learning_rate": 3.6218221588065975e-05, + "loss": 2.2661, + "step": 5565500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621698300163989e-05, + "loss": 2.2659, + "step": 5566000 + }, + { + "epoch": 27.58, + "learning_rate": 3.621574441521381e-05, + "loss": 2.2419, + "step": 5566500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621450582878772e-05, + "loss": 2.2708, + "step": 5567000 + }, + { + "epoch": 27.58, + "learning_rate": 3.6213269719534495e-05, + "loss": 2.2641, + "step": 5567500 + }, + { + "epoch": 27.59, + "learning_rate": 3.621203113310841e-05, + "loss": 2.2466, + "step": 5568000 + }, + { + "epoch": 27.59, + "learning_rate": 3.621079254668233e-05, + "loss": 2.2507, + "step": 5568500 + }, + { + "epoch": 27.59, + "learning_rate": 3.6209553960256246e-05, + "loss": 2.2843, + "step": 5569000 + }, + { + "epoch": 27.59, + "learning_rate": 3.620831785100301e-05, + "loss": 2.2604, + "step": 5569500 + }, + { + "epoch": 27.6, + "learning_rate": 3.6207079264576925e-05, + "loss": 2.2593, + "step": 5570000 + }, + { + "epoch": 27.6, + "learning_rate": 3.620584067815084e-05, + "loss": 2.2368, + "step": 5570500 + }, + { + "epoch": 27.6, + "learning_rate": 3.620460209172476e-05, + "loss": 2.2762, + "step": 5571000 + }, + { + "epoch": 27.6, + "learning_rate": 3.6203363505298675e-05, + "loss": 2.2359, + "step": 5571500 + }, + { + "epoch": 27.61, + "learning_rate": 3.6202127396045444e-05, + "loss": 2.2649, + "step": 5572000 + }, + { + "epoch": 27.61, + "learning_rate": 3.6200893763965065e-05, + "loss": 2.2789, + "step": 5572500 + }, + { + "epoch": 27.61, + "learning_rate": 3.619965517753898e-05, + "loss": 2.2694, + "step": 5573000 + }, + { + "epoch": 27.61, + "learning_rate": 3.61984165911129e-05, + "loss": 2.3009, + "step": 5573500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619717800468681e-05, + "loss": 2.2679, + "step": 5574000 + }, + { + "epoch": 27.62, + "learning_rate": 3.6195939418260726e-05, + "loss": 2.2534, + "step": 5574500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619470083183464e-05, + "loss": 2.2697, + "step": 5575000 + }, + { + "epoch": 27.62, + "learning_rate": 3.619346224540856e-05, + "loss": 2.2451, + "step": 5575500 + }, + { + "epoch": 27.63, + "learning_rate": 3.619222365898248e-05, + "loss": 2.2733, + "step": 5576000 + }, + { + "epoch": 27.63, + "learning_rate": 3.6190985072556394e-05, + "loss": 2.2543, + "step": 5576500 + }, + { + "epoch": 27.63, + "learning_rate": 3.618974896330316e-05, + "loss": 2.2541, + "step": 5577000 + }, + { + "epoch": 27.63, + "learning_rate": 3.618851037687708e-05, + "loss": 2.2615, + "step": 5577500 + }, + { + "epoch": 27.64, + "learning_rate": 3.6187271790450996e-05, + "loss": 2.2746, + "step": 5578000 + }, + { + "epoch": 27.64, + "learning_rate": 3.6186035681197765e-05, + "loss": 2.2567, + "step": 5578500 + }, + { + "epoch": 27.64, + "learning_rate": 3.6184799571944534e-05, + "loss": 2.2616, + "step": 5579000 + }, + { + "epoch": 27.64, + "learning_rate": 3.618356098551845e-05, + "loss": 2.2827, + "step": 5579500 + }, + { + "epoch": 27.65, + "learning_rate": 3.618232239909237e-05, + "loss": 2.2811, + "step": 5580000 + }, + { + "epoch": 27.65, + "learning_rate": 3.6181083812666285e-05, + "loss": 2.2912, + "step": 5580500 + }, + { + "epoch": 27.65, + "learning_rate": 3.61798452262402e-05, + "loss": 2.31, + "step": 5581000 + }, + { + "epoch": 27.65, + "learning_rate": 3.617860663981412e-05, + "loss": 2.2568, + "step": 5581500 + }, + { + "epoch": 27.66, + "learning_rate": 3.6177368053388035e-05, + "loss": 2.273, + "step": 5582000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617612946696195e-05, + "loss": 2.2506, + "step": 5582500 + }, + { + "epoch": 27.66, + "learning_rate": 3.617489088053586e-05, + "loss": 2.2841, + "step": 5583000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617365477128263e-05, + "loss": 2.2624, + "step": 5583500 + }, + { + "epoch": 27.67, + "learning_rate": 3.617241618485655e-05, + "loss": 2.2689, + "step": 5584000 + }, + { + "epoch": 27.67, + "learning_rate": 3.6171177598430465e-05, + "loss": 2.262, + "step": 5584500 + }, + { + "epoch": 27.67, + "learning_rate": 3.616993901200438e-05, + "loss": 2.2513, + "step": 5585000 + }, + { + "epoch": 27.67, + "learning_rate": 3.61687004255783e-05, + "loss": 2.2472, + "step": 5585500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6167461839152216e-05, + "loss": 2.2559, + "step": 5586000 + }, + { + "epoch": 27.68, + "learning_rate": 3.6166223252726126e-05, + "loss": 2.2452, + "step": 5586500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616498466630004e-05, + "loss": 2.2461, + "step": 5587000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616374607987396e-05, + "loss": 2.2635, + "step": 5587500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616251244779358e-05, + "loss": 2.2708, + "step": 5588000 + }, + { + "epoch": 27.69, + "learning_rate": 3.61612738613675e-05, + "loss": 2.2443, + "step": 5588500 + }, + { + "epoch": 27.69, + "learning_rate": 3.6160035274941415e-05, + "loss": 2.2791, + "step": 5589000 + }, + { + "epoch": 27.69, + "learning_rate": 3.615879668851533e-05, + "loss": 2.2916, + "step": 5589500 + }, + { + "epoch": 27.69, + "learning_rate": 3.615755810208925e-05, + "loss": 2.2805, + "step": 5590000 + }, + { + "epoch": 27.7, + "learning_rate": 3.6156319515663165e-05, + "loss": 2.2724, + "step": 5590500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615508092923708e-05, + "loss": 2.2532, + "step": 5591000 + }, + { + "epoch": 27.7, + "learning_rate": 3.6153842342811e-05, + "loss": 2.2645, + "step": 5591500 + }, + { + "epoch": 27.7, + "learning_rate": 3.6152603756384916e-05, + "loss": 2.264, + "step": 5592000 + }, + { + "epoch": 27.71, + "learning_rate": 3.6151365169958826e-05, + "loss": 2.2729, + "step": 5592500 + }, + { + "epoch": 27.71, + "learning_rate": 3.615012658353274e-05, + "loss": 2.3001, + "step": 5593000 + }, + { + "epoch": 27.71, + "learning_rate": 3.614888799710666e-05, + "loss": 2.2556, + "step": 5593500 + }, + { + "epoch": 27.71, + "learning_rate": 3.6147651887853436e-05, + "loss": 2.2536, + "step": 5594000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614641330142735e-05, + "loss": 2.2779, + "step": 5594500 + }, + { + "epoch": 27.72, + "learning_rate": 3.614517471500127e-05, + "loss": 2.2704, + "step": 5595000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614393612857518e-05, + "loss": 2.2851, + "step": 5595500 + }, + { + "epoch": 27.72, + "learning_rate": 3.6142697542149097e-05, + "loss": 2.2879, + "step": 5596000 + }, + { + "epoch": 27.73, + "learning_rate": 3.6141461432895865e-05, + "loss": 2.2571, + "step": 5596500 + }, + { + "epoch": 27.73, + "learning_rate": 3.614022284646978e-05, + "loss": 2.2481, + "step": 5597000 + }, + { + "epoch": 27.73, + "learning_rate": 3.61389842600437e-05, + "loss": 2.2497, + "step": 5597500 + }, + { + "epoch": 27.73, + "learning_rate": 3.6137745673617616e-05, + "loss": 2.2729, + "step": 5598000 + }, + { + "epoch": 27.74, + "learning_rate": 3.6136509564364385e-05, + "loss": 2.2782, + "step": 5598500 + }, + { + "epoch": 27.74, + "learning_rate": 3.61352709779383e-05, + "loss": 2.2591, + "step": 5599000 + }, + { + "epoch": 27.74, + "learning_rate": 3.613403239151222e-05, + "loss": 2.2868, + "step": 5599500 + }, + { + "epoch": 27.74, + "learning_rate": 3.6132793805086136e-05, + "loss": 2.2517, + "step": 5600000 + }, + { + "epoch": 27.75, + "learning_rate": 3.613155521866005e-05, + "loss": 2.2596, + "step": 5600500 + }, + { + "epoch": 27.75, + "learning_rate": 3.613031663223397e-05, + "loss": 2.2664, + "step": 5601000 + }, + { + "epoch": 27.75, + "learning_rate": 3.6129078045807887e-05, + "loss": 2.2664, + "step": 5601500 + }, + { + "epoch": 27.75, + "learning_rate": 3.61278394593818e-05, + "loss": 2.2545, + "step": 5602000 + }, + { + "epoch": 27.76, + "learning_rate": 3.6126600872955714e-05, + "loss": 2.2648, + "step": 5602500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612536476370248e-05, + "loss": 2.2745, + "step": 5603000 + }, + { + "epoch": 27.76, + "learning_rate": 3.612412865444925e-05, + "loss": 2.2538, + "step": 5603500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612289006802317e-05, + "loss": 2.2435, + "step": 5604000 + }, + { + "epoch": 27.77, + "learning_rate": 3.6121651481597085e-05, + "loss": 2.2784, + "step": 5604500 + }, + { + "epoch": 27.77, + "learning_rate": 3.6120412895171e-05, + "loss": 2.2727, + "step": 5605000 + }, + { + "epoch": 27.77, + "learning_rate": 3.611917430874492e-05, + "loss": 2.2786, + "step": 5605500 + }, + { + "epoch": 27.77, + "learning_rate": 3.6117935722318836e-05, + "loss": 2.2708, + "step": 5606000 + }, + { + "epoch": 27.78, + "learning_rate": 3.6116699613065605e-05, + "loss": 2.268, + "step": 5606500 + }, + { + "epoch": 27.78, + "learning_rate": 3.611546102663952e-05, + "loss": 2.2608, + "step": 5607000 + }, + { + "epoch": 27.78, + "learning_rate": 3.611422244021344e-05, + "loss": 2.2927, + "step": 5607500 + }, + { + "epoch": 27.78, + "learning_rate": 3.6112983853787356e-05, + "loss": 2.2539, + "step": 5608000 + }, + { + "epoch": 27.79, + "learning_rate": 3.6111745267361266e-05, + "loss": 2.2372, + "step": 5608500 + }, + { + "epoch": 27.79, + "learning_rate": 3.611050668093518e-05, + "loss": 2.2446, + "step": 5609000 + }, + { + "epoch": 27.79, + "learning_rate": 3.61092680945091e-05, + "loss": 2.2775, + "step": 5609500 + }, + { + "epoch": 27.79, + "learning_rate": 3.610803446242872e-05, + "loss": 2.2643, + "step": 5610000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610679587600264e-05, + "loss": 2.2885, + "step": 5610500 + }, + { + "epoch": 27.8, + "learning_rate": 3.6105557289576554e-05, + "loss": 2.2784, + "step": 5611000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610431870315047e-05, + "loss": 2.2686, + "step": 5611500 + }, + { + "epoch": 27.8, + "learning_rate": 3.610308011672439e-05, + "loss": 2.2745, + "step": 5612000 + }, + { + "epoch": 27.81, + "learning_rate": 3.6101841530298305e-05, + "loss": 2.2697, + "step": 5612500 + }, + { + "epoch": 27.81, + "learning_rate": 3.610060294387222e-05, + "loss": 2.2637, + "step": 5613000 + }, + { + "epoch": 27.81, + "learning_rate": 3.609936435744614e-05, + "loss": 2.2522, + "step": 5613500 + }, + { + "epoch": 27.81, + "learning_rate": 3.6098125771020056e-05, + "loss": 2.2637, + "step": 5614000 + }, + { + "epoch": 27.82, + "learning_rate": 3.609688718459397e-05, + "loss": 2.275, + "step": 5614500 + }, + { + "epoch": 27.82, + "learning_rate": 3.6095653552513586e-05, + "loss": 2.2715, + "step": 5615000 + }, + { + "epoch": 27.82, + "learning_rate": 3.6094414966087503e-05, + "loss": 2.2733, + "step": 5615500 + }, + { + "epoch": 27.82, + "learning_rate": 3.609317637966142e-05, + "loss": 2.2502, + "step": 5616000 + }, + { + "epoch": 27.83, + "learning_rate": 3.609193779323534e-05, + "loss": 2.2829, + "step": 5616500 + }, + { + "epoch": 27.83, + "learning_rate": 3.6090699206809254e-05, + "loss": 2.2659, + "step": 5617000 + }, + { + "epoch": 27.83, + "learning_rate": 3.608946062038317e-05, + "loss": 2.2579, + "step": 5617500 + }, + { + "epoch": 27.83, + "learning_rate": 3.608822203395709e-05, + "loss": 2.2465, + "step": 5618000 + }, + { + "epoch": 27.84, + "learning_rate": 3.6086983447531005e-05, + "loss": 2.2796, + "step": 5618500 + }, + { + "epoch": 27.84, + "learning_rate": 3.608574486110492e-05, + "loss": 2.2645, + "step": 5619000 + }, + { + "epoch": 27.84, + "learning_rate": 3.608450627467884e-05, + "loss": 2.2538, + "step": 5619500 + }, + { + "epoch": 27.84, + "learning_rate": 3.6083267688252756e-05, + "loss": 2.2571, + "step": 5620000 + }, + { + "epoch": 27.85, + "learning_rate": 3.608202910182667e-05, + "loss": 2.2596, + "step": 5620500 + }, + { + "epoch": 27.85, + "learning_rate": 3.608079051540059e-05, + "loss": 2.2624, + "step": 5621000 + }, + { + "epoch": 27.85, + "learning_rate": 3.6079551928974507e-05, + "loss": 2.2717, + "step": 5621500 + }, + { + "epoch": 27.85, + "learning_rate": 3.607831829689412e-05, + "loss": 2.2788, + "step": 5622000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607707971046804e-05, + "loss": 2.2695, + "step": 5622500 + }, + { + "epoch": 27.86, + "learning_rate": 3.6075841124041954e-05, + "loss": 2.2771, + "step": 5623000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607460253761587e-05, + "loss": 2.2837, + "step": 5623500 + }, + { + "epoch": 27.86, + "learning_rate": 3.607336395118979e-05, + "loss": 2.2757, + "step": 5624000 + }, + { + "epoch": 27.87, + "learning_rate": 3.6072125364763705e-05, + "loss": 2.2864, + "step": 5624500 + }, + { + "epoch": 27.87, + "learning_rate": 3.607088677833762e-05, + "loss": 2.2869, + "step": 5625000 + }, + { + "epoch": 27.87, + "learning_rate": 3.606964819191154e-05, + "loss": 2.2667, + "step": 5625500 + }, + { + "epoch": 27.87, + "learning_rate": 3.6068409605485456e-05, + "loss": 2.2932, + "step": 5626000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606717349623222e-05, + "loss": 2.2459, + "step": 5626500 + }, + { + "epoch": 27.88, + "learning_rate": 3.6065934909806135e-05, + "loss": 2.2514, + "step": 5627000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606469632338005e-05, + "loss": 2.2673, + "step": 5627500 + }, + { + "epoch": 27.88, + "learning_rate": 3.606345773695397e-05, + "loss": 2.2727, + "step": 5628000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6062219150527886e-05, + "loss": 2.2612, + "step": 5628500 + }, + { + "epoch": 27.89, + "learning_rate": 3.6060983041274654e-05, + "loss": 2.2404, + "step": 5629000 + }, + { + "epoch": 27.89, + "learning_rate": 3.605974445484857e-05, + "loss": 2.2698, + "step": 5629500 + }, + { + "epoch": 27.89, + "learning_rate": 3.605850834559534e-05, + "loss": 2.2836, + "step": 5630000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605726975916926e-05, + "loss": 2.2787, + "step": 5630500 + }, + { + "epoch": 27.9, + "learning_rate": 3.6056031172743174e-05, + "loss": 2.2894, + "step": 5631000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605479506348994e-05, + "loss": 2.249, + "step": 5631500 + }, + { + "epoch": 27.9, + "learning_rate": 3.605355647706386e-05, + "loss": 2.2716, + "step": 5632000 + }, + { + "epoch": 27.91, + "learning_rate": 3.605231789063778e-05, + "loss": 2.2512, + "step": 5632500 + }, + { + "epoch": 27.91, + "learning_rate": 3.6051079304211694e-05, + "loss": 2.2884, + "step": 5633000 + }, + { + "epoch": 27.91, + "learning_rate": 3.604984071778561e-05, + "loss": 2.2531, + "step": 5633500 + }, + { + "epoch": 27.91, + "learning_rate": 3.604860213135952e-05, + "loss": 2.2674, + "step": 5634000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604736354493344e-05, + "loss": 2.2461, + "step": 5634500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604612743568021e-05, + "loss": 2.2815, + "step": 5635000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604488884925413e-05, + "loss": 2.2463, + "step": 5635500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604365026282804e-05, + "loss": 2.2841, + "step": 5636000 + }, + { + "epoch": 27.93, + "learning_rate": 3.604241167640196e-05, + "loss": 2.2488, + "step": 5636500 + }, + { + "epoch": 27.93, + "learning_rate": 3.6041173089975874e-05, + "loss": 2.2632, + "step": 5637000 + }, + { + "epoch": 27.93, + "learning_rate": 3.603993698072264e-05, + "loss": 2.2737, + "step": 5637500 + }, + { + "epoch": 27.93, + "learning_rate": 3.603869839429656e-05, + "loss": 2.2868, + "step": 5638000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603745980787048e-05, + "loss": 2.244, + "step": 5638500 + }, + { + "epoch": 27.94, + "learning_rate": 3.6036221221444394e-05, + "loss": 2.2787, + "step": 5639000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603498263501831e-05, + "loss": 2.2593, + "step": 5639500 + }, + { + "epoch": 27.94, + "learning_rate": 3.603374404859222e-05, + "loss": 2.2728, + "step": 5640000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603250546216614e-05, + "loss": 2.2731, + "step": 5640500 + }, + { + "epoch": 27.95, + "learning_rate": 3.6031266875740055e-05, + "loss": 2.2936, + "step": 5641000 + }, + { + "epoch": 27.95, + "learning_rate": 3.603003076648683e-05, + "loss": 2.2646, + "step": 5641500 + }, + { + "epoch": 27.95, + "learning_rate": 3.602879218006075e-05, + "loss": 2.3049, + "step": 5642000 + }, + { + "epoch": 27.95, + "learning_rate": 3.6027553593634664e-05, + "loss": 2.2743, + "step": 5642500 + }, + { + "epoch": 27.96, + "learning_rate": 3.6026315007208574e-05, + "loss": 2.2442, + "step": 5643000 + }, + { + "epoch": 27.96, + "learning_rate": 3.602507642078249e-05, + "loss": 2.2908, + "step": 5643500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602383783435641e-05, + "loss": 2.2864, + "step": 5644000 + }, + { + "epoch": 27.96, + "learning_rate": 3.6022599247930325e-05, + "loss": 2.2823, + "step": 5644500 + }, + { + "epoch": 27.97, + "learning_rate": 3.602136066150424e-05, + "loss": 2.2878, + "step": 5645000 + }, + { + "epoch": 27.97, + "learning_rate": 3.602012207507815e-05, + "loss": 2.2742, + "step": 5645500 + }, + { + "epoch": 27.97, + "learning_rate": 3.601888844299778e-05, + "loss": 2.2998, + "step": 5646000 + }, + { + "epoch": 27.97, + "learning_rate": 3.6017649856571697e-05, + "loss": 2.2767, + "step": 5646500 + }, + { + "epoch": 27.98, + "learning_rate": 3.6016411270145613e-05, + "loss": 2.264, + "step": 5647000 + }, + { + "epoch": 27.98, + "learning_rate": 3.601517268371953e-05, + "loss": 2.2855, + "step": 5647500 + }, + { + "epoch": 27.98, + "learning_rate": 3.601393409729345e-05, + "loss": 2.2364, + "step": 5648000 + }, + { + "epoch": 27.98, + "learning_rate": 3.6012695510867364e-05, + "loss": 2.2728, + "step": 5648500 + }, + { + "epoch": 27.99, + "learning_rate": 3.601145692444128e-05, + "loss": 2.2712, + "step": 5649000 + }, + { + "epoch": 27.99, + "learning_rate": 3.601021833801519e-05, + "loss": 2.2799, + "step": 5649500 + }, + { + "epoch": 27.99, + "learning_rate": 3.600897975158911e-05, + "loss": 2.2532, + "step": 5650000 + }, + { + "epoch": 27.99, + "learning_rate": 3.6007741165163025e-05, + "loss": 2.2599, + "step": 5650500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600650257873694e-05, + "loss": 2.2928, + "step": 5651000 + }, + { + "epoch": 28.0, + "learning_rate": 3.600526399231086e-05, + "loss": 2.2934, + "step": 5651500 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.6571310181504103, + "eval_accuracy_mlm": 0.6123743861697156, + "eval_accuracy_nsp": 0.8682572492047741, + "eval_loss": 2.3233656883239746, + "eval_runtime": 146.0438, + "eval_samples_per_second": 1745.771, + "eval_steps_per_second": 72.745, + "step": 5651604 + }, + { + "epoch": 28.0, + "learning_rate": 3.600402540588477e-05, + "loss": 2.2378, + "step": 5652000 + }, + { + "epoch": 28.0, + "learning_rate": 3.6002786819458686e-05, + "loss": 2.2507, + "step": 5652500 + }, + { + "epoch": 28.01, + "learning_rate": 3.6001550710205455e-05, + "loss": 2.2047, + "step": 5653000 + }, + { + "epoch": 28.01, + "learning_rate": 3.600031212377937e-05, + "loss": 2.2663, + "step": 5653500 + }, + { + "epoch": 28.01, + "learning_rate": 3.599907353735329e-05, + "loss": 2.2253, + "step": 5654000 + }, + { + "epoch": 28.01, + "learning_rate": 3.5997834950927206e-05, + "loss": 2.2129, + "step": 5654500 + }, + { + "epoch": 28.02, + "learning_rate": 3.599659636450112e-05, + "loss": 2.2342, + "step": 5655000 + }, + { + "epoch": 28.02, + "learning_rate": 3.599535777807504e-05, + "loss": 2.2216, + "step": 5655500 + }, + { + "epoch": 28.02, + "learning_rate": 3.5994119191648957e-05, + "loss": 2.211, + "step": 5656000 + }, + { + "epoch": 28.02, + "learning_rate": 3.5992883082395725e-05, + "loss": 2.2144, + "step": 5656500 + }, + { + "epoch": 28.03, + "learning_rate": 3.599164449596964e-05, + "loss": 2.2563, + "step": 5657000 + }, + { + "epoch": 28.03, + "learning_rate": 3.599040590954356e-05, + "loss": 2.2285, + "step": 5657500 + }, + { + "epoch": 28.03, + "learning_rate": 3.5989167323117476e-05, + "loss": 2.2173, + "step": 5658000 + }, + { + "epoch": 28.03, + "learning_rate": 3.598792873669139e-05, + "loss": 2.2218, + "step": 5658500 + }, + { + "epoch": 28.04, + "learning_rate": 3.5986692627438155e-05, + "loss": 2.2319, + "step": 5659000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598545404101207e-05, + "loss": 2.2306, + "step": 5659500 + }, + { + "epoch": 28.04, + "learning_rate": 3.598421793175885e-05, + "loss": 2.2447, + "step": 5660000 + }, + { + "epoch": 28.04, + "learning_rate": 3.5982979345332765e-05, + "loss": 2.26, + "step": 5660500 + }, + { + "epoch": 28.05, + "learning_rate": 3.598174075890668e-05, + "loss": 2.2248, + "step": 5661000 + }, + { + "epoch": 28.05, + "learning_rate": 3.59805021724806e-05, + "loss": 2.2226, + "step": 5661500 + }, + { + "epoch": 28.05, + "learning_rate": 3.597926358605451e-05, + "loss": 2.2503, + "step": 5662000 + }, + { + "epoch": 28.05, + "learning_rate": 3.5978024999628425e-05, + "loss": 2.2375, + "step": 5662500 + }, + { + "epoch": 28.06, + "learning_rate": 3.597678641320234e-05, + "loss": 2.2391, + "step": 5663000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597554782677626e-05, + "loss": 2.2428, + "step": 5663500 + }, + { + "epoch": 28.06, + "learning_rate": 3.5974309240350176e-05, + "loss": 2.2463, + "step": 5664000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597307065392409e-05, + "loss": 2.2465, + "step": 5664500 + }, + { + "epoch": 28.07, + "learning_rate": 3.597183206749801e-05, + "loss": 2.2468, + "step": 5665000 + }, + { + "epoch": 28.07, + "learning_rate": 3.597059348107192e-05, + "loss": 2.2688, + "step": 5665500 + }, + { + "epoch": 28.07, + "learning_rate": 3.596935737181869e-05, + "loss": 2.2244, + "step": 5666000 + }, + { + "epoch": 28.07, + "learning_rate": 3.5968118785392606e-05, + "loss": 2.2423, + "step": 5666500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596688019896652e-05, + "loss": 2.2341, + "step": 5667000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596564161254044e-05, + "loss": 2.2489, + "step": 5667500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596440302611436e-05, + "loss": 2.2332, + "step": 5668000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596316939403398e-05, + "loss": 2.2225, + "step": 5668500 + }, + { + "epoch": 28.09, + "learning_rate": 3.5961930807607894e-05, + "loss": 2.2174, + "step": 5669000 + }, + { + "epoch": 28.09, + "learning_rate": 3.596069222118181e-05, + "loss": 2.2281, + "step": 5669500 + }, + { + "epoch": 28.09, + "learning_rate": 3.595945363475573e-05, + "loss": 2.2196, + "step": 5670000 + }, + { + "epoch": 28.09, + "learning_rate": 3.5958215048329645e-05, + "loss": 2.273, + "step": 5670500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595697646190356e-05, + "loss": 2.2395, + "step": 5671000 + }, + { + "epoch": 28.1, + "learning_rate": 3.595573787547747e-05, + "loss": 2.2184, + "step": 5671500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595449928905139e-05, + "loss": 2.2504, + "step": 5672000 + }, + { + "epoch": 28.1, + "learning_rate": 3.5953260702625306e-05, + "loss": 2.2414, + "step": 5672500 + }, + { + "epoch": 28.11, + "learning_rate": 3.595202459337208e-05, + "loss": 2.2489, + "step": 5673000 + }, + { + "epoch": 28.11, + "learning_rate": 3.5950786006946e-05, + "loss": 2.2339, + "step": 5673500 + }, + { + "epoch": 28.11, + "learning_rate": 3.5949547420519916e-05, + "loss": 2.2422, + "step": 5674000 + }, + { + "epoch": 28.11, + "learning_rate": 3.594831626561239e-05, + "loss": 2.2523, + "step": 5674500 + }, + { + "epoch": 28.12, + "learning_rate": 3.59470776791863e-05, + "loss": 2.2267, + "step": 5675000 + }, + { + "epoch": 28.12, + "learning_rate": 3.594584156993307e-05, + "loss": 2.2262, + "step": 5675500 + }, + { + "epoch": 28.12, + "learning_rate": 3.5944602983506984e-05, + "loss": 2.2335, + "step": 5676000 + }, + { + "epoch": 28.12, + "learning_rate": 3.59433643970809e-05, + "loss": 2.2257, + "step": 5676500 + }, + { + "epoch": 28.13, + "learning_rate": 3.594212581065482e-05, + "loss": 2.2219, + "step": 5677000 + }, + { + "epoch": 28.13, + "learning_rate": 3.5940887224228735e-05, + "loss": 2.2501, + "step": 5677500 + }, + { + "epoch": 28.13, + "learning_rate": 3.5939648637802645e-05, + "loss": 2.2395, + "step": 5678000 + }, + { + "epoch": 28.13, + "learning_rate": 3.593841005137656e-05, + "loss": 2.2518, + "step": 5678500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593717394212334e-05, + "loss": 2.2567, + "step": 5679000 + }, + { + "epoch": 28.14, + "learning_rate": 3.5935935355697254e-05, + "loss": 2.2426, + "step": 5679500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593469676927117e-05, + "loss": 2.2331, + "step": 5680000 + }, + { + "epoch": 28.14, + "learning_rate": 3.593346066001794e-05, + "loss": 2.2449, + "step": 5680500 + }, + { + "epoch": 28.15, + "learning_rate": 3.593222207359185e-05, + "loss": 2.2522, + "step": 5681000 + }, + { + "epoch": 28.15, + "learning_rate": 3.593098348716577e-05, + "loss": 2.238, + "step": 5681500 + }, + { + "epoch": 28.15, + "learning_rate": 3.5929744900739684e-05, + "loss": 2.2348, + "step": 5682000 + }, + { + "epoch": 28.15, + "learning_rate": 3.59285063143136e-05, + "loss": 2.2584, + "step": 5682500 + }, + { + "epoch": 28.16, + "learning_rate": 3.592726772788752e-05, + "loss": 2.2451, + "step": 5683000 + }, + { + "epoch": 28.16, + "learning_rate": 3.5926029141461435e-05, + "loss": 2.2386, + "step": 5683500 + }, + { + "epoch": 28.16, + "learning_rate": 3.592479055503535e-05, + "loss": 2.2602, + "step": 5684000 + }, + { + "epoch": 28.16, + "learning_rate": 3.592355196860926e-05, + "loss": 2.2475, + "step": 5684500 + }, + { + "epoch": 28.17, + "learning_rate": 3.592231338218318e-05, + "loss": 2.2594, + "step": 5685000 + }, + { + "epoch": 28.17, + "learning_rate": 3.5921074795757096e-05, + "loss": 2.2606, + "step": 5685500 + }, + { + "epoch": 28.17, + "learning_rate": 3.591983620933101e-05, + "loss": 2.2404, + "step": 5686000 + }, + { + "epoch": 28.17, + "learning_rate": 3.591859762290493e-05, + "loss": 2.219, + "step": 5686500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591735903647885e-05, + "loss": 2.2316, + "step": 5687000 + }, + { + "epoch": 28.18, + "learning_rate": 3.5916122927225615e-05, + "loss": 2.2293, + "step": 5687500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591488434079953e-05, + "loss": 2.2417, + "step": 5688000 + }, + { + "epoch": 28.18, + "learning_rate": 3.591364575437345e-05, + "loss": 2.2511, + "step": 5688500 + }, + { + "epoch": 28.19, + "learning_rate": 3.5912407167947366e-05, + "loss": 2.2389, + "step": 5689000 + }, + { + "epoch": 28.19, + "learning_rate": 3.591116858152128e-05, + "loss": 2.2493, + "step": 5689500 + }, + { + "epoch": 28.19, + "learning_rate": 3.59099299950952e-05, + "loss": 2.2295, + "step": 5690000 + }, + { + "epoch": 28.19, + "learning_rate": 3.590869140866912e-05, + "loss": 2.2424, + "step": 5690500 + }, + { + "epoch": 28.2, + "learning_rate": 3.5907452822243034e-05, + "loss": 2.267, + "step": 5691000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590621423581695e-05, + "loss": 2.2283, + "step": 5691500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590497564939087e-05, + "loss": 2.2434, + "step": 5692000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590373954013763e-05, + "loss": 2.2637, + "step": 5692500 + }, + { + "epoch": 28.21, + "learning_rate": 3.590250095371155e-05, + "loss": 2.2305, + "step": 5693000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5901262367285464e-05, + "loss": 2.2433, + "step": 5693500 + }, + { + "epoch": 28.21, + "learning_rate": 3.590002873520509e-05, + "loss": 2.2236, + "step": 5694000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5898790148779e-05, + "loss": 2.261, + "step": 5694500 + }, + { + "epoch": 28.21, + "learning_rate": 3.589755156235292e-05, + "loss": 2.2614, + "step": 5695000 + }, + { + "epoch": 28.22, + "learning_rate": 3.5896312975926835e-05, + "loss": 2.2338, + "step": 5695500 + }, + { + "epoch": 28.22, + "learning_rate": 3.589507438950075e-05, + "loss": 2.2459, + "step": 5696000 + }, + { + "epoch": 28.22, + "learning_rate": 3.589383580307467e-05, + "loss": 2.2287, + "step": 5696500 + }, + { + "epoch": 28.22, + "learning_rate": 3.589259721664858e-05, + "loss": 2.2181, + "step": 5697000 + }, + { + "epoch": 28.23, + "learning_rate": 3.5891358630222496e-05, + "loss": 2.2656, + "step": 5697500 + }, + { + "epoch": 28.23, + "learning_rate": 3.589012252096927e-05, + "loss": 2.2618, + "step": 5698000 + }, + { + "epoch": 28.23, + "learning_rate": 3.588888393454319e-05, + "loss": 2.2414, + "step": 5698500 + }, + { + "epoch": 28.23, + "learning_rate": 3.5887645348117106e-05, + "loss": 2.2461, + "step": 5699000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588640676169102e-05, + "loss": 2.2572, + "step": 5699500 + }, + { + "epoch": 28.24, + "learning_rate": 3.588516817526493e-05, + "loss": 2.2578, + "step": 5700000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588392958883885e-05, + "loss": 2.2657, + "step": 5700500 + }, + { + "epoch": 28.24, + "learning_rate": 3.5882691002412766e-05, + "loss": 2.2494, + "step": 5701000 + }, + { + "epoch": 28.25, + "learning_rate": 3.5881452415986683e-05, + "loss": 2.259, + "step": 5701500 + }, + { + "epoch": 28.25, + "learning_rate": 3.58802138295606e-05, + "loss": 2.2313, + "step": 5702000 + }, + { + "epoch": 28.25, + "learning_rate": 3.587897772030737e-05, + "loss": 2.2567, + "step": 5702500 + }, + { + "epoch": 28.25, + "learning_rate": 3.5877739133881286e-05, + "loss": 2.2508, + "step": 5703000 + }, + { + "epoch": 28.26, + "learning_rate": 3.58765005474552e-05, + "loss": 2.2611, + "step": 5703500 + }, + { + "epoch": 28.26, + "learning_rate": 3.587526196102911e-05, + "loss": 2.2495, + "step": 5704000 + }, + { + "epoch": 28.26, + "learning_rate": 3.587402337460303e-05, + "loss": 2.2509, + "step": 5704500 + }, + { + "epoch": 28.26, + "learning_rate": 3.587278478817695e-05, + "loss": 2.2434, + "step": 5705000 + }, + { + "epoch": 28.27, + "learning_rate": 3.5871546201750864e-05, + "loss": 2.2263, + "step": 5705500 + }, + { + "epoch": 28.27, + "learning_rate": 3.587030761532478e-05, + "loss": 2.2574, + "step": 5706000 + }, + { + "epoch": 28.27, + "learning_rate": 3.58690690288987e-05, + "loss": 2.2488, + "step": 5706500 + }, + { + "epoch": 28.27, + "learning_rate": 3.5867835396818325e-05, + "loss": 2.2452, + "step": 5707000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586659681039224e-05, + "loss": 2.2347, + "step": 5707500 + }, + { + "epoch": 28.28, + "learning_rate": 3.586535822396615e-05, + "loss": 2.2418, + "step": 5708000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586411963754007e-05, + "loss": 2.2475, + "step": 5708500 + }, + { + "epoch": 28.28, + "learning_rate": 3.5862881051113986e-05, + "loss": 2.2471, + "step": 5709000 + }, + { + "epoch": 28.29, + "learning_rate": 3.58616424646879e-05, + "loss": 2.2539, + "step": 5709500 + }, + { + "epoch": 28.29, + "learning_rate": 3.586040387826182e-05, + "loss": 2.2612, + "step": 5710000 + }, + { + "epoch": 28.29, + "learning_rate": 3.585916529183573e-05, + "loss": 2.248, + "step": 5710500 + }, + { + "epoch": 28.29, + "learning_rate": 3.5857929182582506e-05, + "loss": 2.2418, + "step": 5711000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585669059615642e-05, + "loss": 2.2432, + "step": 5711500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585545200973034e-05, + "loss": 2.2694, + "step": 5712000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585421342330425e-05, + "loss": 2.2284, + "step": 5712500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585297483687817e-05, + "loss": 2.2573, + "step": 5713000 + }, + { + "epoch": 28.31, + "learning_rate": 3.585173872762494e-05, + "loss": 2.2553, + "step": 5713500 + }, + { + "epoch": 28.31, + "learning_rate": 3.585050014119886e-05, + "loss": 2.2186, + "step": 5714000 + }, + { + "epoch": 28.31, + "learning_rate": 3.5849261554772776e-05, + "loss": 2.2606, + "step": 5714500 + }, + { + "epoch": 28.31, + "learning_rate": 3.5848022968346686e-05, + "loss": 2.2582, + "step": 5715000 + }, + { + "epoch": 28.32, + "learning_rate": 3.58467843819206e-05, + "loss": 2.2227, + "step": 5715500 + }, + { + "epoch": 28.32, + "learning_rate": 3.584554579549452e-05, + "loss": 2.254, + "step": 5716000 + }, + { + "epoch": 28.32, + "learning_rate": 3.584430720906844e-05, + "loss": 2.2496, + "step": 5716500 + }, + { + "epoch": 28.32, + "learning_rate": 3.5843068622642354e-05, + "loss": 2.2388, + "step": 5717000 + }, + { + "epoch": 28.33, + "learning_rate": 3.5841830036216264e-05, + "loss": 2.243, + "step": 5717500 + }, + { + "epoch": 28.33, + "learning_rate": 3.584059144979018e-05, + "loss": 2.2567, + "step": 5718000 + }, + { + "epoch": 28.33, + "learning_rate": 3.58393528633641e-05, + "loss": 2.2261, + "step": 5718500 + }, + { + "epoch": 28.33, + "learning_rate": 3.5838114276938015e-05, + "loss": 2.2367, + "step": 5719000 + }, + { + "epoch": 28.34, + "learning_rate": 3.5836878167684784e-05, + "loss": 2.272, + "step": 5719500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583564205843156e-05, + "loss": 2.2329, + "step": 5720000 + }, + { + "epoch": 28.34, + "learning_rate": 3.583440594917832e-05, + "loss": 2.2338, + "step": 5720500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583316736275224e-05, + "loss": 2.2532, + "step": 5721000 + }, + { + "epoch": 28.35, + "learning_rate": 3.583193125349901e-05, + "loss": 2.2604, + "step": 5721500 + }, + { + "epoch": 28.35, + "learning_rate": 3.5830692667072924e-05, + "loss": 2.274, + "step": 5722000 + }, + { + "epoch": 28.35, + "learning_rate": 3.582945408064684e-05, + "loss": 2.243, + "step": 5722500 + }, + { + "epoch": 28.35, + "learning_rate": 3.582821549422076e-05, + "loss": 2.2256, + "step": 5723000 + }, + { + "epoch": 28.36, + "learning_rate": 3.5826976907794675e-05, + "loss": 2.2452, + "step": 5723500 + }, + { + "epoch": 28.36, + "learning_rate": 3.582573832136859e-05, + "loss": 2.2638, + "step": 5724000 + }, + { + "epoch": 28.36, + "learning_rate": 3.582449973494251e-05, + "loss": 2.2412, + "step": 5724500 + }, + { + "epoch": 28.36, + "learning_rate": 3.5823261148516426e-05, + "loss": 2.2771, + "step": 5725000 + }, + { + "epoch": 28.37, + "learning_rate": 3.582202256209034e-05, + "loss": 2.2621, + "step": 5725500 + }, + { + "epoch": 28.37, + "learning_rate": 3.5820788930009956e-05, + "loss": 2.23, + "step": 5726000 + }, + { + "epoch": 28.37, + "learning_rate": 3.5819550343583873e-05, + "loss": 2.2686, + "step": 5726500 + }, + { + "epoch": 28.37, + "learning_rate": 3.581831175715779e-05, + "loss": 2.2551, + "step": 5727000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581707317073171e-05, + "loss": 2.2563, + "step": 5727500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5815834584305624e-05, + "loss": 2.2388, + "step": 5728000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581459599787954e-05, + "loss": 2.2399, + "step": 5728500 + }, + { + "epoch": 28.38, + "learning_rate": 3.581335741145346e-05, + "loss": 2.2514, + "step": 5729000 + }, + { + "epoch": 28.39, + "learning_rate": 3.581212130220023e-05, + "loss": 2.2183, + "step": 5729500 + }, + { + "epoch": 28.39, + "learning_rate": 3.5810885192946996e-05, + "loss": 2.236, + "step": 5730000 + }, + { + "epoch": 28.39, + "learning_rate": 3.580964660652091e-05, + "loss": 2.2386, + "step": 5730500 + }, + { + "epoch": 28.39, + "learning_rate": 3.580840802009483e-05, + "loss": 2.251, + "step": 5731000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5807169433668746e-05, + "loss": 2.2459, + "step": 5731500 + }, + { + "epoch": 28.4, + "learning_rate": 3.5805930847242657e-05, + "loss": 2.2252, + "step": 5732000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5804692260816574e-05, + "loss": 2.247, + "step": 5732500 + }, + { + "epoch": 28.4, + "learning_rate": 3.580345367439049e-05, + "loss": 2.2305, + "step": 5733000 + }, + { + "epoch": 28.41, + "learning_rate": 3.580221508796441e-05, + "loss": 2.2407, + "step": 5733500 + }, + { + "epoch": 28.41, + "learning_rate": 3.5800976501538324e-05, + "loss": 2.251, + "step": 5734000 + }, + { + "epoch": 28.41, + "learning_rate": 3.579973791511224e-05, + "loss": 2.2658, + "step": 5734500 + }, + { + "epoch": 28.41, + "learning_rate": 3.579849932868616e-05, + "loss": 2.2874, + "step": 5735000 + }, + { + "epoch": 28.42, + "learning_rate": 3.5797260742260075e-05, + "loss": 2.2551, + "step": 5735500 + }, + { + "epoch": 28.42, + "learning_rate": 3.579602215583399e-05, + "loss": 2.2694, + "step": 5736000 + }, + { + "epoch": 28.42, + "learning_rate": 3.579478356940791e-05, + "loss": 2.2369, + "step": 5736500 + }, + { + "epoch": 28.42, + "learning_rate": 3.5793544982981826e-05, + "loss": 2.2427, + "step": 5737000 + }, + { + "epoch": 28.43, + "learning_rate": 3.579230639655574e-05, + "loss": 2.2528, + "step": 5737500 + }, + { + "epoch": 28.43, + "learning_rate": 3.579106781012966e-05, + "loss": 2.252, + "step": 5738000 + }, + { + "epoch": 28.43, + "learning_rate": 3.578982922370358e-05, + "loss": 2.25, + "step": 5738500 + }, + { + "epoch": 28.43, + "learning_rate": 3.5788590637277494e-05, + "loss": 2.278, + "step": 5739000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578735205085141e-05, + "loss": 2.2768, + "step": 5739500 + }, + { + "epoch": 28.44, + "learning_rate": 3.578611346442533e-05, + "loss": 2.2398, + "step": 5740000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578487735517209e-05, + "loss": 2.2404, + "step": 5740500 + }, + { + "epoch": 28.44, + "learning_rate": 3.5783638768746006e-05, + "loss": 2.2455, + "step": 5741000 + }, + { + "epoch": 28.45, + "learning_rate": 3.578240018231992e-05, + "loss": 2.2767, + "step": 5741500 + }, + { + "epoch": 28.45, + "learning_rate": 3.578116159589384e-05, + "loss": 2.2687, + "step": 5742000 + }, + { + "epoch": 28.45, + "learning_rate": 3.577992300946776e-05, + "loss": 2.2557, + "step": 5742500 + }, + { + "epoch": 28.45, + "learning_rate": 3.5778684423041674e-05, + "loss": 2.25, + "step": 5743000 + }, + { + "epoch": 28.46, + "learning_rate": 3.577744583661559e-05, + "loss": 2.2547, + "step": 5743500 + }, + { + "epoch": 28.46, + "learning_rate": 3.57762072501895e-05, + "loss": 2.2511, + "step": 5744000 + }, + { + "epoch": 28.46, + "learning_rate": 3.577497114093628e-05, + "loss": 2.2485, + "step": 5744500 + }, + { + "epoch": 28.46, + "learning_rate": 3.577373503168304e-05, + "loss": 2.2567, + "step": 5745000 + }, + { + "epoch": 28.47, + "learning_rate": 3.577249892242981e-05, + "loss": 2.2858, + "step": 5745500 + }, + { + "epoch": 28.47, + "learning_rate": 3.5771260336003725e-05, + "loss": 2.2511, + "step": 5746000 + }, + { + "epoch": 28.47, + "learning_rate": 3.577002174957764e-05, + "loss": 2.248, + "step": 5746500 + }, + { + "epoch": 28.47, + "learning_rate": 3.576878564032442e-05, + "loss": 2.2604, + "step": 5747000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576754705389833e-05, + "loss": 2.2871, + "step": 5747500 + }, + { + "epoch": 28.48, + "learning_rate": 3.5766308467472244e-05, + "loss": 2.2474, + "step": 5748000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576506988104616e-05, + "loss": 2.2581, + "step": 5748500 + }, + { + "epoch": 28.48, + "learning_rate": 3.576383129462008e-05, + "loss": 2.228, + "step": 5749000 + }, + { + "epoch": 28.49, + "learning_rate": 3.5762592708193995e-05, + "loss": 2.2505, + "step": 5749500 + }, + { + "epoch": 28.49, + "learning_rate": 3.576135412176791e-05, + "loss": 2.2651, + "step": 5750000 + }, + { + "epoch": 28.49, + "learning_rate": 3.576011553534183e-05, + "loss": 2.24, + "step": 5750500 + }, + { + "epoch": 28.49, + "learning_rate": 3.5758876948915746e-05, + "loss": 2.2268, + "step": 5751000 + }, + { + "epoch": 28.49, + "learning_rate": 3.575763836248966e-05, + "loss": 2.2565, + "step": 5751500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575639977606357e-05, + "loss": 2.25, + "step": 5752000 + }, + { + "epoch": 28.5, + "learning_rate": 3.575516118963749e-05, + "loss": 2.2529, + "step": 5752500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575392508038426e-05, + "loss": 2.2352, + "step": 5753000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5752686493958175e-05, + "loss": 2.2627, + "step": 5753500 + }, + { + "epoch": 28.51, + "learning_rate": 3.575144790753209e-05, + "loss": 2.2574, + "step": 5754000 + }, + { + "epoch": 28.51, + "learning_rate": 3.575020932110601e-05, + "loss": 2.2656, + "step": 5754500 + }, + { + "epoch": 28.51, + "learning_rate": 3.574897321185278e-05, + "loss": 2.2663, + "step": 5755000 + }, + { + "epoch": 28.51, + "learning_rate": 3.5747734625426695e-05, + "loss": 2.2273, + "step": 5755500 + }, + { + "epoch": 28.52, + "learning_rate": 3.574649603900061e-05, + "loss": 2.2567, + "step": 5756000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574525745257453e-05, + "loss": 2.2755, + "step": 5756500 + }, + { + "epoch": 28.52, + "learning_rate": 3.5744018866148446e-05, + "loss": 2.246, + "step": 5757000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574278027972236e-05, + "loss": 2.2647, + "step": 5757500 + }, + { + "epoch": 28.53, + "learning_rate": 3.574154169329628e-05, + "loss": 2.2595, + "step": 5758000 + }, + { + "epoch": 28.53, + "learning_rate": 3.574030558404304e-05, + "loss": 2.2468, + "step": 5758500 + }, + { + "epoch": 28.53, + "learning_rate": 3.573906699761696e-05, + "loss": 2.2384, + "step": 5759000 + }, + { + "epoch": 28.53, + "learning_rate": 3.5737828411190876e-05, + "loss": 2.2599, + "step": 5759500 + }, + { + "epoch": 28.54, + "learning_rate": 3.573658982476479e-05, + "loss": 2.2706, + "step": 5760000 + }, + { + "epoch": 28.54, + "learning_rate": 3.573535123833871e-05, + "loss": 2.2765, + "step": 5760500 + }, + { + "epoch": 28.54, + "learning_rate": 3.5734112651912626e-05, + "loss": 2.2622, + "step": 5761000 + }, + { + "epoch": 28.54, + "learning_rate": 3.573287406548654e-05, + "loss": 2.2588, + "step": 5761500 + }, + { + "epoch": 28.55, + "learning_rate": 3.573163547906046e-05, + "loss": 2.2274, + "step": 5762000 + }, + { + "epoch": 28.55, + "learning_rate": 3.573039689263438e-05, + "loss": 2.264, + "step": 5762500 + }, + { + "epoch": 28.55, + "learning_rate": 3.5729158306208294e-05, + "loss": 2.2544, + "step": 5763000 + }, + { + "epoch": 28.55, + "learning_rate": 3.572792219695506e-05, + "loss": 2.2477, + "step": 5763500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572668361052898e-05, + "loss": 2.2682, + "step": 5764000 + }, + { + "epoch": 28.56, + "learning_rate": 3.57254450241029e-05, + "loss": 2.238, + "step": 5764500 + }, + { + "epoch": 28.56, + "learning_rate": 3.5724206437676814e-05, + "loss": 2.2259, + "step": 5765000 + }, + { + "epoch": 28.56, + "learning_rate": 3.5722970328423576e-05, + "loss": 2.2703, + "step": 5765500 + }, + { + "epoch": 28.57, + "learning_rate": 3.572173174199749e-05, + "loss": 2.2302, + "step": 5766000 + }, + { + "epoch": 28.57, + "learning_rate": 3.572049315557141e-05, + "loss": 2.2499, + "step": 5766500 + }, + { + "epoch": 28.57, + "learning_rate": 3.5719254569145326e-05, + "loss": 2.2483, + "step": 5767000 + }, + { + "epoch": 28.57, + "learning_rate": 3.5718015982719243e-05, + "loss": 2.272, + "step": 5767500 + }, + { + "epoch": 28.58, + "learning_rate": 3.571677987346601e-05, + "loss": 2.2575, + "step": 5768000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571554128703993e-05, + "loss": 2.2488, + "step": 5768500 + }, + { + "epoch": 28.58, + "learning_rate": 3.5714302700613846e-05, + "loss": 2.2621, + "step": 5769000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571306411418776e-05, + "loss": 2.2267, + "step": 5769500 + }, + { + "epoch": 28.59, + "learning_rate": 3.571182552776168e-05, + "loss": 2.2648, + "step": 5770000 + }, + { + "epoch": 28.59, + "learning_rate": 3.57105869413356e-05, + "loss": 2.235, + "step": 5770500 + }, + { + "epoch": 28.59, + "learning_rate": 3.5709348354909514e-05, + "loss": 2.2069, + "step": 5771000 + }, + { + "epoch": 28.59, + "learning_rate": 3.570810976848343e-05, + "loss": 2.2329, + "step": 5771500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570687365923019e-05, + "loss": 2.2703, + "step": 5772000 + }, + { + "epoch": 28.6, + "learning_rate": 3.570563507280411e-05, + "loss": 2.2646, + "step": 5772500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570439648637803e-05, + "loss": 2.2603, + "step": 5773000 + }, + { + "epoch": 28.6, + "learning_rate": 3.5703160377124795e-05, + "loss": 2.2573, + "step": 5773500 + }, + { + "epoch": 28.61, + "learning_rate": 3.5701924267871564e-05, + "loss": 2.2794, + "step": 5774000 + }, + { + "epoch": 28.61, + "learning_rate": 3.570068568144548e-05, + "loss": 2.247, + "step": 5774500 + }, + { + "epoch": 28.61, + "learning_rate": 3.56994470950194e-05, + "loss": 2.2595, + "step": 5775000 + }, + { + "epoch": 28.61, + "learning_rate": 3.569820850859331e-05, + "loss": 2.2653, + "step": 5775500 + }, + { + "epoch": 28.62, + "learning_rate": 3.5696972399340084e-05, + "loss": 2.2649, + "step": 5776000 + }, + { + "epoch": 28.62, + "learning_rate": 3.5695733812914e-05, + "loss": 2.2686, + "step": 5776500 + }, + { + "epoch": 28.62, + "learning_rate": 3.569449522648792e-05, + "loss": 2.2608, + "step": 5777000 + }, + { + "epoch": 28.62, + "learning_rate": 3.5693256640061835e-05, + "loss": 2.2304, + "step": 5777500 + }, + { + "epoch": 28.63, + "learning_rate": 3.5692020530808603e-05, + "loss": 2.2519, + "step": 5778000 + }, + { + "epoch": 28.63, + "learning_rate": 3.569078194438252e-05, + "loss": 2.2614, + "step": 5778500 + }, + { + "epoch": 28.63, + "learning_rate": 3.568954335795644e-05, + "loss": 2.2653, + "step": 5779000 + }, + { + "epoch": 28.63, + "learning_rate": 3.5688304771530354e-05, + "loss": 2.2743, + "step": 5779500 + }, + { + "epoch": 28.64, + "learning_rate": 3.5687066185104264e-05, + "loss": 2.2705, + "step": 5780000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568583007585103e-05, + "loss": 2.2531, + "step": 5780500 + }, + { + "epoch": 28.64, + "learning_rate": 3.568459148942495e-05, + "loss": 2.2766, + "step": 5781000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568335290299887e-05, + "loss": 2.2445, + "step": 5781500 + }, + { + "epoch": 28.65, + "learning_rate": 3.5682114316572784e-05, + "loss": 2.2499, + "step": 5782000 + }, + { + "epoch": 28.65, + "learning_rate": 3.56808757301467e-05, + "loss": 2.2532, + "step": 5782500 + }, + { + "epoch": 28.65, + "learning_rate": 3.567963714372062e-05, + "loss": 2.2463, + "step": 5783000 + }, + { + "epoch": 28.65, + "learning_rate": 3.5678398557294535e-05, + "loss": 2.2268, + "step": 5783500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567715997086845e-05, + "loss": 2.2623, + "step": 5784000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567592138444237e-05, + "loss": 2.2633, + "step": 5784500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567468279801628e-05, + "loss": 2.2451, + "step": 5785000 + }, + { + "epoch": 28.66, + "learning_rate": 3.5673444211590196e-05, + "loss": 2.2643, + "step": 5785500 + }, + { + "epoch": 28.67, + "learning_rate": 3.567220562516411e-05, + "loss": 2.2295, + "step": 5786000 + }, + { + "epoch": 28.67, + "learning_rate": 3.567096703873803e-05, + "loss": 2.2327, + "step": 5786500 + }, + { + "epoch": 28.67, + "learning_rate": 3.5669728452311946e-05, + "loss": 2.2591, + "step": 5787000 + }, + { + "epoch": 28.67, + "learning_rate": 3.5668489865885863e-05, + "loss": 2.2324, + "step": 5787500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566725375663263e-05, + "loss": 2.279, + "step": 5788000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566601517020655e-05, + "loss": 2.2687, + "step": 5788500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566477658378046e-05, + "loss": 2.2327, + "step": 5789000 + }, + { + "epoch": 28.68, + "learning_rate": 3.5663537997354376e-05, + "loss": 2.2751, + "step": 5789500 + }, + { + "epoch": 28.69, + "learning_rate": 3.566230188810115e-05, + "loss": 2.2476, + "step": 5790000 + }, + { + "epoch": 28.69, + "learning_rate": 3.566106330167507e-05, + "loss": 2.2564, + "step": 5790500 + }, + { + "epoch": 28.69, + "learning_rate": 3.5659824715248986e-05, + "loss": 2.2629, + "step": 5791000 + }, + { + "epoch": 28.69, + "learning_rate": 3.5658586128822896e-05, + "loss": 2.2568, + "step": 5791500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565735001956967e-05, + "loss": 2.2414, + "step": 5792000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565611143314359e-05, + "loss": 2.2787, + "step": 5792500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565487532389035e-05, + "loss": 2.2533, + "step": 5793000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565363673746427e-05, + "loss": 2.2566, + "step": 5793500 + }, + { + "epoch": 28.71, + "learning_rate": 3.5652400628211036e-05, + "loss": 2.2658, + "step": 5794000 + }, + { + "epoch": 28.71, + "learning_rate": 3.565116204178495e-05, + "loss": 2.2591, + "step": 5794500 + }, + { + "epoch": 28.71, + "learning_rate": 3.564992345535887e-05, + "loss": 2.2673, + "step": 5795000 + }, + { + "epoch": 28.71, + "learning_rate": 3.564868486893279e-05, + "loss": 2.2577, + "step": 5795500 + }, + { + "epoch": 28.72, + "learning_rate": 3.5647446282506704e-05, + "loss": 2.2438, + "step": 5796000 + }, + { + "epoch": 28.72, + "learning_rate": 3.564620769608062e-05, + "loss": 2.2575, + "step": 5796500 + }, + { + "epoch": 28.72, + "learning_rate": 3.564496910965454e-05, + "loss": 2.2445, + "step": 5797000 + }, + { + "epoch": 28.72, + "learning_rate": 3.5643730523228455e-05, + "loss": 2.2777, + "step": 5797500 + }, + { + "epoch": 28.73, + "learning_rate": 3.564249193680237e-05, + "loss": 2.2571, + "step": 5798000 + }, + { + "epoch": 28.73, + "learning_rate": 3.564125335037629e-05, + "loss": 2.2519, + "step": 5798500 + }, + { + "epoch": 28.73, + "learning_rate": 3.5640014763950205e-05, + "loss": 2.2186, + "step": 5799000 + }, + { + "epoch": 28.73, + "learning_rate": 3.563877617752412e-05, + "loss": 2.2601, + "step": 5799500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563753759109803e-05, + "loss": 2.2653, + "step": 5800000 + }, + { + "epoch": 28.74, + "learning_rate": 3.563629900467195e-05, + "loss": 2.2519, + "step": 5800500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563506289541872e-05, + "loss": 2.2545, + "step": 5801000 + }, + { + "epoch": 28.74, + "learning_rate": 3.5633824308992635e-05, + "loss": 2.2799, + "step": 5801500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563258572256655e-05, + "loss": 2.2447, + "step": 5802000 + }, + { + "epoch": 28.75, + "learning_rate": 3.563134961331332e-05, + "loss": 2.297, + "step": 5802500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563011102688724e-05, + "loss": 2.2558, + "step": 5803000 + }, + { + "epoch": 28.75, + "learning_rate": 3.5628872440461155e-05, + "loss": 2.2682, + "step": 5803500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562763385403507e-05, + "loss": 2.2454, + "step": 5804000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562639526760899e-05, + "loss": 2.2382, + "step": 5804500 + }, + { + "epoch": 28.76, + "learning_rate": 3.5625156681182905e-05, + "loss": 2.2444, + "step": 5805000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562391809475682e-05, + "loss": 2.2791, + "step": 5805500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562267950833074e-05, + "loss": 2.2489, + "step": 5806000 + }, + { + "epoch": 28.77, + "learning_rate": 3.56214433990775e-05, + "loss": 2.2662, + "step": 5806500 + }, + { + "epoch": 28.77, + "learning_rate": 3.562020481265142e-05, + "loss": 2.241, + "step": 5807000 + }, + { + "epoch": 28.77, + "learning_rate": 3.5618966226225335e-05, + "loss": 2.2429, + "step": 5807500 + }, + { + "epoch": 28.77, + "learning_rate": 3.561772763979925e-05, + "loss": 2.2777, + "step": 5808000 + }, + { + "epoch": 28.78, + "learning_rate": 3.561648905337317e-05, + "loss": 2.2731, + "step": 5808500 + }, + { + "epoch": 28.78, + "learning_rate": 3.5615250466947086e-05, + "loss": 2.2572, + "step": 5809000 + }, + { + "epoch": 28.78, + "learning_rate": 3.5614014357693855e-05, + "loss": 2.2283, + "step": 5809500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561277577126777e-05, + "loss": 2.2644, + "step": 5810000 + }, + { + "epoch": 28.79, + "learning_rate": 3.561153718484169e-05, + "loss": 2.2584, + "step": 5810500 + }, + { + "epoch": 28.79, + "learning_rate": 3.5610298598415606e-05, + "loss": 2.2588, + "step": 5811000 + }, + { + "epoch": 28.79, + "learning_rate": 3.560906001198952e-05, + "loss": 2.263, + "step": 5811500 + }, + { + "epoch": 28.79, + "learning_rate": 3.5607826379909136e-05, + "loss": 2.2713, + "step": 5812000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560658779348305e-05, + "loss": 2.2487, + "step": 5812500 + }, + { + "epoch": 28.8, + "learning_rate": 3.560534920705697e-05, + "loss": 2.2539, + "step": 5813000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560411062063089e-05, + "loss": 2.2511, + "step": 5813500 + }, + { + "epoch": 28.8, + "learning_rate": 3.5602872034204804e-05, + "loss": 2.2785, + "step": 5814000 + }, + { + "epoch": 28.81, + "learning_rate": 3.560163592495157e-05, + "loss": 2.2514, + "step": 5814500 + }, + { + "epoch": 28.81, + "learning_rate": 3.560039733852549e-05, + "loss": 2.2603, + "step": 5815000 + }, + { + "epoch": 28.81, + "learning_rate": 3.559915875209941e-05, + "loss": 2.2606, + "step": 5815500 + }, + { + "epoch": 28.81, + "learning_rate": 3.5597920165673324e-05, + "loss": 2.247, + "step": 5816000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559668157924724e-05, + "loss": 2.2564, + "step": 5816500 + }, + { + "epoch": 28.82, + "learning_rate": 3.559544299282115e-05, + "loss": 2.2528, + "step": 5817000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559420688356792e-05, + "loss": 2.2559, + "step": 5817500 + }, + { + "epoch": 28.82, + "learning_rate": 3.5592968297141837e-05, + "loss": 2.2693, + "step": 5818000 + }, + { + "epoch": 28.83, + "learning_rate": 3.559173218788861e-05, + "loss": 2.2497, + "step": 5818500 + }, + { + "epoch": 28.83, + "learning_rate": 3.559049360146253e-05, + "loss": 2.2595, + "step": 5819000 + }, + { + "epoch": 28.83, + "learning_rate": 3.5589255015036446e-05, + "loss": 2.2414, + "step": 5819500 + }, + { + "epoch": 28.83, + "learning_rate": 3.5588016428610356e-05, + "loss": 2.2523, + "step": 5820000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558677784218427e-05, + "loss": 2.2844, + "step": 5820500 + }, + { + "epoch": 28.84, + "learning_rate": 3.558553925575819e-05, + "loss": 2.2532, + "step": 5821000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558430314650496e-05, + "loss": 2.245, + "step": 5821500 + }, + { + "epoch": 28.84, + "learning_rate": 3.5583064560078876e-05, + "loss": 2.2501, + "step": 5822000 + }, + { + "epoch": 28.85, + "learning_rate": 3.558182597365279e-05, + "loss": 2.2561, + "step": 5822500 + }, + { + "epoch": 28.85, + "learning_rate": 3.558058738722671e-05, + "loss": 2.2602, + "step": 5823000 + }, + { + "epoch": 28.85, + "learning_rate": 3.557935127797348e-05, + "loss": 2.2512, + "step": 5823500 + }, + { + "epoch": 28.85, + "learning_rate": 3.5578112691547395e-05, + "loss": 2.2484, + "step": 5824000 + }, + { + "epoch": 28.86, + "learning_rate": 3.557687410512131e-05, + "loss": 2.2785, + "step": 5824500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557563551869523e-05, + "loss": 2.2271, + "step": 5825000 + }, + { + "epoch": 28.86, + "learning_rate": 3.5574396932269146e-05, + "loss": 2.264, + "step": 5825500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557315834584306e-05, + "loss": 2.2774, + "step": 5826000 + }, + { + "epoch": 28.87, + "learning_rate": 3.557191975941697e-05, + "loss": 2.2599, + "step": 5826500 + }, + { + "epoch": 28.87, + "learning_rate": 3.557068117299089e-05, + "loss": 2.2745, + "step": 5827000 + }, + { + "epoch": 28.87, + "learning_rate": 3.556944258656481e-05, + "loss": 2.2626, + "step": 5827500 + }, + { + "epoch": 28.87, + "learning_rate": 3.5568206477311576e-05, + "loss": 2.2708, + "step": 5828000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556696789088549e-05, + "loss": 2.277, + "step": 5828500 + }, + { + "epoch": 28.88, + "learning_rate": 3.556572930445941e-05, + "loss": 2.2848, + "step": 5829000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556449319520618e-05, + "loss": 2.2448, + "step": 5829500 + }, + { + "epoch": 28.88, + "learning_rate": 3.5563254608780095e-05, + "loss": 2.2617, + "step": 5830000 + }, + { + "epoch": 28.89, + "learning_rate": 3.556201602235401e-05, + "loss": 2.2809, + "step": 5830500 + }, + { + "epoch": 28.89, + "learning_rate": 3.556077743592793e-05, + "loss": 2.2586, + "step": 5831000 + }, + { + "epoch": 28.89, + "learning_rate": 3.5559538849501846e-05, + "loss": 2.232, + "step": 5831500 + }, + { + "epoch": 28.89, + "learning_rate": 3.555830026307576e-05, + "loss": 2.2542, + "step": 5832000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555706167664967e-05, + "loss": 2.2311, + "step": 5832500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555582556739644e-05, + "loss": 2.2316, + "step": 5833000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555458945814321e-05, + "loss": 2.2274, + "step": 5833500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555335087171713e-05, + "loss": 2.2638, + "step": 5834000 + }, + { + "epoch": 28.91, + "learning_rate": 3.5552112285291045e-05, + "loss": 2.2413, + "step": 5834500 + }, + { + "epoch": 28.91, + "learning_rate": 3.555087369886496e-05, + "loss": 2.2671, + "step": 5835000 + }, + { + "epoch": 28.91, + "learning_rate": 3.554963511243888e-05, + "loss": 2.2601, + "step": 5835500 + }, + { + "epoch": 28.91, + "learning_rate": 3.5548396526012796e-05, + "loss": 2.2831, + "step": 5836000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554715793958671e-05, + "loss": 2.2749, + "step": 5836500 + }, + { + "epoch": 28.92, + "learning_rate": 3.554591935316063e-05, + "loss": 2.2563, + "step": 5837000 + }, + { + "epoch": 28.92, + "learning_rate": 3.5544680766734546e-05, + "loss": 2.285, + "step": 5837500 + }, + { + "epoch": 28.92, + "learning_rate": 3.554344713465416e-05, + "loss": 2.2429, + "step": 5838000 + }, + { + "epoch": 28.93, + "learning_rate": 3.554220854822808e-05, + "loss": 2.2898, + "step": 5838500 + }, + { + "epoch": 28.93, + "learning_rate": 3.5540969961801994e-05, + "loss": 2.243, + "step": 5839000 + }, + { + "epoch": 28.93, + "learning_rate": 3.553973137537591e-05, + "loss": 2.2507, + "step": 5839500 + }, + { + "epoch": 28.93, + "learning_rate": 3.553849278894983e-05, + "loss": 2.2531, + "step": 5840000 + }, + { + "epoch": 28.94, + "learning_rate": 3.5537254202523745e-05, + "loss": 2.2665, + "step": 5840500 + }, + { + "epoch": 28.94, + "learning_rate": 3.553601561609766e-05, + "loss": 2.2682, + "step": 5841000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553477702967158e-05, + "loss": 2.2489, + "step": 5841500 + }, + { + "epoch": 28.94, + "learning_rate": 3.5533538443245496e-05, + "loss": 2.2418, + "step": 5842000 + }, + { + "epoch": 28.95, + "learning_rate": 3.5532302333992265e-05, + "loss": 2.2649, + "step": 5842500 + }, + { + "epoch": 28.95, + "learning_rate": 3.553106374756618e-05, + "loss": 2.2535, + "step": 5843000 + }, + { + "epoch": 28.95, + "learning_rate": 3.55298251611401e-05, + "loss": 2.2434, + "step": 5843500 + }, + { + "epoch": 28.95, + "learning_rate": 3.5528586574714015e-05, + "loss": 2.2766, + "step": 5844000 + }, + { + "epoch": 28.96, + "learning_rate": 3.552734798828793e-05, + "loss": 2.2398, + "step": 5844500 + }, + { + "epoch": 28.96, + "learning_rate": 3.552610940186184e-05, + "loss": 2.2472, + "step": 5845000 + }, + { + "epoch": 28.96, + "learning_rate": 3.552487081543576e-05, + "loss": 2.2474, + "step": 5845500 + }, + { + "epoch": 28.96, + "learning_rate": 3.5523632229009676e-05, + "loss": 2.2566, + "step": 5846000 + }, + { + "epoch": 28.97, + "learning_rate": 3.552239364258359e-05, + "loss": 2.2593, + "step": 5846500 + }, + { + "epoch": 28.97, + "learning_rate": 3.552115505615751e-05, + "loss": 2.2721, + "step": 5847000 + }, + { + "epoch": 28.97, + "learning_rate": 3.551992142407713e-05, + "loss": 2.2743, + "step": 5847500 + }, + { + "epoch": 28.97, + "learning_rate": 3.551868283765105e-05, + "loss": 2.2448, + "step": 5848000 + }, + { + "epoch": 28.98, + "learning_rate": 3.5517444251224965e-05, + "loss": 2.289, + "step": 5848500 + }, + { + "epoch": 28.98, + "learning_rate": 3.551620566479888e-05, + "loss": 2.264, + "step": 5849000 + }, + { + "epoch": 28.98, + "learning_rate": 3.55149670783728e-05, + "loss": 2.2393, + "step": 5849500 + }, + { + "epoch": 28.98, + "learning_rate": 3.5513728491946715e-05, + "loss": 2.241, + "step": 5850000 + }, + { + "epoch": 28.99, + "learning_rate": 3.551249238269348e-05, + "loss": 2.232, + "step": 5850500 + }, + { + "epoch": 28.99, + "learning_rate": 3.5511253796267394e-05, + "loss": 2.2528, + "step": 5851000 + }, + { + "epoch": 28.99, + "learning_rate": 3.551001520984131e-05, + "loss": 2.2253, + "step": 5851500 + }, + { + "epoch": 28.99, + "learning_rate": 3.550877662341523e-05, + "loss": 2.2537, + "step": 5852000 + }, + { + "epoch": 29.0, + "learning_rate": 3.5507538036989145e-05, + "loss": 2.2496, + "step": 5852500 + }, + { + "epoch": 29.0, + "learning_rate": 3.550629945056306e-05, + "loss": 2.2503, + "step": 5853000 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.6575265605183357, + "eval_accuracy_mlm": 0.6131820592582494, + "eval_accuracy_nsp": 0.8667511246906365, + "eval_loss": 2.3342039585113525, + "eval_runtime": 146.0117, + "eval_samples_per_second": 1746.155, + "eval_steps_per_second": 72.761, + "step": 5853447 + }, + { + "epoch": 29.0, + "learning_rate": 3.550506581848268e-05, + "loss": 2.2472, + "step": 5853500 + }, + { + "epoch": 29.0, + "learning_rate": 3.55038272320566e-05, + "loss": 2.2281, + "step": 5854000 + }, + { + "epoch": 29.01, + "learning_rate": 3.550258864563052e-05, + "loss": 2.233, + "step": 5854500 + }, + { + "epoch": 29.01, + "learning_rate": 3.550135005920443e-05, + "loss": 2.2422, + "step": 5855000 + }, + { + "epoch": 29.01, + "learning_rate": 3.5500111472778344e-05, + "loss": 2.2156, + "step": 5855500 + }, + { + "epoch": 29.01, + "learning_rate": 3.549887288635226e-05, + "loss": 2.2308, + "step": 5856000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549763429992618e-05, + "loss": 2.2383, + "step": 5856500 + }, + { + "epoch": 29.02, + "learning_rate": 3.5496395713500095e-05, + "loss": 2.2424, + "step": 5857000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549515712707401e-05, + "loss": 2.2365, + "step": 5857500 + }, + { + "epoch": 29.02, + "learning_rate": 3.549391854064793e-05, + "loss": 2.2243, + "step": 5858000 + }, + { + "epoch": 29.03, + "learning_rate": 3.54926824313947e-05, + "loss": 2.2149, + "step": 5858500 + }, + { + "epoch": 29.03, + "learning_rate": 3.549144632214147e-05, + "loss": 2.2196, + "step": 5859000 + }, + { + "epoch": 29.03, + "learning_rate": 3.549020773571538e-05, + "loss": 2.2177, + "step": 5859500 + }, + { + "epoch": 29.03, + "learning_rate": 3.54889691492893e-05, + "loss": 2.2114, + "step": 5860000 + }, + { + "epoch": 29.03, + "learning_rate": 3.548773056286322e-05, + "loss": 2.2293, + "step": 5860500 + }, + { + "epoch": 29.04, + "learning_rate": 3.5486491976437134e-05, + "loss": 2.2463, + "step": 5861000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548525339001105e-05, + "loss": 2.2409, + "step": 5861500 + }, + { + "epoch": 29.04, + "learning_rate": 3.548401480358496e-05, + "loss": 2.2488, + "step": 5862000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548277621715888e-05, + "loss": 2.2524, + "step": 5862500 + }, + { + "epoch": 29.05, + "learning_rate": 3.5481537630732795e-05, + "loss": 2.2146, + "step": 5863000 + }, + { + "epoch": 29.05, + "learning_rate": 3.548029904430671e-05, + "loss": 2.2241, + "step": 5863500 + }, + { + "epoch": 29.05, + "learning_rate": 3.547906045788063e-05, + "loss": 2.2167, + "step": 5864000 + }, + { + "epoch": 29.05, + "learning_rate": 3.5477821871454545e-05, + "loss": 2.2205, + "step": 5864500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547658328502846e-05, + "loss": 2.2322, + "step": 5865000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547534469860238e-05, + "loss": 2.2218, + "step": 5865500 + }, + { + "epoch": 29.06, + "learning_rate": 3.5474106112176296e-05, + "loss": 2.2323, + "step": 5866000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547286752575021e-05, + "loss": 2.2263, + "step": 5866500 + }, + { + "epoch": 29.07, + "learning_rate": 3.547162893932413e-05, + "loss": 2.1892, + "step": 5867000 + }, + { + "epoch": 29.07, + "learning_rate": 3.547039035289805e-05, + "loss": 2.2291, + "step": 5867500 + }, + { + "epoch": 29.07, + "learning_rate": 3.5469151766471964e-05, + "loss": 2.2354, + "step": 5868000 + }, + { + "epoch": 29.07, + "learning_rate": 3.546791318004588e-05, + "loss": 2.2302, + "step": 5868500 + }, + { + "epoch": 29.08, + "learning_rate": 3.54666745936198e-05, + "loss": 2.2066, + "step": 5869000 + }, + { + "epoch": 29.08, + "learning_rate": 3.5465436007193715e-05, + "loss": 2.219, + "step": 5869500 + }, + { + "epoch": 29.08, + "learning_rate": 3.5464197420767625e-05, + "loss": 2.2361, + "step": 5870000 + }, + { + "epoch": 29.08, + "learning_rate": 3.54629613115144e-05, + "loss": 2.2123, + "step": 5870500 + }, + { + "epoch": 29.09, + "learning_rate": 3.546172272508832e-05, + "loss": 2.2175, + "step": 5871000 + }, + { + "epoch": 29.09, + "learning_rate": 3.546048661583508e-05, + "loss": 2.2331, + "step": 5871500 + }, + { + "epoch": 29.09, + "learning_rate": 3.545925050658185e-05, + "loss": 2.2405, + "step": 5872000 + }, + { + "epoch": 29.09, + "learning_rate": 3.5458011920155765e-05, + "loss": 2.2258, + "step": 5872500 + }, + { + "epoch": 29.1, + "learning_rate": 3.5456775810902534e-05, + "loss": 2.2055, + "step": 5873000 + }, + { + "epoch": 29.1, + "learning_rate": 3.545553722447645e-05, + "loss": 2.2264, + "step": 5873500 + }, + { + "epoch": 29.1, + "learning_rate": 3.545429863805037e-05, + "loss": 2.2461, + "step": 5874000 + }, + { + "epoch": 29.1, + "learning_rate": 3.5453060051624285e-05, + "loss": 2.2322, + "step": 5874500 + }, + { + "epoch": 29.11, + "learning_rate": 3.54518214651982e-05, + "loss": 2.2506, + "step": 5875000 + }, + { + "epoch": 29.11, + "learning_rate": 3.545058287877211e-05, + "loss": 2.2381, + "step": 5875500 + }, + { + "epoch": 29.11, + "learning_rate": 3.544934429234603e-05, + "loss": 2.2179, + "step": 5876000 + }, + { + "epoch": 29.11, + "learning_rate": 3.5448105705919946e-05, + "loss": 2.2353, + "step": 5876500 + }, + { + "epoch": 29.12, + "learning_rate": 3.544686711949386e-05, + "loss": 2.2316, + "step": 5877000 + }, + { + "epoch": 29.12, + "learning_rate": 3.544563101024063e-05, + "loss": 2.2306, + "step": 5877500 + }, + { + "epoch": 29.12, + "learning_rate": 3.544439242381455e-05, + "loss": 2.2229, + "step": 5878000 + }, + { + "epoch": 29.12, + "learning_rate": 3.5443153837388465e-05, + "loss": 2.2512, + "step": 5878500 + }, + { + "epoch": 29.13, + "learning_rate": 3.544191525096238e-05, + "loss": 2.2306, + "step": 5879000 + }, + { + "epoch": 29.13, + "learning_rate": 3.54406766645363e-05, + "loss": 2.2534, + "step": 5879500 + }, + { + "epoch": 29.13, + "learning_rate": 3.543944055528307e-05, + "loss": 2.2212, + "step": 5880000 + }, + { + "epoch": 29.13, + "learning_rate": 3.5438201968856985e-05, + "loss": 2.2609, + "step": 5880500 + }, + { + "epoch": 29.14, + "learning_rate": 3.54369633824309e-05, + "loss": 2.2195, + "step": 5881000 + }, + { + "epoch": 29.14, + "learning_rate": 3.543572727317767e-05, + "loss": 2.2432, + "step": 5881500 + }, + { + "epoch": 29.14, + "learning_rate": 3.543448868675159e-05, + "loss": 2.2379, + "step": 5882000 + }, + { + "epoch": 29.14, + "learning_rate": 3.5433250100325504e-05, + "loss": 2.2313, + "step": 5882500 + }, + { + "epoch": 29.15, + "learning_rate": 3.543201151389942e-05, + "loss": 2.2383, + "step": 5883000 + }, + { + "epoch": 29.15, + "learning_rate": 3.543077292747333e-05, + "loss": 2.2507, + "step": 5883500 + }, + { + "epoch": 29.15, + "learning_rate": 3.542953434104725e-05, + "loss": 2.2331, + "step": 5884000 + }, + { + "epoch": 29.15, + "learning_rate": 3.5428295754621165e-05, + "loss": 2.2345, + "step": 5884500 + }, + { + "epoch": 29.16, + "learning_rate": 3.542705964536794e-05, + "loss": 2.2113, + "step": 5885000 + }, + { + "epoch": 29.16, + "learning_rate": 3.542582105894186e-05, + "loss": 2.2003, + "step": 5885500 + }, + { + "epoch": 29.16, + "learning_rate": 3.5424582472515775e-05, + "loss": 2.2238, + "step": 5886000 + }, + { + "epoch": 29.16, + "learning_rate": 3.5423343886089685e-05, + "loss": 2.2471, + "step": 5886500 + }, + { + "epoch": 29.17, + "learning_rate": 3.54221052996636e-05, + "loss": 2.1966, + "step": 5887000 + }, + { + "epoch": 29.17, + "learning_rate": 3.542086671323752e-05, + "loss": 2.2397, + "step": 5887500 + }, + { + "epoch": 29.17, + "learning_rate": 3.5419628126811436e-05, + "loss": 2.2606, + "step": 5888000 + }, + { + "epoch": 29.17, + "learning_rate": 3.541838954038535e-05, + "loss": 2.2447, + "step": 5888500 + }, + { + "epoch": 29.18, + "learning_rate": 3.541715095395926e-05, + "loss": 2.2425, + "step": 5889000 + }, + { + "epoch": 29.18, + "learning_rate": 3.541591236753318e-05, + "loss": 2.226, + "step": 5889500 + }, + { + "epoch": 29.18, + "learning_rate": 3.54146737811071e-05, + "loss": 2.2204, + "step": 5890000 + }, + { + "epoch": 29.18, + "learning_rate": 3.5413437671853866e-05, + "loss": 2.2296, + "step": 5890500 + }, + { + "epoch": 29.19, + "learning_rate": 3.541220156260064e-05, + "loss": 2.2481, + "step": 5891000 + }, + { + "epoch": 29.19, + "learning_rate": 3.541096297617456e-05, + "loss": 2.2228, + "step": 5891500 + }, + { + "epoch": 29.19, + "learning_rate": 3.5409724389748475e-05, + "loss": 2.2576, + "step": 5892000 + }, + { + "epoch": 29.19, + "learning_rate": 3.5408485803322385e-05, + "loss": 2.2459, + "step": 5892500 + }, + { + "epoch": 29.2, + "learning_rate": 3.54072472168963e-05, + "loss": 2.2061, + "step": 5893000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540600863047022e-05, + "loss": 2.2236, + "step": 5893500 + }, + { + "epoch": 29.2, + "learning_rate": 3.5404770044044136e-05, + "loss": 2.2591, + "step": 5894000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540353145761805e-05, + "loss": 2.2513, + "step": 5894500 + }, + { + "epoch": 29.21, + "learning_rate": 3.540229534836482e-05, + "loss": 2.2299, + "step": 5895000 + }, + { + "epoch": 29.21, + "learning_rate": 3.540105676193874e-05, + "loss": 2.2314, + "step": 5895500 + }, + { + "epoch": 29.21, + "learning_rate": 3.539981817551265e-05, + "loss": 2.2248, + "step": 5896000 + }, + { + "epoch": 29.21, + "learning_rate": 3.5398579589086566e-05, + "loss": 2.2565, + "step": 5896500 + }, + { + "epoch": 29.22, + "learning_rate": 3.539734100266048e-05, + "loss": 2.207, + "step": 5897000 + }, + { + "epoch": 29.22, + "learning_rate": 3.53961024162344e-05, + "loss": 2.2213, + "step": 5897500 + }, + { + "epoch": 29.22, + "learning_rate": 3.5394863829808316e-05, + "loss": 2.2324, + "step": 5898000 + }, + { + "epoch": 29.22, + "learning_rate": 3.539362524338223e-05, + "loss": 2.2222, + "step": 5898500 + }, + { + "epoch": 29.23, + "learning_rate": 3.5392389134129e-05, + "loss": 2.2263, + "step": 5899000 + }, + { + "epoch": 29.23, + "learning_rate": 3.539115054770292e-05, + "loss": 2.2123, + "step": 5899500 + }, + { + "epoch": 29.23, + "learning_rate": 3.5389911961276836e-05, + "loss": 2.2744, + "step": 5900000 + }, + { + "epoch": 29.23, + "learning_rate": 3.538867337485075e-05, + "loss": 2.231, + "step": 5900500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538743478842467e-05, + "loss": 2.2395, + "step": 5901000 + }, + { + "epoch": 29.24, + "learning_rate": 3.538619867917144e-05, + "loss": 2.2213, + "step": 5901500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538496256991821e-05, + "loss": 2.2558, + "step": 5902000 + }, + { + "epoch": 29.24, + "learning_rate": 3.5383723983492124e-05, + "loss": 2.234, + "step": 5902500 + }, + { + "epoch": 29.25, + "learning_rate": 3.538248539706604e-05, + "loss": 2.2555, + "step": 5903000 + }, + { + "epoch": 29.25, + "learning_rate": 3.538124681063996e-05, + "loss": 2.245, + "step": 5903500 + }, + { + "epoch": 29.25, + "learning_rate": 3.5380008224213875e-05, + "loss": 2.239, + "step": 5904000 + }, + { + "epoch": 29.25, + "learning_rate": 3.537876963778779e-05, + "loss": 2.2479, + "step": 5904500 + }, + { + "epoch": 29.26, + "learning_rate": 3.53775310513617e-05, + "loss": 2.2461, + "step": 5905000 + }, + { + "epoch": 29.26, + "learning_rate": 3.537629494210847e-05, + "loss": 2.2416, + "step": 5905500 + }, + { + "epoch": 29.26, + "learning_rate": 3.537505635568239e-05, + "loss": 2.2077, + "step": 5906000 + }, + { + "epoch": 29.26, + "learning_rate": 3.5373817769256305e-05, + "loss": 2.2354, + "step": 5906500 + }, + { + "epoch": 29.27, + "learning_rate": 3.537257918283022e-05, + "loss": 2.2289, + "step": 5907000 + }, + { + "epoch": 29.27, + "learning_rate": 3.537134059640414e-05, + "loss": 2.2317, + "step": 5907500 + }, + { + "epoch": 29.27, + "learning_rate": 3.5370102009978056e-05, + "loss": 2.2199, + "step": 5908000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5368863423551966e-05, + "loss": 2.2475, + "step": 5908500 + }, + { + "epoch": 29.28, + "learning_rate": 3.536762483712588e-05, + "loss": 2.2429, + "step": 5909000 + }, + { + "epoch": 29.28, + "learning_rate": 3.53663862506998e-05, + "loss": 2.2527, + "step": 5909500 + }, + { + "epoch": 29.28, + "learning_rate": 3.5365150141446575e-05, + "loss": 2.2502, + "step": 5910000 + }, + { + "epoch": 29.28, + "learning_rate": 3.536391403219334e-05, + "loss": 2.2442, + "step": 5910500 + }, + { + "epoch": 29.29, + "learning_rate": 3.5362675445767254e-05, + "loss": 2.2257, + "step": 5911000 + }, + { + "epoch": 29.29, + "learning_rate": 3.536143685934117e-05, + "loss": 2.2407, + "step": 5911500 + }, + { + "epoch": 29.29, + "learning_rate": 3.536019827291509e-05, + "loss": 2.238, + "step": 5912000 + }, + { + "epoch": 29.29, + "learning_rate": 3.5358959686489005e-05, + "loss": 2.2623, + "step": 5912500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535772110006292e-05, + "loss": 2.228, + "step": 5913000 + }, + { + "epoch": 29.3, + "learning_rate": 3.535648251363684e-05, + "loss": 2.2201, + "step": 5913500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535524640438361e-05, + "loss": 2.2372, + "step": 5914000 + }, + { + "epoch": 29.3, + "learning_rate": 3.5354007817957525e-05, + "loss": 2.2437, + "step": 5914500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535276923153144e-05, + "loss": 2.2395, + "step": 5915000 + }, + { + "epoch": 29.31, + "learning_rate": 3.535153312227821e-05, + "loss": 2.223, + "step": 5915500 + }, + { + "epoch": 29.31, + "learning_rate": 3.535029453585213e-05, + "loss": 2.2415, + "step": 5916000 + }, + { + "epoch": 29.31, + "learning_rate": 3.5349055949426044e-05, + "loss": 2.2512, + "step": 5916500 + }, + { + "epoch": 29.31, + "learning_rate": 3.5347817362999954e-05, + "loss": 2.2071, + "step": 5917000 + }, + { + "epoch": 29.32, + "learning_rate": 3.534657877657387e-05, + "loss": 2.2317, + "step": 5917500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534534019014779e-05, + "loss": 2.2262, + "step": 5918000 + }, + { + "epoch": 29.32, + "learning_rate": 3.5344101603721705e-05, + "loss": 2.2189, + "step": 5918500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534286301729562e-05, + "loss": 2.2357, + "step": 5919000 + }, + { + "epoch": 29.33, + "learning_rate": 3.534162690804239e-05, + "loss": 2.245, + "step": 5919500 + }, + { + "epoch": 29.33, + "learning_rate": 3.534038832161631e-05, + "loss": 2.2388, + "step": 5920000 + }, + { + "epoch": 29.33, + "learning_rate": 3.5339149735190225e-05, + "loss": 2.2373, + "step": 5920500 + }, + { + "epoch": 29.33, + "learning_rate": 3.533791114876414e-05, + "loss": 2.2406, + "step": 5921000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533667256233806e-05, + "loss": 2.2509, + "step": 5921500 + }, + { + "epoch": 29.34, + "learning_rate": 3.5335433975911976e-05, + "loss": 2.2308, + "step": 5922000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533419538948589e-05, + "loss": 2.2571, + "step": 5922500 + }, + { + "epoch": 29.34, + "learning_rate": 3.533295680305981e-05, + "loss": 2.2244, + "step": 5923000 + }, + { + "epoch": 29.35, + "learning_rate": 3.5331718216633726e-05, + "loss": 2.2334, + "step": 5923500 + }, + { + "epoch": 29.35, + "learning_rate": 3.5330479630207637e-05, + "loss": 2.2752, + "step": 5924000 + }, + { + "epoch": 29.35, + "learning_rate": 3.5329241043781553e-05, + "loss": 2.2395, + "step": 5924500 + }, + { + "epoch": 29.35, + "learning_rate": 3.532800245735547e-05, + "loss": 2.2595, + "step": 5925000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532676387092939e-05, + "loss": 2.2495, + "step": 5925500 + }, + { + "epoch": 29.36, + "learning_rate": 3.5325525284503304e-05, + "loss": 2.2548, + "step": 5926000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532428669807722e-05, + "loss": 2.2415, + "step": 5926500 + }, + { + "epoch": 29.36, + "learning_rate": 3.532304811165114e-05, + "loss": 2.2515, + "step": 5927000 + }, + { + "epoch": 29.37, + "learning_rate": 3.532181447957076e-05, + "loss": 2.2455, + "step": 5927500 + }, + { + "epoch": 29.37, + "learning_rate": 3.532057837031753e-05, + "loss": 2.2333, + "step": 5928000 + }, + { + "epoch": 29.37, + "learning_rate": 3.5319339783891445e-05, + "loss": 2.2581, + "step": 5928500 + }, + { + "epoch": 29.37, + "learning_rate": 3.531810119746536e-05, + "loss": 2.2516, + "step": 5929000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5316865088212123e-05, + "loss": 2.2583, + "step": 5929500 + }, + { + "epoch": 29.38, + "learning_rate": 3.53156289789589e-05, + "loss": 2.2352, + "step": 5930000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5314390392532816e-05, + "loss": 2.2317, + "step": 5930500 + }, + { + "epoch": 29.38, + "learning_rate": 3.5313151806106726e-05, + "loss": 2.2426, + "step": 5931000 + }, + { + "epoch": 29.39, + "learning_rate": 3.531191321968064e-05, + "loss": 2.2369, + "step": 5931500 + }, + { + "epoch": 29.39, + "learning_rate": 3.531067463325456e-05, + "loss": 2.2255, + "step": 5932000 + }, + { + "epoch": 29.39, + "learning_rate": 3.530943604682848e-05, + "loss": 2.2445, + "step": 5932500 + }, + { + "epoch": 29.39, + "learning_rate": 3.5308197460402394e-05, + "loss": 2.2298, + "step": 5933000 + }, + { + "epoch": 29.4, + "learning_rate": 3.530695887397631e-05, + "loss": 2.2297, + "step": 5933500 + }, + { + "epoch": 29.4, + "learning_rate": 3.530572028755023e-05, + "loss": 2.2726, + "step": 5934000 + }, + { + "epoch": 29.4, + "learning_rate": 3.5304481701124145e-05, + "loss": 2.2399, + "step": 5934500 + }, + { + "epoch": 29.4, + "learning_rate": 3.530324311469806e-05, + "loss": 2.262, + "step": 5935000 + }, + { + "epoch": 29.41, + "learning_rate": 3.530200452827198e-05, + "loss": 2.2255, + "step": 5935500 + }, + { + "epoch": 29.41, + "learning_rate": 3.5300765941845895e-05, + "loss": 2.2357, + "step": 5936000 + }, + { + "epoch": 29.41, + "learning_rate": 3.529952735541981e-05, + "loss": 2.2195, + "step": 5936500 + }, + { + "epoch": 29.41, + "learning_rate": 3.5298291246166574e-05, + "loss": 2.2428, + "step": 5937000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529705513691334e-05, + "loss": 2.2513, + "step": 5937500 + }, + { + "epoch": 29.42, + "learning_rate": 3.529581655048726e-05, + "loss": 2.2211, + "step": 5938000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529457796406118e-05, + "loss": 2.2356, + "step": 5938500 + }, + { + "epoch": 29.42, + "learning_rate": 3.5293339377635094e-05, + "loss": 2.2263, + "step": 5939000 + }, + { + "epoch": 29.43, + "learning_rate": 3.529210079120901e-05, + "loss": 2.2276, + "step": 5939500 + }, + { + "epoch": 29.43, + "learning_rate": 3.529086220478293e-05, + "loss": 2.2462, + "step": 5940000 + }, + { + "epoch": 29.43, + "learning_rate": 3.5289623618356845e-05, + "loss": 2.1902, + "step": 5940500 + }, + { + "epoch": 29.43, + "learning_rate": 3.528838503193076e-05, + "loss": 2.26, + "step": 5941000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528714644550468e-05, + "loss": 2.2554, + "step": 5941500 + }, + { + "epoch": 29.44, + "learning_rate": 3.5285907859078596e-05, + "loss": 2.2377, + "step": 5942000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528466927265251e-05, + "loss": 2.2355, + "step": 5942500 + }, + { + "epoch": 29.44, + "learning_rate": 3.528343068622643e-05, + "loss": 2.2532, + "step": 5943000 + }, + { + "epoch": 29.45, + "learning_rate": 3.528219705414604e-05, + "loss": 2.2435, + "step": 5943500 + }, + { + "epoch": 29.45, + "learning_rate": 3.528096094489282e-05, + "loss": 2.2458, + "step": 5944000 + }, + { + "epoch": 29.45, + "learning_rate": 3.527972235846673e-05, + "loss": 2.2236, + "step": 5944500 + }, + { + "epoch": 29.45, + "learning_rate": 3.5278483772040646e-05, + "loss": 2.2477, + "step": 5945000 + }, + { + "epoch": 29.46, + "learning_rate": 3.527724518561456e-05, + "loss": 2.2348, + "step": 5945500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527600659918848e-05, + "loss": 2.2568, + "step": 5946000 + }, + { + "epoch": 29.46, + "learning_rate": 3.52747680127624e-05, + "loss": 2.2553, + "step": 5946500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527352942633631e-05, + "loss": 2.246, + "step": 5947000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5272290839910224e-05, + "loss": 2.258, + "step": 5947500 + }, + { + "epoch": 29.47, + "learning_rate": 3.527105225348414e-05, + "loss": 2.2518, + "step": 5948000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5269816144230916e-05, + "loss": 2.2239, + "step": 5948500 + }, + { + "epoch": 29.47, + "learning_rate": 3.526857755780483e-05, + "loss": 2.2415, + "step": 5949000 + }, + { + "epoch": 29.48, + "learning_rate": 3.5267338971378743e-05, + "loss": 2.256, + "step": 5949500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526610038495266e-05, + "loss": 2.2548, + "step": 5950000 + }, + { + "epoch": 29.48, + "learning_rate": 3.5264864275699436e-05, + "loss": 2.2458, + "step": 5950500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526362568927335e-05, + "loss": 2.2636, + "step": 5951000 + }, + { + "epoch": 29.49, + "learning_rate": 3.526238710284726e-05, + "loss": 2.221, + "step": 5951500 + }, + { + "epoch": 29.49, + "learning_rate": 3.526114851642118e-05, + "loss": 2.2384, + "step": 5952000 + }, + { + "epoch": 29.49, + "learning_rate": 3.525991240716795e-05, + "loss": 2.2514, + "step": 5952500 + }, + { + "epoch": 29.49, + "learning_rate": 3.5258673820741866e-05, + "loss": 2.2481, + "step": 5953000 + }, + { + "epoch": 29.5, + "learning_rate": 3.525743523431578e-05, + "loss": 2.205, + "step": 5953500 + }, + { + "epoch": 29.5, + "learning_rate": 3.52561966478897e-05, + "loss": 2.2412, + "step": 5954000 + }, + { + "epoch": 29.5, + "learning_rate": 3.5254958061463617e-05, + "loss": 2.2481, + "step": 5954500 + }, + { + "epoch": 29.5, + "learning_rate": 3.5253719475037533e-05, + "loss": 2.239, + "step": 5955000 + }, + { + "epoch": 29.51, + "learning_rate": 3.52524833657843e-05, + "loss": 2.2468, + "step": 5955500 + }, + { + "epoch": 29.51, + "learning_rate": 3.525124477935822e-05, + "loss": 2.2548, + "step": 5956000 + }, + { + "epoch": 29.51, + "learning_rate": 3.5250006192932136e-05, + "loss": 2.2493, + "step": 5956500 + }, + { + "epoch": 29.51, + "learning_rate": 3.524876760650605e-05, + "loss": 2.2602, + "step": 5957000 + }, + { + "epoch": 29.52, + "learning_rate": 3.5247531497252815e-05, + "loss": 2.255, + "step": 5957500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524629291082673e-05, + "loss": 2.2757, + "step": 5958000 + }, + { + "epoch": 29.52, + "learning_rate": 3.524505432440065e-05, + "loss": 2.2548, + "step": 5958500 + }, + { + "epoch": 29.52, + "learning_rate": 3.5243815737974566e-05, + "loss": 2.2651, + "step": 5959000 + }, + { + "epoch": 29.53, + "learning_rate": 3.524257715154848e-05, + "loss": 2.2414, + "step": 5959500 + }, + { + "epoch": 29.53, + "learning_rate": 3.52413385651224e-05, + "loss": 2.2512, + "step": 5960000 + }, + { + "epoch": 29.53, + "learning_rate": 3.524009997869632e-05, + "loss": 2.2631, + "step": 5960500 + }, + { + "epoch": 29.53, + "learning_rate": 3.5238861392270234e-05, + "loss": 2.254, + "step": 5961000 + }, + { + "epoch": 29.54, + "learning_rate": 3.523762280584415e-05, + "loss": 2.2334, + "step": 5961500 + }, + { + "epoch": 29.54, + "learning_rate": 3.523638421941806e-05, + "loss": 2.2553, + "step": 5962000 + }, + { + "epoch": 29.54, + "learning_rate": 3.5235148110164836e-05, + "loss": 2.2297, + "step": 5962500 + }, + { + "epoch": 29.54, + "learning_rate": 3.523390952373875e-05, + "loss": 2.2426, + "step": 5963000 + }, + { + "epoch": 29.55, + "learning_rate": 3.5232673414485515e-05, + "loss": 2.2523, + "step": 5963500 + }, + { + "epoch": 29.55, + "learning_rate": 3.523143482805943e-05, + "loss": 2.2493, + "step": 5964000 + }, + { + "epoch": 29.55, + "learning_rate": 3.523019624163335e-05, + "loss": 2.2559, + "step": 5964500 + }, + { + "epoch": 29.55, + "learning_rate": 3.5228957655207266e-05, + "loss": 2.2758, + "step": 5965000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522771906878118e-05, + "loss": 2.2565, + "step": 5965500 + }, + { + "epoch": 29.56, + "learning_rate": 3.52264804823551e-05, + "loss": 2.2483, + "step": 5966000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522524189592902e-05, + "loss": 2.2588, + "step": 5966500 + }, + { + "epoch": 29.56, + "learning_rate": 3.5224003309502934e-05, + "loss": 2.2445, + "step": 5967000 + }, + { + "epoch": 29.57, + "learning_rate": 3.52227672002497e-05, + "loss": 2.274, + "step": 5967500 + }, + { + "epoch": 29.57, + "learning_rate": 3.522152861382362e-05, + "loss": 2.2289, + "step": 5968000 + }, + { + "epoch": 29.57, + "learning_rate": 3.5220290027397536e-05, + "loss": 2.2546, + "step": 5968500 + }, + { + "epoch": 29.57, + "learning_rate": 3.52190539181443e-05, + "loss": 2.2503, + "step": 5969000 + }, + { + "epoch": 29.57, + "learning_rate": 3.5217815331718215e-05, + "loss": 2.2506, + "step": 5969500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521657674529213e-05, + "loss": 2.251, + "step": 5970000 + }, + { + "epoch": 29.58, + "learning_rate": 3.52153406360389e-05, + "loss": 2.2336, + "step": 5970500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521410204961282e-05, + "loss": 2.2587, + "step": 5971000 + }, + { + "epoch": 29.58, + "learning_rate": 3.5212863463186735e-05, + "loss": 2.2595, + "step": 5971500 + }, + { + "epoch": 29.59, + "learning_rate": 3.5211627353933504e-05, + "loss": 2.2517, + "step": 5972000 + }, + { + "epoch": 29.59, + "learning_rate": 3.521038876750742e-05, + "loss": 2.258, + "step": 5972500 + }, + { + "epoch": 29.59, + "learning_rate": 3.520915018108134e-05, + "loss": 2.251, + "step": 5973000 + }, + { + "epoch": 29.59, + "learning_rate": 3.5207911594655255e-05, + "loss": 2.2584, + "step": 5973500 + }, + { + "epoch": 29.6, + "learning_rate": 3.520667300822917e-05, + "loss": 2.2444, + "step": 5974000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520543442180309e-05, + "loss": 2.2307, + "step": 5974500 + }, + { + "epoch": 29.6, + "learning_rate": 3.5204195835377e-05, + "loss": 2.2548, + "step": 5975000 + }, + { + "epoch": 29.6, + "learning_rate": 3.5202957248950915e-05, + "loss": 2.2333, + "step": 5975500 + }, + { + "epoch": 29.61, + "learning_rate": 3.520171866252483e-05, + "loss": 2.2488, + "step": 5976000 + }, + { + "epoch": 29.61, + "learning_rate": 3.520048007609875e-05, + "loss": 2.2246, + "step": 5976500 + }, + { + "epoch": 29.61, + "learning_rate": 3.5199241489672666e-05, + "loss": 2.2331, + "step": 5977000 + }, + { + "epoch": 29.61, + "learning_rate": 3.519800290324658e-05, + "loss": 2.2524, + "step": 5977500 + }, + { + "epoch": 29.62, + "learning_rate": 3.51967643168205e-05, + "loss": 2.2479, + "step": 5978000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519552573039442e-05, + "loss": 2.2346, + "step": 5978500 + }, + { + "epoch": 29.62, + "learning_rate": 3.5194287143968334e-05, + "loss": 2.2478, + "step": 5979000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519304855754225e-05, + "loss": 2.2465, + "step": 5979500 + }, + { + "epoch": 29.63, + "learning_rate": 3.519180997111617e-05, + "loss": 2.2337, + "step": 5980000 + }, + { + "epoch": 29.63, + "learning_rate": 3.5190571384690085e-05, + "loss": 2.2392, + "step": 5980500 + }, + { + "epoch": 29.63, + "learning_rate": 3.5189335275436854e-05, + "loss": 2.2563, + "step": 5981000 + }, + { + "epoch": 29.63, + "learning_rate": 3.518809668901077e-05, + "loss": 2.2376, + "step": 5981500 + }, + { + "epoch": 29.64, + "learning_rate": 3.5186863056930384e-05, + "loss": 2.2474, + "step": 5982000 + }, + { + "epoch": 29.64, + "learning_rate": 3.51856244705043e-05, + "loss": 2.2415, + "step": 5982500 + }, + { + "epoch": 29.64, + "learning_rate": 3.518438588407822e-05, + "loss": 2.2424, + "step": 5983000 + }, + { + "epoch": 29.64, + "learning_rate": 3.5183147297652135e-05, + "loss": 2.2372, + "step": 5983500 + }, + { + "epoch": 29.65, + "learning_rate": 3.518190871122605e-05, + "loss": 2.2606, + "step": 5984000 + }, + { + "epoch": 29.65, + "learning_rate": 3.518067260197282e-05, + "loss": 2.2476, + "step": 5984500 + }, + { + "epoch": 29.65, + "learning_rate": 3.517943401554674e-05, + "loss": 2.2595, + "step": 5985000 + }, + { + "epoch": 29.65, + "learning_rate": 3.5178195429120655e-05, + "loss": 2.2325, + "step": 5985500 + }, + { + "epoch": 29.66, + "learning_rate": 3.517695684269457e-05, + "loss": 2.255, + "step": 5986000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517571825626849e-05, + "loss": 2.2224, + "step": 5986500 + }, + { + "epoch": 29.66, + "learning_rate": 3.5174479669842406e-05, + "loss": 2.2385, + "step": 5987000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517324108341632e-05, + "loss": 2.2482, + "step": 5987500 + }, + { + "epoch": 29.67, + "learning_rate": 3.517200249699024e-05, + "loss": 2.2485, + "step": 5988000 + }, + { + "epoch": 29.67, + "learning_rate": 3.517076391056415e-05, + "loss": 2.2526, + "step": 5988500 + }, + { + "epoch": 29.67, + "learning_rate": 3.5169525324138066e-05, + "loss": 2.2193, + "step": 5989000 + }, + { + "epoch": 29.67, + "learning_rate": 3.5168286737711983e-05, + "loss": 2.2374, + "step": 5989500 + }, + { + "epoch": 29.68, + "learning_rate": 3.51670481512859e-05, + "loss": 2.2634, + "step": 5990000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516580956485982e-05, + "loss": 2.24, + "step": 5990500 + }, + { + "epoch": 29.68, + "learning_rate": 3.5164570978433734e-05, + "loss": 2.2492, + "step": 5991000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516333239200765e-05, + "loss": 2.265, + "step": 5991500 + }, + { + "epoch": 29.69, + "learning_rate": 3.516209380558157e-05, + "loss": 2.2368, + "step": 5992000 + }, + { + "epoch": 29.69, + "learning_rate": 3.5160855219155485e-05, + "loss": 2.2397, + "step": 5992500 + }, + { + "epoch": 29.69, + "learning_rate": 3.51596166327294e-05, + "loss": 2.2589, + "step": 5993000 + }, + { + "epoch": 29.69, + "learning_rate": 3.515837804630331e-05, + "loss": 2.246, + "step": 5993500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515714193705009e-05, + "loss": 2.2362, + "step": 5994000 + }, + { + "epoch": 29.7, + "learning_rate": 3.5155905827796856e-05, + "loss": 2.2473, + "step": 5994500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515466724137077e-05, + "loss": 2.2488, + "step": 5995000 + }, + { + "epoch": 29.7, + "learning_rate": 3.5153428654944684e-05, + "loss": 2.2277, + "step": 5995500 + }, + { + "epoch": 29.71, + "learning_rate": 3.51521900685186e-05, + "loss": 2.2283, + "step": 5996000 + }, + { + "epoch": 29.71, + "learning_rate": 3.515095148209252e-05, + "loss": 2.245, + "step": 5996500 + }, + { + "epoch": 29.71, + "learning_rate": 3.5149715372839286e-05, + "loss": 2.2349, + "step": 5997000 + }, + { + "epoch": 29.71, + "learning_rate": 3.51484767864132e-05, + "loss": 2.2465, + "step": 5997500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514723819998712e-05, + "loss": 2.2382, + "step": 5998000 + }, + { + "epoch": 29.72, + "learning_rate": 3.514599961356104e-05, + "loss": 2.2591, + "step": 5998500 + }, + { + "epoch": 29.72, + "learning_rate": 3.5144761027134954e-05, + "loss": 2.2273, + "step": 5999000 + }, + { + "epoch": 29.72, + "learning_rate": 3.514352244070887e-05, + "loss": 2.2438, + "step": 5999500 + }, + { + "epoch": 29.73, + "learning_rate": 3.514228385428279e-05, + "loss": 2.2216, + "step": 6000000 + }, + { + "epoch": 29.73, + "learning_rate": 3.5141045267856705e-05, + "loss": 2.2397, + "step": 6000500 + }, + { + "epoch": 29.73, + "learning_rate": 3.513980668143062e-05, + "loss": 2.2442, + "step": 6001000 + }, + { + "epoch": 29.73, + "learning_rate": 3.513856809500454e-05, + "loss": 2.2634, + "step": 6001500 + }, + { + "epoch": 29.74, + "learning_rate": 3.5137329508578455e-05, + "loss": 2.2353, + "step": 6002000 + }, + { + "epoch": 29.74, + "learning_rate": 3.5136090922152366e-05, + "loss": 2.2581, + "step": 6002500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513485233572628e-05, + "loss": 2.2491, + "step": 6003000 + }, + { + "epoch": 29.74, + "learning_rate": 3.51336137493002e-05, + "loss": 2.2587, + "step": 6003500 + }, + { + "epoch": 29.75, + "learning_rate": 3.513238011721982e-05, + "loss": 2.2485, + "step": 6004000 + }, + { + "epoch": 29.75, + "learning_rate": 3.513114153079374e-05, + "loss": 2.2202, + "step": 6004500 + }, + { + "epoch": 29.75, + "learning_rate": 3.5129902944367654e-05, + "loss": 2.2374, + "step": 6005000 + }, + { + "epoch": 29.75, + "learning_rate": 3.512866435794157e-05, + "loss": 2.2464, + "step": 6005500 + }, + { + "epoch": 29.76, + "learning_rate": 3.512742824868834e-05, + "loss": 2.2466, + "step": 6006000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512618966226226e-05, + "loss": 2.2135, + "step": 6006500 + }, + { + "epoch": 29.76, + "learning_rate": 3.5124951075836174e-05, + "loss": 2.2327, + "step": 6007000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512371248941009e-05, + "loss": 2.2538, + "step": 6007500 + }, + { + "epoch": 29.77, + "learning_rate": 3.512247390298401e-05, + "loss": 2.241, + "step": 6008000 + }, + { + "epoch": 29.77, + "learning_rate": 3.512123779373077e-05, + "loss": 2.234, + "step": 6008500 + }, + { + "epoch": 29.77, + "learning_rate": 3.5119999207304686e-05, + "loss": 2.2631, + "step": 6009000 + }, + { + "epoch": 29.77, + "learning_rate": 3.51187606208786e-05, + "loss": 2.2543, + "step": 6009500 + }, + { + "epoch": 29.78, + "learning_rate": 3.511752203445252e-05, + "loss": 2.2295, + "step": 6010000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511628344802644e-05, + "loss": 2.2397, + "step": 6010500 + }, + { + "epoch": 29.78, + "learning_rate": 3.5115044861600354e-05, + "loss": 2.2528, + "step": 6011000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511380627517427e-05, + "loss": 2.2429, + "step": 6011500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511256768874819e-05, + "loss": 2.2785, + "step": 6012000 + }, + { + "epoch": 29.79, + "learning_rate": 3.5111329102322105e-05, + "loss": 2.2572, + "step": 6012500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511009051589602e-05, + "loss": 2.2329, + "step": 6013000 + }, + { + "epoch": 29.79, + "learning_rate": 3.510885192946994e-05, + "loss": 2.2503, + "step": 6013500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5107613343043856e-05, + "loss": 2.2507, + "step": 6014000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510637475661777e-05, + "loss": 2.2361, + "step": 6014500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5105141124537387e-05, + "loss": 2.2559, + "step": 6015000 + }, + { + "epoch": 29.8, + "learning_rate": 3.5103902538111303e-05, + "loss": 2.2426, + "step": 6015500 + }, + { + "epoch": 29.81, + "learning_rate": 3.510266395168522e-05, + "loss": 2.2477, + "step": 6016000 + }, + { + "epoch": 29.81, + "learning_rate": 3.510142536525914e-05, + "loss": 2.235, + "step": 6016500 + }, + { + "epoch": 29.81, + "learning_rate": 3.5100186778833054e-05, + "loss": 2.2402, + "step": 6017000 + }, + { + "epoch": 29.81, + "learning_rate": 3.509894819240697e-05, + "loss": 2.2432, + "step": 6017500 + }, + { + "epoch": 29.82, + "learning_rate": 3.509770960598089e-05, + "loss": 2.2734, + "step": 6018000 + }, + { + "epoch": 29.82, + "learning_rate": 3.5096471019554805e-05, + "loss": 2.2639, + "step": 6018500 + }, + { + "epoch": 29.82, + "learning_rate": 3.5095234910301574e-05, + "loss": 2.2591, + "step": 6019000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509399632387549e-05, + "loss": 2.2149, + "step": 6019500 + }, + { + "epoch": 29.83, + "learning_rate": 3.509276021462225e-05, + "loss": 2.2437, + "step": 6020000 + }, + { + "epoch": 29.83, + "learning_rate": 3.509152162819617e-05, + "loss": 2.2636, + "step": 6020500 + }, + { + "epoch": 29.83, + "learning_rate": 3.509028304177009e-05, + "loss": 2.2386, + "step": 6021000 + }, + { + "epoch": 29.83, + "learning_rate": 3.5089044455344004e-05, + "loss": 2.2542, + "step": 6021500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508780586891792e-05, + "loss": 2.2627, + "step": 6022000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508656975966469e-05, + "loss": 2.2434, + "step": 6022500 + }, + { + "epoch": 29.84, + "learning_rate": 3.5085331173238606e-05, + "loss": 2.2404, + "step": 6023000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508409258681252e-05, + "loss": 2.2144, + "step": 6023500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508285400038644e-05, + "loss": 2.2302, + "step": 6024000 + }, + { + "epoch": 29.85, + "learning_rate": 3.508161541396036e-05, + "loss": 2.2015, + "step": 6024500 + }, + { + "epoch": 29.85, + "learning_rate": 3.5080379304707126e-05, + "loss": 2.2531, + "step": 6025000 + }, + { + "epoch": 29.85, + "learning_rate": 3.507914071828104e-05, + "loss": 2.2415, + "step": 6025500 + }, + { + "epoch": 29.85, + "learning_rate": 3.507790213185495e-05, + "loss": 2.2345, + "step": 6026000 + }, + { + "epoch": 29.86, + "learning_rate": 3.507666354542887e-05, + "loss": 2.246, + "step": 6026500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507542495900279e-05, + "loss": 2.2626, + "step": 6027000 + }, + { + "epoch": 29.86, + "learning_rate": 3.5074186372576704e-05, + "loss": 2.2182, + "step": 6027500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507295026332348e-05, + "loss": 2.2315, + "step": 6028000 + }, + { + "epoch": 29.87, + "learning_rate": 3.507171167689739e-05, + "loss": 2.26, + "step": 6028500 + }, + { + "epoch": 29.87, + "learning_rate": 3.5070473090471306e-05, + "loss": 2.2435, + "step": 6029000 + }, + { + "epoch": 29.87, + "learning_rate": 3.506923450404522e-05, + "loss": 2.2568, + "step": 6029500 + }, + { + "epoch": 29.87, + "learning_rate": 3.506799591761914e-05, + "loss": 2.2584, + "step": 6030000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506675733119306e-05, + "loss": 2.2313, + "step": 6030500 + }, + { + "epoch": 29.88, + "learning_rate": 3.5065518744766974e-05, + "loss": 2.2419, + "step": 6031000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506428015834089e-05, + "loss": 2.2516, + "step": 6031500 + }, + { + "epoch": 29.88, + "learning_rate": 3.506304157191481e-05, + "loss": 2.2528, + "step": 6032000 + }, + { + "epoch": 29.89, + "learning_rate": 3.506180793983443e-05, + "loss": 2.2518, + "step": 6032500 + }, + { + "epoch": 29.89, + "learning_rate": 3.5060569353408346e-05, + "loss": 2.2389, + "step": 6033000 + }, + { + "epoch": 29.89, + "learning_rate": 3.505933076698226e-05, + "loss": 2.2587, + "step": 6033500 + }, + { + "epoch": 29.89, + "learning_rate": 3.505809218055618e-05, + "loss": 2.2524, + "step": 6034000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505685607130295e-05, + "loss": 2.2516, + "step": 6034500 + }, + { + "epoch": 29.9, + "learning_rate": 3.5055617484876865e-05, + "loss": 2.244, + "step": 6035000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505437889845078e-05, + "loss": 2.2784, + "step": 6035500 + }, + { + "epoch": 29.9, + "learning_rate": 3.50531403120247e-05, + "loss": 2.2512, + "step": 6036000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5051901725598616e-05, + "loss": 2.2421, + "step": 6036500 + }, + { + "epoch": 29.91, + "learning_rate": 3.5050663139172526e-05, + "loss": 2.2322, + "step": 6037000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5049427029919295e-05, + "loss": 2.2487, + "step": 6037500 + }, + { + "epoch": 29.91, + "learning_rate": 3.5048190920666064e-05, + "loss": 2.2687, + "step": 6038000 + }, + { + "epoch": 29.92, + "learning_rate": 3.504695233423998e-05, + "loss": 2.2331, + "step": 6038500 + }, + { + "epoch": 29.92, + "learning_rate": 3.50457137478139e-05, + "loss": 2.2588, + "step": 6039000 + }, + { + "epoch": 29.92, + "learning_rate": 3.5044475161387815e-05, + "loss": 2.2409, + "step": 6039500 + }, + { + "epoch": 29.92, + "learning_rate": 3.504323657496173e-05, + "loss": 2.2369, + "step": 6040000 + }, + { + "epoch": 29.93, + "learning_rate": 3.504199798853565e-05, + "loss": 2.2599, + "step": 6040500 + }, + { + "epoch": 29.93, + "learning_rate": 3.504076683362812e-05, + "loss": 2.2484, + "step": 6041000 + }, + { + "epoch": 29.93, + "learning_rate": 3.503952824720204e-05, + "loss": 2.2689, + "step": 6041500 + }, + { + "epoch": 29.93, + "learning_rate": 3.5038289660775955e-05, + "loss": 2.2367, + "step": 6042000 + }, + { + "epoch": 29.94, + "learning_rate": 3.503705107434987e-05, + "loss": 2.2461, + "step": 6042500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503581248792379e-05, + "loss": 2.2407, + "step": 6043000 + }, + { + "epoch": 29.94, + "learning_rate": 3.5034573901497706e-05, + "loss": 2.2564, + "step": 6043500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503333531507162e-05, + "loss": 2.2676, + "step": 6044000 + }, + { + "epoch": 29.95, + "learning_rate": 3.503209672864553e-05, + "loss": 2.2478, + "step": 6044500 + }, + { + "epoch": 29.95, + "learning_rate": 3.503085814221945e-05, + "loss": 2.2507, + "step": 6045000 + }, + { + "epoch": 29.95, + "learning_rate": 3.5029619555793367e-05, + "loss": 2.2605, + "step": 6045500 + }, + { + "epoch": 29.95, + "learning_rate": 3.5028383446540135e-05, + "loss": 2.2413, + "step": 6046000 + }, + { + "epoch": 29.96, + "learning_rate": 3.502714486011405e-05, + "loss": 2.2463, + "step": 6046500 + }, + { + "epoch": 29.96, + "learning_rate": 3.502590627368797e-05, + "loss": 2.2444, + "step": 6047000 + }, + { + "epoch": 29.96, + "learning_rate": 3.5024667687261886e-05, + "loss": 2.2214, + "step": 6047500 + }, + { + "epoch": 29.96, + "learning_rate": 3.5023429100835796e-05, + "loss": 2.2366, + "step": 6048000 + }, + { + "epoch": 29.97, + "learning_rate": 3.502219051440971e-05, + "loss": 2.2388, + "step": 6048500 + }, + { + "epoch": 29.97, + "learning_rate": 3.502095192798363e-05, + "loss": 2.265, + "step": 6049000 + }, + { + "epoch": 29.97, + "learning_rate": 3.501971334155755e-05, + "loss": 2.2427, + "step": 6049500 + }, + { + "epoch": 29.97, + "learning_rate": 3.5018474755131464e-05, + "loss": 2.243, + "step": 6050000 + }, + { + "epoch": 29.98, + "learning_rate": 3.501723616870538e-05, + "loss": 2.2914, + "step": 6050500 + }, + { + "epoch": 29.98, + "learning_rate": 3.50159975822793e-05, + "loss": 2.2452, + "step": 6051000 + }, + { + "epoch": 29.98, + "learning_rate": 3.5014758995853215e-05, + "loss": 2.2627, + "step": 6051500 + }, + { + "epoch": 29.98, + "learning_rate": 3.501352040942713e-05, + "loss": 2.2594, + "step": 6052000 + }, + { + "epoch": 29.99, + "learning_rate": 3.50122843001739e-05, + "loss": 2.2361, + "step": 6052500 + }, + { + "epoch": 29.99, + "learning_rate": 3.501104571374782e-05, + "loss": 2.2669, + "step": 6053000 + }, + { + "epoch": 29.99, + "learning_rate": 3.500980712732173e-05, + "loss": 2.264, + "step": 6053500 + }, + { + "epoch": 29.99, + "learning_rate": 3.5008568540895645e-05, + "loss": 2.244, + "step": 6054000 + }, + { + "epoch": 30.0, + "learning_rate": 3.500732995446956e-05, + "loss": 2.2693, + "step": 6054500 + }, + { + "epoch": 30.0, + "learning_rate": 3.500609384521633e-05, + "loss": 2.2239, + "step": 6055000 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.6577156623596847, + "eval_accuracy_mlm": 0.6136358534754317, + "eval_accuracy_nsp": 0.8656725198953558, + "eval_loss": 2.330178737640381, + "eval_runtime": 146.0024, + "eval_samples_per_second": 1746.266, + "eval_steps_per_second": 72.766, + "step": 6055290 + }, + { + "epoch": 30.0, + "learning_rate": 3.500485525879025e-05, + "loss": 2.2413, + "step": 6055500 + }, + { + "epoch": 30.0, + "learning_rate": 3.5003616672364164e-05, + "loss": 2.2212, + "step": 6056000 + }, + { + "epoch": 30.01, + "learning_rate": 3.500237808593808e-05, + "loss": 2.2128, + "step": 6056500 + }, + { + "epoch": 30.01, + "learning_rate": 3.500114197668485e-05, + "loss": 2.225, + "step": 6057000 + }, + { + "epoch": 30.01, + "learning_rate": 3.499990339025877e-05, + "loss": 2.2007, + "step": 6057500 + }, + { + "epoch": 30.01, + "learning_rate": 3.4998664803832684e-05, + "loss": 2.2145, + "step": 6058000 + }, + { + "epoch": 30.02, + "learning_rate": 3.499742869457945e-05, + "loss": 2.2361, + "step": 6058500 + }, + { + "epoch": 30.02, + "learning_rate": 3.499619010815337e-05, + "loss": 2.2315, + "step": 6059000 + }, + { + "epoch": 30.02, + "learning_rate": 3.4994951521727286e-05, + "loss": 2.2215, + "step": 6059500 + }, + { + "epoch": 30.02, + "learning_rate": 3.49937129353012e-05, + "loss": 2.2348, + "step": 6060000 + }, + { + "epoch": 30.03, + "learning_rate": 3.4992474348875113e-05, + "loss": 2.198, + "step": 6060500 + }, + { + "epoch": 30.03, + "learning_rate": 3.499123576244903e-05, + "loss": 2.2208, + "step": 6061000 + }, + { + "epoch": 30.03, + "learning_rate": 3.498999717602295e-05, + "loss": 2.2319, + "step": 6061500 + }, + { + "epoch": 30.03, + "learning_rate": 3.4988758589596864e-05, + "loss": 2.1845, + "step": 6062000 + }, + { + "epoch": 30.04, + "learning_rate": 3.498752000317078e-05, + "loss": 2.2193, + "step": 6062500 + }, + { + "epoch": 30.04, + "learning_rate": 3.49862814167447e-05, + "loss": 2.215, + "step": 6063000 + }, + { + "epoch": 30.04, + "learning_rate": 3.4985042830318615e-05, + "loss": 2.2272, + "step": 6063500 + }, + { + "epoch": 30.04, + "learning_rate": 3.4983806721065384e-05, + "loss": 2.2079, + "step": 6064000 + }, + { + "epoch": 30.05, + "learning_rate": 3.49825681346393e-05, + "loss": 2.2012, + "step": 6064500 + }, + { + "epoch": 30.05, + "learning_rate": 3.498132954821322e-05, + "loss": 2.2302, + "step": 6065000 + }, + { + "epoch": 30.05, + "learning_rate": 3.4980090961787135e-05, + "loss": 2.2268, + "step": 6065500 + }, + { + "epoch": 30.05, + "learning_rate": 3.497885237536105e-05, + "loss": 2.2134, + "step": 6066000 + }, + { + "epoch": 30.06, + "learning_rate": 3.497761378893497e-05, + "loss": 2.2258, + "step": 6066500 + }, + { + "epoch": 30.06, + "learning_rate": 3.497637520250888e-05, + "loss": 2.244, + "step": 6067000 + }, + { + "epoch": 30.06, + "learning_rate": 3.4975136616082796e-05, + "loss": 2.2202, + "step": 6067500 + }, + { + "epoch": 30.06, + "learning_rate": 3.4973900506829564e-05, + "loss": 2.2133, + "step": 6068000 + }, + { + "epoch": 30.07, + "learning_rate": 3.497266192040348e-05, + "loss": 2.2548, + "step": 6068500 + }, + { + "epoch": 30.07, + "learning_rate": 3.49714233339774e-05, + "loss": 2.2356, + "step": 6069000 + }, + { + "epoch": 30.07, + "learning_rate": 3.497018970189702e-05, + "loss": 2.2452, + "step": 6069500 + }, + { + "epoch": 30.07, + "learning_rate": 3.4968951115470936e-05, + "loss": 2.2254, + "step": 6070000 + }, + { + "epoch": 30.08, + "learning_rate": 3.4967715006217705e-05, + "loss": 2.2346, + "step": 6070500 + }, + { + "epoch": 30.08, + "learning_rate": 3.496647641979162e-05, + "loss": 2.2092, + "step": 6071000 + }, + { + "epoch": 30.08, + "learning_rate": 3.496523783336554e-05, + "loss": 2.2229, + "step": 6071500 + }, + { + "epoch": 30.08, + "learning_rate": 3.4963999246939455e-05, + "loss": 2.2513, + "step": 6072000 + }, + { + "epoch": 30.09, + "learning_rate": 3.496276066051337e-05, + "loss": 2.2273, + "step": 6072500 + }, + { + "epoch": 30.09, + "learning_rate": 3.496152207408729e-05, + "loss": 2.2222, + "step": 6073000 + }, + { + "epoch": 30.09, + "learning_rate": 3.4960283487661206e-05, + "loss": 2.2396, + "step": 6073500 + }, + { + "epoch": 30.09, + "learning_rate": 3.495904490123512e-05, + "loss": 2.2165, + "step": 6074000 + }, + { + "epoch": 30.1, + "learning_rate": 3.495780879198189e-05, + "loss": 2.1869, + "step": 6074500 + }, + { + "epoch": 30.1, + "learning_rate": 3.49565702055558e-05, + "loss": 2.2352, + "step": 6075000 + }, + { + "epoch": 30.1, + "learning_rate": 3.495533161912972e-05, + "loss": 2.2344, + "step": 6075500 + }, + { + "epoch": 30.1, + "learning_rate": 3.4954093032703636e-05, + "loss": 2.2467, + "step": 6076000 + }, + { + "epoch": 30.11, + "learning_rate": 3.495285444627755e-05, + "loss": 2.2179, + "step": 6076500 + }, + { + "epoch": 30.11, + "learning_rate": 3.495161585985147e-05, + "loss": 2.2305, + "step": 6077000 + }, + { + "epoch": 30.11, + "learning_rate": 3.495037727342539e-05, + "loss": 2.2301, + "step": 6077500 + }, + { + "epoch": 30.11, + "learning_rate": 3.494914364134501e-05, + "loss": 2.2311, + "step": 6078000 + }, + { + "epoch": 30.11, + "learning_rate": 3.4947905054918924e-05, + "loss": 2.2474, + "step": 6078500 + }, + { + "epoch": 30.12, + "learning_rate": 3.494666646849284e-05, + "loss": 2.2372, + "step": 6079000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494542788206676e-05, + "loss": 2.2275, + "step": 6079500 + }, + { + "epoch": 30.12, + "learning_rate": 3.4944189295640675e-05, + "loss": 2.2308, + "step": 6080000 + }, + { + "epoch": 30.12, + "learning_rate": 3.494295070921459e-05, + "loss": 2.2416, + "step": 6080500 + }, + { + "epoch": 30.13, + "learning_rate": 3.494171212278851e-05, + "loss": 2.2317, + "step": 6081000 + }, + { + "epoch": 30.13, + "learning_rate": 3.494047353636242e-05, + "loss": 2.1961, + "step": 6081500 + }, + { + "epoch": 30.13, + "learning_rate": 3.4939234949936336e-05, + "loss": 2.2102, + "step": 6082000 + }, + { + "epoch": 30.13, + "learning_rate": 3.493799636351025e-05, + "loss": 2.2449, + "step": 6082500 + }, + { + "epoch": 30.14, + "learning_rate": 3.493676025425702e-05, + "loss": 2.2392, + "step": 6083000 + }, + { + "epoch": 30.14, + "learning_rate": 3.493552166783094e-05, + "loss": 2.2121, + "step": 6083500 + }, + { + "epoch": 30.14, + "learning_rate": 3.4934283081404856e-05, + "loss": 2.2322, + "step": 6084000 + }, + { + "epoch": 30.14, + "learning_rate": 3.493304449497877e-05, + "loss": 2.2107, + "step": 6084500 + }, + { + "epoch": 30.15, + "learning_rate": 3.493180590855269e-05, + "loss": 2.2081, + "step": 6085000 + }, + { + "epoch": 30.15, + "learning_rate": 3.4930567322126606e-05, + "loss": 2.2319, + "step": 6085500 + }, + { + "epoch": 30.15, + "learning_rate": 3.4929328735700523e-05, + "loss": 2.2277, + "step": 6086000 + }, + { + "epoch": 30.15, + "learning_rate": 3.492809262644729e-05, + "loss": 2.2258, + "step": 6086500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492685404002121e-05, + "loss": 2.224, + "step": 6087000 + }, + { + "epoch": 30.16, + "learning_rate": 3.4925615453595126e-05, + "loss": 2.2234, + "step": 6087500 + }, + { + "epoch": 30.16, + "learning_rate": 3.492437934434189e-05, + "loss": 2.2201, + "step": 6088000 + }, + { + "epoch": 30.16, + "learning_rate": 3.4923140757915805e-05, + "loss": 2.2361, + "step": 6088500 + }, + { + "epoch": 30.17, + "learning_rate": 3.492190217148972e-05, + "loss": 2.2149, + "step": 6089000 + }, + { + "epoch": 30.17, + "learning_rate": 3.492066358506364e-05, + "loss": 2.2179, + "step": 6089500 + }, + { + "epoch": 30.17, + "learning_rate": 3.4919424998637556e-05, + "loss": 2.2521, + "step": 6090000 + }, + { + "epoch": 30.17, + "learning_rate": 3.491818641221147e-05, + "loss": 2.2198, + "step": 6090500 + }, + { + "epoch": 30.18, + "learning_rate": 3.491694782578539e-05, + "loss": 2.2251, + "step": 6091000 + }, + { + "epoch": 30.18, + "learning_rate": 3.4915709239359307e-05, + "loss": 2.2303, + "step": 6091500 + }, + { + "epoch": 30.18, + "learning_rate": 3.4914470652933224e-05, + "loss": 2.2546, + "step": 6092000 + }, + { + "epoch": 30.18, + "learning_rate": 3.491323206650714e-05, + "loss": 2.2206, + "step": 6092500 + }, + { + "epoch": 30.19, + "learning_rate": 3.491199595725391e-05, + "loss": 2.2407, + "step": 6093000 + }, + { + "epoch": 30.19, + "learning_rate": 3.4910757370827826e-05, + "loss": 2.2124, + "step": 6093500 + }, + { + "epoch": 30.19, + "learning_rate": 3.490951878440174e-05, + "loss": 2.2374, + "step": 6094000 + }, + { + "epoch": 30.19, + "learning_rate": 3.490828019797566e-05, + "loss": 2.2252, + "step": 6094500 + }, + { + "epoch": 30.2, + "learning_rate": 3.490704161154957e-05, + "loss": 2.2356, + "step": 6095000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490580302512349e-05, + "loss": 2.2251, + "step": 6095500 + }, + { + "epoch": 30.2, + "learning_rate": 3.4904564438697404e-05, + "loss": 2.2153, + "step": 6096000 + }, + { + "epoch": 30.2, + "learning_rate": 3.490332585227132e-05, + "loss": 2.2476, + "step": 6096500 + }, + { + "epoch": 30.21, + "learning_rate": 3.490208726584524e-05, + "loss": 2.2666, + "step": 6097000 + }, + { + "epoch": 30.21, + "learning_rate": 3.4900848679419155e-05, + "loss": 2.2188, + "step": 6097500 + }, + { + "epoch": 30.21, + "learning_rate": 3.4899610092993065e-05, + "loss": 2.2538, + "step": 6098000 + }, + { + "epoch": 30.21, + "learning_rate": 3.489837398373984e-05, + "loss": 2.2291, + "step": 6098500 + }, + { + "epoch": 30.22, + "learning_rate": 3.489713539731376e-05, + "loss": 2.2363, + "step": 6099000 + }, + { + "epoch": 30.22, + "learning_rate": 3.4895896810887674e-05, + "loss": 2.2546, + "step": 6099500 + }, + { + "epoch": 30.22, + "learning_rate": 3.489465822446159e-05, + "loss": 2.2155, + "step": 6100000 + }, + { + "epoch": 30.22, + "learning_rate": 3.489341963803551e-05, + "loss": 2.2316, + "step": 6100500 + }, + { + "epoch": 30.23, + "learning_rate": 3.489218105160942e-05, + "loss": 2.2429, + "step": 6101000 + }, + { + "epoch": 30.23, + "learning_rate": 3.4890944942356194e-05, + "loss": 2.2284, + "step": 6101500 + }, + { + "epoch": 30.23, + "learning_rate": 3.4889708833102956e-05, + "loss": 2.2208, + "step": 6102000 + }, + { + "epoch": 30.23, + "learning_rate": 3.488847024667687e-05, + "loss": 2.2401, + "step": 6102500 + }, + { + "epoch": 30.24, + "learning_rate": 3.488723166025079e-05, + "loss": 2.2289, + "step": 6103000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488599307382471e-05, + "loss": 2.2329, + "step": 6103500 + }, + { + "epoch": 30.24, + "learning_rate": 3.4884754487398624e-05, + "loss": 2.2288, + "step": 6104000 + }, + { + "epoch": 30.24, + "learning_rate": 3.488351590097254e-05, + "loss": 2.2316, + "step": 6104500 + }, + { + "epoch": 30.25, + "learning_rate": 3.488227731454646e-05, + "loss": 2.2553, + "step": 6105000 + }, + { + "epoch": 30.25, + "learning_rate": 3.4881038728120375e-05, + "loss": 2.21, + "step": 6105500 + }, + { + "epoch": 30.25, + "learning_rate": 3.487980261886714e-05, + "loss": 2.2134, + "step": 6106000 + }, + { + "epoch": 30.25, + "learning_rate": 3.487856403244106e-05, + "loss": 2.2151, + "step": 6106500 + }, + { + "epoch": 30.26, + "learning_rate": 3.487732544601498e-05, + "loss": 2.229, + "step": 6107000 + }, + { + "epoch": 30.26, + "learning_rate": 3.4876086859588894e-05, + "loss": 2.2177, + "step": 6107500 + }, + { + "epoch": 30.26, + "learning_rate": 3.4874850750335656e-05, + "loss": 2.2384, + "step": 6108000 + }, + { + "epoch": 30.26, + "learning_rate": 3.487361216390957e-05, + "loss": 2.2151, + "step": 6108500 + }, + { + "epoch": 30.27, + "learning_rate": 3.487237357748349e-05, + "loss": 2.2436, + "step": 6109000 + }, + { + "epoch": 30.27, + "learning_rate": 3.487113746823026e-05, + "loss": 2.2404, + "step": 6109500 + }, + { + "epoch": 30.27, + "learning_rate": 3.4869898881804176e-05, + "loss": 2.2555, + "step": 6110000 + }, + { + "epoch": 30.27, + "learning_rate": 3.486866029537809e-05, + "loss": 2.2404, + "step": 6110500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486742170895201e-05, + "loss": 2.2173, + "step": 6111000 + }, + { + "epoch": 30.28, + "learning_rate": 3.486618559969878e-05, + "loss": 2.2282, + "step": 6111500 + }, + { + "epoch": 30.28, + "learning_rate": 3.486494701327269e-05, + "loss": 2.2582, + "step": 6112000 + }, + { + "epoch": 30.28, + "learning_rate": 3.4863708426846606e-05, + "loss": 2.2264, + "step": 6112500 + }, + { + "epoch": 30.29, + "learning_rate": 3.486246984042052e-05, + "loss": 2.2294, + "step": 6113000 + }, + { + "epoch": 30.29, + "learning_rate": 3.486123125399444e-05, + "loss": 2.2493, + "step": 6113500 + }, + { + "epoch": 30.29, + "learning_rate": 3.4859992667568356e-05, + "loss": 2.2327, + "step": 6114000 + }, + { + "epoch": 30.29, + "learning_rate": 3.485875408114227e-05, + "loss": 2.2406, + "step": 6114500 + }, + { + "epoch": 30.3, + "learning_rate": 3.485751549471619e-05, + "loss": 2.2053, + "step": 6115000 + }, + { + "epoch": 30.3, + "learning_rate": 3.485627690829011e-05, + "loss": 2.2343, + "step": 6115500 + }, + { + "epoch": 30.3, + "learning_rate": 3.4855040799036876e-05, + "loss": 2.2061, + "step": 6116000 + }, + { + "epoch": 30.3, + "learning_rate": 3.4853804689783645e-05, + "loss": 2.2168, + "step": 6116500 + }, + { + "epoch": 30.31, + "learning_rate": 3.485256610335756e-05, + "loss": 2.2477, + "step": 6117000 + }, + { + "epoch": 30.31, + "learning_rate": 3.485132751693148e-05, + "loss": 2.233, + "step": 6117500 + }, + { + "epoch": 30.31, + "learning_rate": 3.4850088930505395e-05, + "loss": 2.2193, + "step": 6118000 + }, + { + "epoch": 30.31, + "learning_rate": 3.484885034407931e-05, + "loss": 2.224, + "step": 6118500 + }, + { + "epoch": 30.32, + "learning_rate": 3.484761175765322e-05, + "loss": 2.221, + "step": 6119000 + }, + { + "epoch": 30.32, + "learning_rate": 3.48463756484e-05, + "loss": 2.2206, + "step": 6119500 + }, + { + "epoch": 30.32, + "learning_rate": 3.4845137061973915e-05, + "loss": 2.2106, + "step": 6120000 + }, + { + "epoch": 30.32, + "learning_rate": 3.4843898475547825e-05, + "loss": 2.2688, + "step": 6120500 + }, + { + "epoch": 30.33, + "learning_rate": 3.484265988912174e-05, + "loss": 2.224, + "step": 6121000 + }, + { + "epoch": 30.33, + "learning_rate": 3.484142130269566e-05, + "loss": 2.2378, + "step": 6121500 + }, + { + "epoch": 30.33, + "learning_rate": 3.4840182716269576e-05, + "loss": 2.1927, + "step": 6122000 + }, + { + "epoch": 30.33, + "learning_rate": 3.483894412984349e-05, + "loss": 2.2423, + "step": 6122500 + }, + { + "epoch": 30.34, + "learning_rate": 3.483770554341741e-05, + "loss": 2.2092, + "step": 6123000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483646695699133e-05, + "loss": 2.242, + "step": 6123500 + }, + { + "epoch": 30.34, + "learning_rate": 3.4835228370565244e-05, + "loss": 2.217, + "step": 6124000 + }, + { + "epoch": 30.34, + "learning_rate": 3.483398978413916e-05, + "loss": 2.2285, + "step": 6124500 + }, + { + "epoch": 30.35, + "learning_rate": 3.483275119771308e-05, + "loss": 2.2339, + "step": 6125000 + }, + { + "epoch": 30.35, + "learning_rate": 3.4831512611286995e-05, + "loss": 2.235, + "step": 6125500 + }, + { + "epoch": 30.35, + "learning_rate": 3.483027402486091e-05, + "loss": 2.2277, + "step": 6126000 + }, + { + "epoch": 30.35, + "learning_rate": 3.482903543843483e-05, + "loss": 2.2282, + "step": 6126500 + }, + { + "epoch": 30.36, + "learning_rate": 3.482779932918159e-05, + "loss": 2.2184, + "step": 6127000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482656321992836e-05, + "loss": 2.2337, + "step": 6127500 + }, + { + "epoch": 30.36, + "learning_rate": 3.4825324633502276e-05, + "loss": 2.256, + "step": 6128000 + }, + { + "epoch": 30.36, + "learning_rate": 3.482408852424905e-05, + "loss": 2.2668, + "step": 6128500 + }, + { + "epoch": 30.37, + "learning_rate": 3.482284993782297e-05, + "loss": 2.254, + "step": 6129000 + }, + { + "epoch": 30.37, + "learning_rate": 3.482161382856973e-05, + "loss": 2.2228, + "step": 6129500 + }, + { + "epoch": 30.37, + "learning_rate": 3.482037524214365e-05, + "loss": 2.2304, + "step": 6130000 + }, + { + "epoch": 30.37, + "learning_rate": 3.4819136655717565e-05, + "loss": 2.2172, + "step": 6130500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481789806929148e-05, + "loss": 2.2406, + "step": 6131000 + }, + { + "epoch": 30.38, + "learning_rate": 3.481666196003825e-05, + "loss": 2.2339, + "step": 6131500 + }, + { + "epoch": 30.38, + "learning_rate": 3.481542337361217e-05, + "loss": 2.2278, + "step": 6132000 + }, + { + "epoch": 30.38, + "learning_rate": 3.4814184787186084e-05, + "loss": 2.2436, + "step": 6132500 + }, + { + "epoch": 30.39, + "learning_rate": 3.481294620076e-05, + "loss": 2.2639, + "step": 6133000 + }, + { + "epoch": 30.39, + "learning_rate": 3.481170761433392e-05, + "loss": 2.2498, + "step": 6133500 + }, + { + "epoch": 30.39, + "learning_rate": 3.481047150508068e-05, + "loss": 2.2274, + "step": 6134000 + }, + { + "epoch": 30.39, + "learning_rate": 3.48092329186546e-05, + "loss": 2.251, + "step": 6134500 + }, + { + "epoch": 30.39, + "learning_rate": 3.4807994332228514e-05, + "loss": 2.2089, + "step": 6135000 + }, + { + "epoch": 30.4, + "learning_rate": 3.480675574580243e-05, + "loss": 2.2332, + "step": 6135500 + }, + { + "epoch": 30.4, + "learning_rate": 3.48055196365492e-05, + "loss": 2.2195, + "step": 6136000 + }, + { + "epoch": 30.4, + "learning_rate": 3.4804281050123117e-05, + "loss": 2.2192, + "step": 6136500 + }, + { + "epoch": 30.4, + "learning_rate": 3.4803042463697033e-05, + "loss": 2.2242, + "step": 6137000 + }, + { + "epoch": 30.41, + "learning_rate": 3.480180387727095e-05, + "loss": 2.2118, + "step": 6137500 + }, + { + "epoch": 30.41, + "learning_rate": 3.480056529084487e-05, + "loss": 2.2459, + "step": 6138000 + }, + { + "epoch": 30.41, + "learning_rate": 3.4799326704418784e-05, + "loss": 2.2424, + "step": 6138500 + }, + { + "epoch": 30.41, + "learning_rate": 3.47980881179927e-05, + "loss": 2.2348, + "step": 6139000 + }, + { + "epoch": 30.42, + "learning_rate": 3.479684953156662e-05, + "loss": 2.2165, + "step": 6139500 + }, + { + "epoch": 30.42, + "learning_rate": 3.4795610945140535e-05, + "loss": 2.2224, + "step": 6140000 + }, + { + "epoch": 30.42, + "learning_rate": 3.47943748358873e-05, + "loss": 2.2588, + "step": 6140500 + }, + { + "epoch": 30.42, + "learning_rate": 3.4793138726634066e-05, + "loss": 2.2282, + "step": 6141000 + }, + { + "epoch": 30.43, + "learning_rate": 3.479190014020798e-05, + "loss": 2.2549, + "step": 6141500 + }, + { + "epoch": 30.43, + "learning_rate": 3.47906615537819e-05, + "loss": 2.2456, + "step": 6142000 + }, + { + "epoch": 30.43, + "learning_rate": 3.478942296735582e-05, + "loss": 2.2379, + "step": 6142500 + }, + { + "epoch": 30.43, + "learning_rate": 3.4788184380929734e-05, + "loss": 2.2362, + "step": 6143000 + }, + { + "epoch": 30.44, + "learning_rate": 3.478694579450365e-05, + "loss": 2.2371, + "step": 6143500 + }, + { + "epoch": 30.44, + "learning_rate": 3.478570720807757e-05, + "loss": 2.2568, + "step": 6144000 + }, + { + "epoch": 30.44, + "learning_rate": 3.4784468621651484e-05, + "loss": 2.2387, + "step": 6144500 + }, + { + "epoch": 30.44, + "learning_rate": 3.47832300352254e-05, + "loss": 2.2459, + "step": 6145000 + }, + { + "epoch": 30.45, + "learning_rate": 3.478199144879932e-05, + "loss": 2.2056, + "step": 6145500 + }, + { + "epoch": 30.45, + "learning_rate": 3.4780752862373235e-05, + "loss": 2.2531, + "step": 6146000 + }, + { + "epoch": 30.45, + "learning_rate": 3.477951427594715e-05, + "loss": 2.2194, + "step": 6146500 + }, + { + "epoch": 30.45, + "learning_rate": 3.477827568952107e-05, + "loss": 2.2402, + "step": 6147000 + }, + { + "epoch": 30.46, + "learning_rate": 3.4777037103094986e-05, + "loss": 2.2502, + "step": 6147500 + }, + { + "epoch": 30.46, + "learning_rate": 3.47757985166689e-05, + "loss": 2.2541, + "step": 6148000 + }, + { + "epoch": 30.46, + "learning_rate": 3.477455993024281e-05, + "loss": 2.2304, + "step": 6148500 + }, + { + "epoch": 30.46, + "learning_rate": 3.477332134381673e-05, + "loss": 2.2356, + "step": 6149000 + }, + { + "epoch": 30.47, + "learning_rate": 3.47720852345635e-05, + "loss": 2.2525, + "step": 6149500 + }, + { + "epoch": 30.47, + "learning_rate": 3.477084912531027e-05, + "loss": 2.2437, + "step": 6150000 + }, + { + "epoch": 30.47, + "learning_rate": 3.4769610538884185e-05, + "loss": 2.2182, + "step": 6150500 + }, + { + "epoch": 30.47, + "learning_rate": 3.47683719524581e-05, + "loss": 2.2214, + "step": 6151000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476713336603202e-05, + "loss": 2.2403, + "step": 6151500 + }, + { + "epoch": 30.48, + "learning_rate": 3.4765894779605935e-05, + "loss": 2.2509, + "step": 6152000 + }, + { + "epoch": 30.48, + "learning_rate": 3.476465619317985e-05, + "loss": 2.2426, + "step": 6152500 + }, + { + "epoch": 30.48, + "learning_rate": 3.476342008392662e-05, + "loss": 2.2147, + "step": 6153000 + }, + { + "epoch": 30.49, + "learning_rate": 3.476218149750053e-05, + "loss": 2.2379, + "step": 6153500 + }, + { + "epoch": 30.49, + "learning_rate": 3.476094291107445e-05, + "loss": 2.1983, + "step": 6154000 + }, + { + "epoch": 30.49, + "learning_rate": 3.4759704324648365e-05, + "loss": 2.2076, + "step": 6154500 + }, + { + "epoch": 30.49, + "learning_rate": 3.4758468215395134e-05, + "loss": 2.22, + "step": 6155000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475722962896905e-05, + "loss": 2.2341, + "step": 6155500 + }, + { + "epoch": 30.5, + "learning_rate": 3.475599351971582e-05, + "loss": 2.2196, + "step": 6156000 + }, + { + "epoch": 30.5, + "learning_rate": 3.475475741046259e-05, + "loss": 2.2361, + "step": 6156500 + }, + { + "epoch": 30.5, + "learning_rate": 3.4753518824036505e-05, + "loss": 2.2396, + "step": 6157000 + }, + { + "epoch": 30.51, + "learning_rate": 3.475228023761042e-05, + "loss": 2.2363, + "step": 6157500 + }, + { + "epoch": 30.51, + "learning_rate": 3.475104165118434e-05, + "loss": 2.235, + "step": 6158000 + }, + { + "epoch": 30.51, + "learning_rate": 3.474980306475825e-05, + "loss": 2.247, + "step": 6158500 + }, + { + "epoch": 30.51, + "learning_rate": 3.4748564478332166e-05, + "loss": 2.2427, + "step": 6159000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474732589190608e-05, + "loss": 2.2194, + "step": 6159500 + }, + { + "epoch": 30.52, + "learning_rate": 3.474608730548e-05, + "loss": 2.2395, + "step": 6160000 + }, + { + "epoch": 30.52, + "learning_rate": 3.474484871905392e-05, + "loss": 2.2607, + "step": 6160500 + }, + { + "epoch": 30.52, + "learning_rate": 3.4743610132627834e-05, + "loss": 2.2377, + "step": 6161000 + }, + { + "epoch": 30.53, + "learning_rate": 3.474237154620175e-05, + "loss": 2.2333, + "step": 6161500 + }, + { + "epoch": 30.53, + "learning_rate": 3.474113295977567e-05, + "loss": 2.226, + "step": 6162000 + }, + { + "epoch": 30.53, + "learning_rate": 3.4739894373349585e-05, + "loss": 2.2342, + "step": 6162500 + }, + { + "epoch": 30.53, + "learning_rate": 3.4738658264096354e-05, + "loss": 2.2176, + "step": 6163000 + }, + { + "epoch": 30.54, + "learning_rate": 3.473741967767027e-05, + "loss": 2.2501, + "step": 6163500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473618109124419e-05, + "loss": 2.2182, + "step": 6164000 + }, + { + "epoch": 30.54, + "learning_rate": 3.4734942504818104e-05, + "loss": 2.2549, + "step": 6164500 + }, + { + "epoch": 30.54, + "learning_rate": 3.473370391839202e-05, + "loss": 2.2135, + "step": 6165000 + }, + { + "epoch": 30.55, + "learning_rate": 3.473246533196594e-05, + "loss": 2.2249, + "step": 6165500 + }, + { + "epoch": 30.55, + "learning_rate": 3.47312292227127e-05, + "loss": 2.2566, + "step": 6166000 + }, + { + "epoch": 30.55, + "learning_rate": 3.4729993113459476e-05, + "loss": 2.2149, + "step": 6166500 + }, + { + "epoch": 30.55, + "learning_rate": 3.472875452703339e-05, + "loss": 2.2436, + "step": 6167000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472751594060731e-05, + "loss": 2.2307, + "step": 6167500 + }, + { + "epoch": 30.56, + "learning_rate": 3.472627983135407e-05, + "loss": 2.2567, + "step": 6168000 + }, + { + "epoch": 30.56, + "learning_rate": 3.472504124492799e-05, + "loss": 2.198, + "step": 6168500 + }, + { + "epoch": 30.56, + "learning_rate": 3.4723802658501906e-05, + "loss": 2.2303, + "step": 6169000 + }, + { + "epoch": 30.57, + "learning_rate": 3.472256407207582e-05, + "loss": 2.229, + "step": 6169500 + }, + { + "epoch": 30.57, + "learning_rate": 3.472132548564974e-05, + "loss": 2.2487, + "step": 6170000 + }, + { + "epoch": 30.57, + "learning_rate": 3.4720086899223656e-05, + "loss": 2.2399, + "step": 6170500 + }, + { + "epoch": 30.57, + "learning_rate": 3.4718848312797567e-05, + "loss": 2.2207, + "step": 6171000 + }, + { + "epoch": 30.58, + "learning_rate": 3.4717609726371483e-05, + "loss": 2.2181, + "step": 6171500 + }, + { + "epoch": 30.58, + "learning_rate": 3.47163711399454e-05, + "loss": 2.2427, + "step": 6172000 + }, + { + "epoch": 30.58, + "learning_rate": 3.471513255351932e-05, + "loss": 2.2214, + "step": 6172500 + }, + { + "epoch": 30.58, + "learning_rate": 3.4713893967093234e-05, + "loss": 2.2395, + "step": 6173000 + }, + { + "epoch": 30.59, + "learning_rate": 3.471265538066715e-05, + "loss": 2.2376, + "step": 6173500 + }, + { + "epoch": 30.59, + "learning_rate": 3.471141679424107e-05, + "loss": 2.2301, + "step": 6174000 + }, + { + "epoch": 30.59, + "learning_rate": 3.4710178207814985e-05, + "loss": 2.2256, + "step": 6174500 + }, + { + "epoch": 30.59, + "learning_rate": 3.47089396213889e-05, + "loss": 2.2518, + "step": 6175000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470770103496282e-05, + "loss": 2.2303, + "step": 6175500 + }, + { + "epoch": 30.6, + "learning_rate": 3.4706462448536736e-05, + "loss": 2.2201, + "step": 6176000 + }, + { + "epoch": 30.6, + "learning_rate": 3.470522386211065e-05, + "loss": 2.224, + "step": 6176500 + }, + { + "epoch": 30.6, + "learning_rate": 3.470398527568457e-05, + "loss": 2.2206, + "step": 6177000 + }, + { + "epoch": 30.61, + "learning_rate": 3.4702746689258487e-05, + "loss": 2.2475, + "step": 6177500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4701510580005255e-05, + "loss": 2.2503, + "step": 6178000 + }, + { + "epoch": 30.61, + "learning_rate": 3.470027199357917e-05, + "loss": 2.2358, + "step": 6178500 + }, + { + "epoch": 30.61, + "learning_rate": 3.4699035884325934e-05, + "loss": 2.2493, + "step": 6179000 + }, + { + "epoch": 30.62, + "learning_rate": 3.469779729789985e-05, + "loss": 2.2415, + "step": 6179500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469655871147377e-05, + "loss": 2.1972, + "step": 6180000 + }, + { + "epoch": 30.62, + "learning_rate": 3.4695325079393396e-05, + "loss": 2.2483, + "step": 6180500 + }, + { + "epoch": 30.62, + "learning_rate": 3.469408649296731e-05, + "loss": 2.2393, + "step": 6181000 + }, + { + "epoch": 30.63, + "learning_rate": 3.469284790654122e-05, + "loss": 2.2498, + "step": 6181500 + }, + { + "epoch": 30.63, + "learning_rate": 3.469160932011514e-05, + "loss": 2.2415, + "step": 6182000 + }, + { + "epoch": 30.63, + "learning_rate": 3.4690370733689057e-05, + "loss": 2.2731, + "step": 6182500 + }, + { + "epoch": 30.63, + "learning_rate": 3.4689132147262974e-05, + "loss": 2.265, + "step": 6183000 + }, + { + "epoch": 30.64, + "learning_rate": 3.468789356083689e-05, + "loss": 2.2388, + "step": 6183500 + }, + { + "epoch": 30.64, + "learning_rate": 3.468665745158366e-05, + "loss": 2.2341, + "step": 6184000 + }, + { + "epoch": 30.64, + "learning_rate": 3.4685418865157576e-05, + "loss": 2.2543, + "step": 6184500 + }, + { + "epoch": 30.64, + "learning_rate": 3.468418027873149e-05, + "loss": 2.2344, + "step": 6185000 + }, + { + "epoch": 30.65, + "learning_rate": 3.468294169230541e-05, + "loss": 2.2658, + "step": 6185500 + }, + { + "epoch": 30.65, + "learning_rate": 3.468170310587933e-05, + "loss": 2.2603, + "step": 6186000 + }, + { + "epoch": 30.65, + "learning_rate": 3.4680466996626096e-05, + "loss": 2.2264, + "step": 6186500 + }, + { + "epoch": 30.65, + "learning_rate": 3.467922841020001e-05, + "loss": 2.2426, + "step": 6187000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467798982377393e-05, + "loss": 2.2392, + "step": 6187500 + }, + { + "epoch": 30.66, + "learning_rate": 3.467675123734784e-05, + "loss": 2.2032, + "step": 6188000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467551265092176e-05, + "loss": 2.2448, + "step": 6188500 + }, + { + "epoch": 30.66, + "learning_rate": 3.4674274064495674e-05, + "loss": 2.2308, + "step": 6189000 + }, + { + "epoch": 30.66, + "learning_rate": 3.467303547806959e-05, + "loss": 2.2433, + "step": 6189500 + }, + { + "epoch": 30.67, + "learning_rate": 3.467179689164351e-05, + "loss": 2.2387, + "step": 6190000 + }, + { + "epoch": 30.67, + "learning_rate": 3.4670560782390276e-05, + "loss": 2.2341, + "step": 6190500 + }, + { + "epoch": 30.67, + "learning_rate": 3.466932219596419e-05, + "loss": 2.2467, + "step": 6191000 + }, + { + "epoch": 30.67, + "learning_rate": 3.466808360953811e-05, + "loss": 2.2455, + "step": 6191500 + }, + { + "epoch": 30.68, + "learning_rate": 3.466684502311203e-05, + "loss": 2.2521, + "step": 6192000 + }, + { + "epoch": 30.68, + "learning_rate": 3.4665606436685944e-05, + "loss": 2.2215, + "step": 6192500 + }, + { + "epoch": 30.68, + "learning_rate": 3.4664367850259854e-05, + "loss": 2.2362, + "step": 6193000 + }, + { + "epoch": 30.68, + "learning_rate": 3.466313174100663e-05, + "loss": 2.2413, + "step": 6193500 + }, + { + "epoch": 30.69, + "learning_rate": 3.466189315458055e-05, + "loss": 2.2268, + "step": 6194000 + }, + { + "epoch": 30.69, + "learning_rate": 3.4660654568154464e-05, + "loss": 2.2273, + "step": 6194500 + }, + { + "epoch": 30.69, + "learning_rate": 3.4659415981728374e-05, + "loss": 2.2315, + "step": 6195000 + }, + { + "epoch": 30.69, + "learning_rate": 3.465817987247514e-05, + "loss": 2.2424, + "step": 6195500 + }, + { + "epoch": 30.7, + "learning_rate": 3.465694128604906e-05, + "loss": 2.2435, + "step": 6196000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465570517679583e-05, + "loss": 2.2266, + "step": 6196500 + }, + { + "epoch": 30.7, + "learning_rate": 3.4654466590369745e-05, + "loss": 2.2315, + "step": 6197000 + }, + { + "epoch": 30.7, + "learning_rate": 3.465322800394366e-05, + "loss": 2.2434, + "step": 6197500 + }, + { + "epoch": 30.71, + "learning_rate": 3.465198941751758e-05, + "loss": 2.2493, + "step": 6198000 + }, + { + "epoch": 30.71, + "learning_rate": 3.4650750831091496e-05, + "loss": 2.2375, + "step": 6198500 + }, + { + "epoch": 30.71, + "learning_rate": 3.464951224466541e-05, + "loss": 2.2524, + "step": 6199000 + }, + { + "epoch": 30.71, + "learning_rate": 3.4648276135412175e-05, + "loss": 2.2466, + "step": 6199500 + }, + { + "epoch": 30.72, + "learning_rate": 3.464703754898609e-05, + "loss": 2.2237, + "step": 6200000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464579896256001e-05, + "loss": 2.2402, + "step": 6200500 + }, + { + "epoch": 30.72, + "learning_rate": 3.4644560376133926e-05, + "loss": 2.246, + "step": 6201000 + }, + { + "epoch": 30.72, + "learning_rate": 3.464332178970784e-05, + "loss": 2.2472, + "step": 6201500 + }, + { + "epoch": 30.73, + "learning_rate": 3.464208320328176e-05, + "loss": 2.2377, + "step": 6202000 + }, + { + "epoch": 30.73, + "learning_rate": 3.4640844616855677e-05, + "loss": 2.27, + "step": 6202500 + }, + { + "epoch": 30.73, + "learning_rate": 3.4639606030429594e-05, + "loss": 2.2133, + "step": 6203000 + }, + { + "epoch": 30.73, + "learning_rate": 3.463836744400351e-05, + "loss": 2.2362, + "step": 6203500 + }, + { + "epoch": 30.74, + "learning_rate": 3.463712885757743e-05, + "loss": 2.2287, + "step": 6204000 + }, + { + "epoch": 30.74, + "learning_rate": 3.4635892748324196e-05, + "loss": 2.2377, + "step": 6204500 + }, + { + "epoch": 30.74, + "learning_rate": 3.463465416189811e-05, + "loss": 2.2474, + "step": 6205000 + }, + { + "epoch": 30.74, + "learning_rate": 3.463341557547203e-05, + "loss": 2.2293, + "step": 6205500 + }, + { + "epoch": 30.75, + "learning_rate": 3.463217698904595e-05, + "loss": 2.2539, + "step": 6206000 + }, + { + "epoch": 30.75, + "learning_rate": 3.463094087979271e-05, + "loss": 2.2161, + "step": 6206500 + }, + { + "epoch": 30.75, + "learning_rate": 3.4629702293366626e-05, + "loss": 2.2283, + "step": 6207000 + }, + { + "epoch": 30.75, + "learning_rate": 3.462846370694054e-05, + "loss": 2.2341, + "step": 6207500 + }, + { + "epoch": 30.76, + "learning_rate": 3.462722512051446e-05, + "loss": 2.2445, + "step": 6208000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462598653408838e-05, + "loss": 2.2316, + "step": 6208500 + }, + { + "epoch": 30.76, + "learning_rate": 3.4624747947662294e-05, + "loss": 2.242, + "step": 6209000 + }, + { + "epoch": 30.76, + "learning_rate": 3.462350936123621e-05, + "loss": 2.245, + "step": 6209500 + }, + { + "epoch": 30.77, + "learning_rate": 3.462227325198298e-05, + "loss": 2.253, + "step": 6210000 + }, + { + "epoch": 30.77, + "learning_rate": 3.4621034665556896e-05, + "loss": 2.2291, + "step": 6210500 + }, + { + "epoch": 30.77, + "learning_rate": 3.461979607913081e-05, + "loss": 2.2505, + "step": 6211000 + }, + { + "epoch": 30.77, + "learning_rate": 3.461855996987758e-05, + "loss": 2.2503, + "step": 6211500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461732138345149e-05, + "loss": 2.2289, + "step": 6212000 + }, + { + "epoch": 30.78, + "learning_rate": 3.461608527419826e-05, + "loss": 2.2613, + "step": 6212500 + }, + { + "epoch": 30.78, + "learning_rate": 3.461484668777218e-05, + "loss": 2.2444, + "step": 6213000 + }, + { + "epoch": 30.78, + "learning_rate": 3.4613608101346095e-05, + "loss": 2.2416, + "step": 6213500 + }, + { + "epoch": 30.79, + "learning_rate": 3.461236951492001e-05, + "loss": 2.2211, + "step": 6214000 + }, + { + "epoch": 30.79, + "learning_rate": 3.461113092849393e-05, + "loss": 2.2169, + "step": 6214500 + }, + { + "epoch": 30.79, + "learning_rate": 3.4609892342067846e-05, + "loss": 2.2292, + "step": 6215000 + }, + { + "epoch": 30.79, + "learning_rate": 3.460865375564176e-05, + "loss": 2.2373, + "step": 6215500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460741516921568e-05, + "loss": 2.2433, + "step": 6216000 + }, + { + "epoch": 30.8, + "learning_rate": 3.4606176582789596e-05, + "loss": 2.2271, + "step": 6216500 + }, + { + "epoch": 30.8, + "learning_rate": 3.460493799636351e-05, + "loss": 2.2382, + "step": 6217000 + }, + { + "epoch": 30.8, + "learning_rate": 3.460369940993743e-05, + "loss": 2.2384, + "step": 6217500 + }, + { + "epoch": 30.81, + "learning_rate": 3.460246082351135e-05, + "loss": 2.241, + "step": 6218000 + }, + { + "epoch": 30.81, + "learning_rate": 3.4601222237085264e-05, + "loss": 2.2461, + "step": 6218500 + }, + { + "epoch": 30.81, + "learning_rate": 3.459998365065918e-05, + "loss": 2.2499, + "step": 6219000 + }, + { + "epoch": 30.81, + "learning_rate": 3.45987450642331e-05, + "loss": 2.2335, + "step": 6219500 + }, + { + "epoch": 30.82, + "learning_rate": 3.4597506477807015e-05, + "loss": 2.2417, + "step": 6220000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459626789138093e-05, + "loss": 2.2655, + "step": 6220500 + }, + { + "epoch": 30.82, + "learning_rate": 3.4595034259300546e-05, + "loss": 2.2529, + "step": 6221000 + }, + { + "epoch": 30.82, + "learning_rate": 3.459379567287446e-05, + "loss": 2.2362, + "step": 6221500 + }, + { + "epoch": 30.83, + "learning_rate": 3.459255708644838e-05, + "loss": 2.2316, + "step": 6222000 + }, + { + "epoch": 30.83, + "learning_rate": 3.4591318500022297e-05, + "loss": 2.2179, + "step": 6222500 + }, + { + "epoch": 30.83, + "learning_rate": 3.4590079913596213e-05, + "loss": 2.2264, + "step": 6223000 + }, + { + "epoch": 30.83, + "learning_rate": 3.458884132717013e-05, + "loss": 2.2518, + "step": 6223500 + }, + { + "epoch": 30.84, + "learning_rate": 3.45876052179169e-05, + "loss": 2.2177, + "step": 6224000 + }, + { + "epoch": 30.84, + "learning_rate": 3.4586366631490816e-05, + "loss": 2.2561, + "step": 6224500 + }, + { + "epoch": 30.84, + "learning_rate": 3.458512804506473e-05, + "loss": 2.2601, + "step": 6225000 + }, + { + "epoch": 30.84, + "learning_rate": 3.458388945863864e-05, + "loss": 2.2479, + "step": 6225500 + }, + { + "epoch": 30.85, + "learning_rate": 3.458265582655827e-05, + "loss": 2.2314, + "step": 6226000 + }, + { + "epoch": 30.85, + "learning_rate": 3.458141724013219e-05, + "loss": 2.2399, + "step": 6226500 + }, + { + "epoch": 30.85, + "learning_rate": 3.4580178653706105e-05, + "loss": 2.2339, + "step": 6227000 + }, + { + "epoch": 30.85, + "learning_rate": 3.457894006728002e-05, + "loss": 2.2389, + "step": 6227500 + }, + { + "epoch": 30.86, + "learning_rate": 3.457770148085393e-05, + "loss": 2.2303, + "step": 6228000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457646289442785e-05, + "loss": 2.2388, + "step": 6228500 + }, + { + "epoch": 30.86, + "learning_rate": 3.4575224308001765e-05, + "loss": 2.2413, + "step": 6229000 + }, + { + "epoch": 30.86, + "learning_rate": 3.457398572157568e-05, + "loss": 2.2108, + "step": 6229500 + }, + { + "epoch": 30.87, + "learning_rate": 3.45727471351496e-05, + "loss": 2.2406, + "step": 6230000 + }, + { + "epoch": 30.87, + "learning_rate": 3.457151102589637e-05, + "loss": 2.2665, + "step": 6230500 + }, + { + "epoch": 30.87, + "learning_rate": 3.457027243947028e-05, + "loss": 2.2316, + "step": 6231000 + }, + { + "epoch": 30.87, + "learning_rate": 3.4569033853044195e-05, + "loss": 2.2667, + "step": 6231500 + }, + { + "epoch": 30.88, + "learning_rate": 3.456779526661811e-05, + "loss": 2.2364, + "step": 6232000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456655668019203e-05, + "loss": 2.2286, + "step": 6232500 + }, + { + "epoch": 30.88, + "learning_rate": 3.4565318093765946e-05, + "loss": 2.251, + "step": 6233000 + }, + { + "epoch": 30.88, + "learning_rate": 3.456407950733986e-05, + "loss": 2.2181, + "step": 6233500 + }, + { + "epoch": 30.89, + "learning_rate": 3.456284092091378e-05, + "loss": 2.2578, + "step": 6234000 + }, + { + "epoch": 30.89, + "learning_rate": 3.456160481166055e-05, + "loss": 2.268, + "step": 6234500 + }, + { + "epoch": 30.89, + "learning_rate": 3.4560366225234466e-05, + "loss": 2.2286, + "step": 6235000 + }, + { + "epoch": 30.89, + "learning_rate": 3.455912763880838e-05, + "loss": 2.2199, + "step": 6235500 + }, + { + "epoch": 30.9, + "learning_rate": 3.45578890523823e-05, + "loss": 2.2377, + "step": 6236000 + }, + { + "epoch": 30.9, + "learning_rate": 3.4556650465956216e-05, + "loss": 2.238, + "step": 6236500 + }, + { + "epoch": 30.9, + "learning_rate": 3.455541187953013e-05, + "loss": 2.2317, + "step": 6237000 + }, + { + "epoch": 30.9, + "learning_rate": 3.455417329310405e-05, + "loss": 2.2309, + "step": 6237500 + }, + { + "epoch": 30.91, + "learning_rate": 3.455293470667797e-05, + "loss": 2.2461, + "step": 6238000 + }, + { + "epoch": 30.91, + "learning_rate": 3.4551696120251884e-05, + "loss": 2.2172, + "step": 6238500 + }, + { + "epoch": 30.91, + "learning_rate": 3.4550457533825794e-05, + "loss": 2.2666, + "step": 6239000 + }, + { + "epoch": 30.91, + "learning_rate": 3.454921894739971e-05, + "loss": 2.2173, + "step": 6239500 + }, + { + "epoch": 30.92, + "learning_rate": 3.454798036097363e-05, + "loss": 2.2493, + "step": 6240000 + }, + { + "epoch": 30.92, + "learning_rate": 3.4546741774547545e-05, + "loss": 2.2422, + "step": 6240500 + }, + { + "epoch": 30.92, + "learning_rate": 3.4545505665294314e-05, + "loss": 2.2461, + "step": 6241000 + }, + { + "epoch": 30.92, + "learning_rate": 3.454426707886823e-05, + "loss": 2.2144, + "step": 6241500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4543030969615e-05, + "loss": 2.241, + "step": 6242000 + }, + { + "epoch": 30.93, + "learning_rate": 3.4541792383188917e-05, + "loss": 2.2454, + "step": 6242500 + }, + { + "epoch": 30.93, + "learning_rate": 3.4540553796762833e-05, + "loss": 2.2331, + "step": 6243000 + }, + { + "epoch": 30.93, + "learning_rate": 3.453931521033675e-05, + "loss": 2.241, + "step": 6243500 + }, + { + "epoch": 30.93, + "learning_rate": 3.453807910108351e-05, + "loss": 2.2426, + "step": 6244000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453684051465743e-05, + "loss": 2.2353, + "step": 6244500 + }, + { + "epoch": 30.94, + "learning_rate": 3.4535601928231346e-05, + "loss": 2.2339, + "step": 6245000 + }, + { + "epoch": 30.94, + "learning_rate": 3.453436334180526e-05, + "loss": 2.2464, + "step": 6245500 + }, + { + "epoch": 30.94, + "learning_rate": 3.453312723255204e-05, + "loss": 2.247, + "step": 6246000 + }, + { + "epoch": 30.95, + "learning_rate": 3.453188864612595e-05, + "loss": 2.242, + "step": 6246500 + }, + { + "epoch": 30.95, + "learning_rate": 3.4530650059699866e-05, + "loss": 2.2535, + "step": 6247000 + }, + { + "epoch": 30.95, + "learning_rate": 3.452941147327378e-05, + "loss": 2.2252, + "step": 6247500 + }, + { + "epoch": 30.95, + "learning_rate": 3.45281728868477e-05, + "loss": 2.2676, + "step": 6248000 + }, + { + "epoch": 30.96, + "learning_rate": 3.452693677759447e-05, + "loss": 2.2438, + "step": 6248500 + }, + { + "epoch": 30.96, + "learning_rate": 3.4525698191168385e-05, + "loss": 2.2296, + "step": 6249000 + }, + { + "epoch": 30.96, + "learning_rate": 3.45244596047423e-05, + "loss": 2.254, + "step": 6249500 + }, + { + "epoch": 30.96, + "learning_rate": 3.452322101831621e-05, + "loss": 2.2367, + "step": 6250000 + }, + { + "epoch": 30.97, + "learning_rate": 3.452198243189013e-05, + "loss": 2.2447, + "step": 6250500 + }, + { + "epoch": 30.97, + "learning_rate": 3.4520746322636905e-05, + "loss": 2.2428, + "step": 6251000 + }, + { + "epoch": 30.97, + "learning_rate": 3.451950773621082e-05, + "loss": 2.224, + "step": 6251500 + }, + { + "epoch": 30.97, + "learning_rate": 3.451826914978474e-05, + "loss": 2.2347, + "step": 6252000 + }, + { + "epoch": 30.98, + "learning_rate": 3.4517030563358656e-05, + "loss": 2.2252, + "step": 6252500 + }, + { + "epoch": 30.98, + "learning_rate": 3.451579445410542e-05, + "loss": 2.2325, + "step": 6253000 + }, + { + "epoch": 30.98, + "learning_rate": 3.4514555867679335e-05, + "loss": 2.2505, + "step": 6253500 + }, + { + "epoch": 30.98, + "learning_rate": 3.4513319758426104e-05, + "loss": 2.25, + "step": 6254000 + }, + { + "epoch": 30.99, + "learning_rate": 3.451208117200002e-05, + "loss": 2.2485, + "step": 6254500 + }, + { + "epoch": 30.99, + "learning_rate": 3.451084258557394e-05, + "loss": 2.2436, + "step": 6255000 + }, + { + "epoch": 30.99, + "learning_rate": 3.4509603999147854e-05, + "loss": 2.2469, + "step": 6255500 + }, + { + "epoch": 30.99, + "learning_rate": 3.450836788989462e-05, + "loss": 2.2563, + "step": 6256000 + }, + { + "epoch": 31.0, + "learning_rate": 3.450712930346854e-05, + "loss": 2.2587, + "step": 6256500 + }, + { + "epoch": 31.0, + "learning_rate": 3.450589071704246e-05, + "loss": 2.235, + "step": 6257000 + }, + { + "epoch": 31.0, + "eval_accuracy": 0.6597478495877519, + "eval_accuracy_mlm": 0.6156466741652056, + "eval_accuracy_nsp": 0.8678179628881506, + "eval_loss": 2.3193306922912598, + "eval_runtime": 145.8562, + "eval_samples_per_second": 1748.017, + "eval_steps_per_second": 72.839, + "step": 6257133 + }, + { + "epoch": 31.0, + "learning_rate": 3.4504652130616374e-05, + "loss": 2.1808, + "step": 6257500 + }, + { + "epoch": 31.0, + "learning_rate": 3.4503416021363136e-05, + "loss": 2.213, + "step": 6258000 + }, + { + "epoch": 31.01, + "learning_rate": 3.450217743493705e-05, + "loss": 2.2195, + "step": 6258500 + }, + { + "epoch": 31.01, + "learning_rate": 3.450093884851097e-05, + "loss": 2.2085, + "step": 6259000 + }, + { + "epoch": 31.01, + "learning_rate": 3.449970026208489e-05, + "loss": 2.2248, + "step": 6259500 + }, + { + "epoch": 31.01, + "learning_rate": 3.4498461675658804e-05, + "loss": 2.2321, + "step": 6260000 + }, + { + "epoch": 31.02, + "learning_rate": 3.449722308923272e-05, + "loss": 2.2127, + "step": 6260500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449598450280664e-05, + "loss": 2.1961, + "step": 6261000 + }, + { + "epoch": 31.02, + "learning_rate": 3.4494745916380554e-05, + "loss": 2.2115, + "step": 6261500 + }, + { + "epoch": 31.02, + "learning_rate": 3.449350980712732e-05, + "loss": 2.1928, + "step": 6262000 + }, + { + "epoch": 31.03, + "learning_rate": 3.449227122070124e-05, + "loss": 2.2092, + "step": 6262500 + }, + { + "epoch": 31.03, + "learning_rate": 3.449103263427516e-05, + "loss": 2.2159, + "step": 6263000 + }, + { + "epoch": 31.03, + "learning_rate": 3.4489794047849074e-05, + "loss": 2.189, + "step": 6263500 + }, + { + "epoch": 31.03, + "learning_rate": 3.448855546142299e-05, + "loss": 2.21, + "step": 6264000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448731687499691e-05, + "loss": 2.2087, + "step": 6264500 + }, + { + "epoch": 31.04, + "learning_rate": 3.4486078288570825e-05, + "loss": 2.2178, + "step": 6265000 + }, + { + "epoch": 31.04, + "learning_rate": 3.448483970214474e-05, + "loss": 2.2034, + "step": 6265500 + }, + { + "epoch": 31.04, + "learning_rate": 3.448360111571866e-05, + "loss": 2.2245, + "step": 6266000 + }, + { + "epoch": 31.05, + "learning_rate": 3.448236252929257e-05, + "loss": 2.203, + "step": 6266500 + }, + { + "epoch": 31.05, + "learning_rate": 3.4481123942866486e-05, + "loss": 2.1872, + "step": 6267000 + }, + { + "epoch": 31.05, + "learning_rate": 3.4479887833613255e-05, + "loss": 2.1915, + "step": 6267500 + }, + { + "epoch": 31.05, + "learning_rate": 3.447864924718717e-05, + "loss": 2.2446, + "step": 6268000 + }, + { + "epoch": 31.06, + "learning_rate": 3.447741066076109e-05, + "loss": 2.2023, + "step": 6268500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447617455150786e-05, + "loss": 2.2039, + "step": 6269000 + }, + { + "epoch": 31.06, + "learning_rate": 3.4474935965081774e-05, + "loss": 2.2216, + "step": 6269500 + }, + { + "epoch": 31.06, + "learning_rate": 3.447369737865569e-05, + "loss": 2.2161, + "step": 6270000 + }, + { + "epoch": 31.07, + "learning_rate": 3.447245879222961e-05, + "loss": 2.1849, + "step": 6270500 + }, + { + "epoch": 31.07, + "learning_rate": 3.4471220205803525e-05, + "loss": 2.2113, + "step": 6271000 + }, + { + "epoch": 31.07, + "learning_rate": 3.446998161937744e-05, + "loss": 2.2109, + "step": 6271500 + }, + { + "epoch": 31.07, + "learning_rate": 3.446874303295136e-05, + "loss": 2.2381, + "step": 6272000 + }, + { + "epoch": 31.08, + "learning_rate": 3.4467504446525276e-05, + "loss": 2.2078, + "step": 6272500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446626586009919e-05, + "loss": 2.2187, + "step": 6273000 + }, + { + "epoch": 31.08, + "learning_rate": 3.44650272736731e-05, + "loss": 2.2024, + "step": 6273500 + }, + { + "epoch": 31.08, + "learning_rate": 3.446378868724702e-05, + "loss": 2.2256, + "step": 6274000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446255010082094e-05, + "loss": 2.2071, + "step": 6274500 + }, + { + "epoch": 31.09, + "learning_rate": 3.4461311514394854e-05, + "loss": 2.2189, + "step": 6275000 + }, + { + "epoch": 31.09, + "learning_rate": 3.446007540514162e-05, + "loss": 2.2198, + "step": 6275500 + }, + { + "epoch": 31.09, + "learning_rate": 3.445883929588839e-05, + "loss": 2.219, + "step": 6276000 + }, + { + "epoch": 31.1, + "learning_rate": 3.445760070946231e-05, + "loss": 2.218, + "step": 6276500 + }, + { + "epoch": 31.1, + "learning_rate": 3.4456362123036225e-05, + "loss": 2.2116, + "step": 6277000 + }, + { + "epoch": 31.1, + "learning_rate": 3.445512353661014e-05, + "loss": 2.2026, + "step": 6277500 + }, + { + "epoch": 31.1, + "learning_rate": 3.445388495018406e-05, + "loss": 2.2124, + "step": 6278000 + }, + { + "epoch": 31.11, + "learning_rate": 3.4452646363757976e-05, + "loss": 2.2316, + "step": 6278500 + }, + { + "epoch": 31.11, + "learning_rate": 3.445140777733189e-05, + "loss": 2.2332, + "step": 6279000 + }, + { + "epoch": 31.11, + "learning_rate": 3.445016919090581e-05, + "loss": 2.2116, + "step": 6279500 + }, + { + "epoch": 31.11, + "learning_rate": 3.444893060447973e-05, + "loss": 2.2152, + "step": 6280000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444769201805364e-05, + "loss": 2.211, + "step": 6280500 + }, + { + "epoch": 31.12, + "learning_rate": 3.4446453431627554e-05, + "loss": 2.2078, + "step": 6281000 + }, + { + "epoch": 31.12, + "learning_rate": 3.444521484520147e-05, + "loss": 2.2311, + "step": 6281500 + }, + { + "epoch": 31.12, + "learning_rate": 3.444397625877539e-05, + "loss": 2.208, + "step": 6282000 + }, + { + "epoch": 31.13, + "learning_rate": 3.44427376723493e-05, + "loss": 2.2156, + "step": 6282500 + }, + { + "epoch": 31.13, + "learning_rate": 3.444150156309607e-05, + "loss": 2.2341, + "step": 6283000 + }, + { + "epoch": 31.13, + "learning_rate": 3.444026545384284e-05, + "loss": 2.2018, + "step": 6283500 + }, + { + "epoch": 31.13, + "learning_rate": 3.4439029344589604e-05, + "loss": 2.2093, + "step": 6284000 + }, + { + "epoch": 31.14, + "learning_rate": 3.443779075816352e-05, + "loss": 2.2189, + "step": 6284500 + }, + { + "epoch": 31.14, + "learning_rate": 3.443655217173744e-05, + "loss": 2.2098, + "step": 6285000 + }, + { + "epoch": 31.14, + "learning_rate": 3.4435318539657066e-05, + "loss": 2.2191, + "step": 6285500 + }, + { + "epoch": 31.14, + "learning_rate": 3.443407995323098e-05, + "loss": 2.2183, + "step": 6286000 + }, + { + "epoch": 31.15, + "learning_rate": 3.44328413668049e-05, + "loss": 2.2258, + "step": 6286500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4431602780378816e-05, + "loss": 2.2116, + "step": 6287000 + }, + { + "epoch": 31.15, + "learning_rate": 3.443036419395273e-05, + "loss": 2.2322, + "step": 6287500 + }, + { + "epoch": 31.15, + "learning_rate": 3.4429128084699495e-05, + "loss": 2.212, + "step": 6288000 + }, + { + "epoch": 31.16, + "learning_rate": 3.442788949827341e-05, + "loss": 2.2144, + "step": 6288500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442665091184733e-05, + "loss": 2.1998, + "step": 6289000 + }, + { + "epoch": 31.16, + "learning_rate": 3.4425412325421246e-05, + "loss": 2.1758, + "step": 6289500 + }, + { + "epoch": 31.16, + "learning_rate": 3.442417373899516e-05, + "loss": 2.2297, + "step": 6290000 + }, + { + "epoch": 31.17, + "learning_rate": 3.442293515256908e-05, + "loss": 2.211, + "step": 6290500 + }, + { + "epoch": 31.17, + "learning_rate": 3.442169656614299e-05, + "loss": 2.212, + "step": 6291000 + }, + { + "epoch": 31.17, + "learning_rate": 3.442045797971691e-05, + "loss": 2.2227, + "step": 6291500 + }, + { + "epoch": 31.17, + "learning_rate": 3.4419219393290824e-05, + "loss": 2.2025, + "step": 6292000 + }, + { + "epoch": 31.18, + "learning_rate": 3.441798080686474e-05, + "loss": 2.2118, + "step": 6292500 + }, + { + "epoch": 31.18, + "learning_rate": 3.441674222043866e-05, + "loss": 2.235, + "step": 6293000 + }, + { + "epoch": 31.18, + "learning_rate": 3.4415503634012575e-05, + "loss": 2.2199, + "step": 6293500 + }, + { + "epoch": 31.18, + "learning_rate": 3.441426504758649e-05, + "loss": 2.2552, + "step": 6294000 + }, + { + "epoch": 31.19, + "learning_rate": 3.441302646116041e-05, + "loss": 2.207, + "step": 6294500 + }, + { + "epoch": 31.19, + "learning_rate": 3.4411787874734325e-05, + "loss": 2.2162, + "step": 6295000 + }, + { + "epoch": 31.19, + "learning_rate": 3.4410551765481094e-05, + "loss": 2.2181, + "step": 6295500 + }, + { + "epoch": 31.19, + "learning_rate": 3.440931565622786e-05, + "loss": 2.228, + "step": 6296000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440807706980178e-05, + "loss": 2.2119, + "step": 6296500 + }, + { + "epoch": 31.2, + "learning_rate": 3.44068384833757e-05, + "loss": 2.2169, + "step": 6297000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440559989694961e-05, + "loss": 2.2164, + "step": 6297500 + }, + { + "epoch": 31.2, + "learning_rate": 3.4404361310523524e-05, + "loss": 2.2253, + "step": 6298000 + }, + { + "epoch": 31.2, + "learning_rate": 3.440312272409744e-05, + "loss": 2.2121, + "step": 6298500 + }, + { + "epoch": 31.21, + "learning_rate": 3.440188413767136e-05, + "loss": 2.2298, + "step": 6299000 + }, + { + "epoch": 31.21, + "learning_rate": 3.4400645551245275e-05, + "loss": 2.2243, + "step": 6299500 + }, + { + "epoch": 31.21, + "learning_rate": 3.439940696481919e-05, + "loss": 2.2202, + "step": 6300000 + }, + { + "epoch": 31.21, + "learning_rate": 3.439817085556596e-05, + "loss": 2.2288, + "step": 6300500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439693226913988e-05, + "loss": 2.2252, + "step": 6301000 + }, + { + "epoch": 31.22, + "learning_rate": 3.4395693682713794e-05, + "loss": 2.2148, + "step": 6301500 + }, + { + "epoch": 31.22, + "learning_rate": 3.439445509628771e-05, + "loss": 2.2291, + "step": 6302000 + }, + { + "epoch": 31.22, + "learning_rate": 3.439321650986163e-05, + "loss": 2.2196, + "step": 6302500 + }, + { + "epoch": 31.23, + "learning_rate": 3.4391977923435545e-05, + "loss": 2.2144, + "step": 6303000 + }, + { + "epoch": 31.23, + "learning_rate": 3.439073933700946e-05, + "loss": 2.2116, + "step": 6303500 + }, + { + "epoch": 31.23, + "learning_rate": 3.438950075058337e-05, + "loss": 2.2102, + "step": 6304000 + }, + { + "epoch": 31.23, + "learning_rate": 3.438826216415729e-05, + "loss": 2.2304, + "step": 6304500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438702605490406e-05, + "loss": 2.2181, + "step": 6305000 + }, + { + "epoch": 31.24, + "learning_rate": 3.438579242282368e-05, + "loss": 2.2039, + "step": 6305500 + }, + { + "epoch": 31.24, + "learning_rate": 3.438455631357045e-05, + "loss": 2.2286, + "step": 6306000 + }, + { + "epoch": 31.24, + "learning_rate": 3.4383317727144364e-05, + "loss": 2.2183, + "step": 6306500 + }, + { + "epoch": 31.25, + "learning_rate": 3.438207914071828e-05, + "loss": 2.2127, + "step": 6307000 + }, + { + "epoch": 31.25, + "learning_rate": 3.43808405542922e-05, + "loss": 2.2101, + "step": 6307500 + }, + { + "epoch": 31.25, + "learning_rate": 3.4379601967866115e-05, + "loss": 2.203, + "step": 6308000 + }, + { + "epoch": 31.25, + "learning_rate": 3.437836338144003e-05, + "loss": 2.2003, + "step": 6308500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437712479501395e-05, + "loss": 2.1954, + "step": 6309000 + }, + { + "epoch": 31.26, + "learning_rate": 3.4375886208587866e-05, + "loss": 2.2006, + "step": 6309500 + }, + { + "epoch": 31.26, + "learning_rate": 3.437464762216178e-05, + "loss": 2.2085, + "step": 6310000 + }, + { + "epoch": 31.26, + "learning_rate": 3.43734090357357e-05, + "loss": 2.2127, + "step": 6310500 + }, + { + "epoch": 31.27, + "learning_rate": 3.437217292648247e-05, + "loss": 2.2205, + "step": 6311000 + }, + { + "epoch": 31.27, + "learning_rate": 3.437093434005638e-05, + "loss": 2.2085, + "step": 6311500 + }, + { + "epoch": 31.27, + "learning_rate": 3.4369695753630296e-05, + "loss": 2.2141, + "step": 6312000 + }, + { + "epoch": 31.27, + "learning_rate": 3.436845716720421e-05, + "loss": 2.226, + "step": 6312500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436722105795098e-05, + "loss": 2.2442, + "step": 6313000 + }, + { + "epoch": 31.28, + "learning_rate": 3.43659824715249e-05, + "loss": 2.2363, + "step": 6313500 + }, + { + "epoch": 31.28, + "learning_rate": 3.436474636227167e-05, + "loss": 2.208, + "step": 6314000 + }, + { + "epoch": 31.28, + "learning_rate": 3.4363507775845584e-05, + "loss": 2.233, + "step": 6314500 + }, + { + "epoch": 31.29, + "learning_rate": 3.43622691894195e-05, + "loss": 2.2111, + "step": 6315000 + }, + { + "epoch": 31.29, + "learning_rate": 3.436103060299342e-05, + "loss": 2.2063, + "step": 6315500 + }, + { + "epoch": 31.29, + "learning_rate": 3.4359792016567335e-05, + "loss": 2.2115, + "step": 6316000 + }, + { + "epoch": 31.29, + "learning_rate": 3.435855343014125e-05, + "loss": 2.1919, + "step": 6316500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435731484371517e-05, + "loss": 2.2389, + "step": 6317000 + }, + { + "epoch": 31.3, + "learning_rate": 3.4356076257289086e-05, + "loss": 2.2189, + "step": 6317500 + }, + { + "epoch": 31.3, + "learning_rate": 3.435484014803585e-05, + "loss": 2.2316, + "step": 6318000 + }, + { + "epoch": 31.3, + "learning_rate": 3.4353601561609765e-05, + "loss": 2.2294, + "step": 6318500 + }, + { + "epoch": 31.31, + "learning_rate": 3.435236297518368e-05, + "loss": 2.228, + "step": 6319000 + }, + { + "epoch": 31.31, + "learning_rate": 3.43511243887576e-05, + "loss": 2.2099, + "step": 6319500 + }, + { + "epoch": 31.31, + "learning_rate": 3.4349885802331515e-05, + "loss": 2.2214, + "step": 6320000 + }, + { + "epoch": 31.31, + "learning_rate": 3.434864721590543e-05, + "loss": 2.2146, + "step": 6320500 + }, + { + "epoch": 31.32, + "learning_rate": 3.434740862947935e-05, + "loss": 2.2259, + "step": 6321000 + }, + { + "epoch": 31.32, + "learning_rate": 3.4346170043053266e-05, + "loss": 2.2005, + "step": 6321500 + }, + { + "epoch": 31.32, + "learning_rate": 3.434493145662718e-05, + "loss": 2.2291, + "step": 6322000 + }, + { + "epoch": 31.32, + "learning_rate": 3.43436928702011e-05, + "loss": 2.219, + "step": 6322500 + }, + { + "epoch": 31.33, + "learning_rate": 3.434245428377502e-05, + "loss": 2.1981, + "step": 6323000 + }, + { + "epoch": 31.33, + "learning_rate": 3.4341215697348934e-05, + "loss": 2.2405, + "step": 6323500 + }, + { + "epoch": 31.33, + "learning_rate": 3.433997711092285e-05, + "loss": 2.1992, + "step": 6324000 + }, + { + "epoch": 31.33, + "learning_rate": 3.433874100166962e-05, + "loss": 2.2415, + "step": 6324500 + }, + { + "epoch": 31.34, + "learning_rate": 3.433750241524353e-05, + "loss": 2.2222, + "step": 6325000 + }, + { + "epoch": 31.34, + "learning_rate": 3.433626382881745e-05, + "loss": 2.2247, + "step": 6325500 + }, + { + "epoch": 31.34, + "learning_rate": 3.4335025242391364e-05, + "loss": 2.2394, + "step": 6326000 + }, + { + "epoch": 31.34, + "learning_rate": 3.433378665596528e-05, + "loss": 2.2441, + "step": 6326500 + }, + { + "epoch": 31.35, + "learning_rate": 3.43325480695392e-05, + "loss": 2.2145, + "step": 6327000 + }, + { + "epoch": 31.35, + "learning_rate": 3.4331309483113115e-05, + "loss": 2.2068, + "step": 6327500 + }, + { + "epoch": 31.35, + "learning_rate": 3.433007089668703e-05, + "loss": 2.2079, + "step": 6328000 + }, + { + "epoch": 31.35, + "learning_rate": 3.432883231026094e-05, + "loss": 2.2071, + "step": 6328500 + }, + { + "epoch": 31.36, + "learning_rate": 3.432759620100772e-05, + "loss": 2.2209, + "step": 6329000 + }, + { + "epoch": 31.36, + "learning_rate": 3.4326357614581634e-05, + "loss": 2.2367, + "step": 6329500 + }, + { + "epoch": 31.36, + "learning_rate": 3.432511902815555e-05, + "loss": 2.2124, + "step": 6330000 + }, + { + "epoch": 31.36, + "learning_rate": 3.432388044172947e-05, + "loss": 2.2072, + "step": 6330500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4322641855303385e-05, + "loss": 2.2324, + "step": 6331000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4321408223223e-05, + "loss": 2.2163, + "step": 6331500 + }, + { + "epoch": 31.37, + "learning_rate": 3.4320172113969774e-05, + "loss": 2.2097, + "step": 6332000 + }, + { + "epoch": 31.37, + "learning_rate": 3.4318936004716536e-05, + "loss": 2.2137, + "step": 6332500 + }, + { + "epoch": 31.38, + "learning_rate": 3.431769741829045e-05, + "loss": 2.2035, + "step": 6333000 + }, + { + "epoch": 31.38, + "learning_rate": 3.431645883186437e-05, + "loss": 2.2252, + "step": 6333500 + }, + { + "epoch": 31.38, + "learning_rate": 3.431522024543829e-05, + "loss": 2.2297, + "step": 6334000 + }, + { + "epoch": 31.38, + "learning_rate": 3.4313981659012204e-05, + "loss": 2.2349, + "step": 6334500 + }, + { + "epoch": 31.39, + "learning_rate": 3.431274307258612e-05, + "loss": 2.2302, + "step": 6335000 + }, + { + "epoch": 31.39, + "learning_rate": 3.431150696333289e-05, + "loss": 2.2064, + "step": 6335500 + }, + { + "epoch": 31.39, + "learning_rate": 3.431026837690681e-05, + "loss": 2.2251, + "step": 6336000 + }, + { + "epoch": 31.39, + "learning_rate": 3.4309029790480724e-05, + "loss": 2.238, + "step": 6336500 + }, + { + "epoch": 31.4, + "learning_rate": 3.430779120405464e-05, + "loss": 2.2249, + "step": 6337000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430655261762856e-05, + "loss": 2.2352, + "step": 6337500 + }, + { + "epoch": 31.4, + "learning_rate": 3.4305316508375326e-05, + "loss": 2.2189, + "step": 6338000 + }, + { + "epoch": 31.4, + "learning_rate": 3.430407792194924e-05, + "loss": 2.2131, + "step": 6338500 + }, + { + "epoch": 31.41, + "learning_rate": 3.430283933552316e-05, + "loss": 2.2507, + "step": 6339000 + }, + { + "epoch": 31.41, + "learning_rate": 3.430160074909707e-05, + "loss": 2.2302, + "step": 6339500 + }, + { + "epoch": 31.41, + "learning_rate": 3.430036216267099e-05, + "loss": 2.2123, + "step": 6340000 + }, + { + "epoch": 31.41, + "learning_rate": 3.4299123576244904e-05, + "loss": 2.2245, + "step": 6340500 + }, + { + "epoch": 31.42, + "learning_rate": 3.429788498981882e-05, + "loss": 2.2152, + "step": 6341000 + }, + { + "epoch": 31.42, + "learning_rate": 3.429664888056559e-05, + "loss": 2.2311, + "step": 6341500 + }, + { + "epoch": 31.42, + "learning_rate": 3.429541029413951e-05, + "loss": 2.2054, + "step": 6342000 + }, + { + "epoch": 31.42, + "learning_rate": 3.4294171707713424e-05, + "loss": 2.2194, + "step": 6342500 + }, + { + "epoch": 31.43, + "learning_rate": 3.429293312128734e-05, + "loss": 2.2208, + "step": 6343000 + }, + { + "epoch": 31.43, + "learning_rate": 3.429169453486126e-05, + "loss": 2.2581, + "step": 6343500 + }, + { + "epoch": 31.43, + "learning_rate": 3.4290455948435175e-05, + "loss": 2.1989, + "step": 6344000 + }, + { + "epoch": 31.43, + "learning_rate": 3.428921736200909e-05, + "loss": 2.2191, + "step": 6344500 + }, + { + "epoch": 31.44, + "learning_rate": 3.4287978775583e-05, + "loss": 2.239, + "step": 6345000 + }, + { + "epoch": 31.44, + "learning_rate": 3.428674018915692e-05, + "loss": 2.2267, + "step": 6345500 + }, + { + "epoch": 31.44, + "learning_rate": 3.428550407990369e-05, + "loss": 2.2187, + "step": 6346000 + }, + { + "epoch": 31.44, + "learning_rate": 3.4284265493477604e-05, + "loss": 2.2207, + "step": 6346500 + }, + { + "epoch": 31.45, + "learning_rate": 3.428302690705152e-05, + "loss": 2.2389, + "step": 6347000 + }, + { + "epoch": 31.45, + "learning_rate": 3.428178832062544e-05, + "loss": 2.2213, + "step": 6347500 + }, + { + "epoch": 31.45, + "learning_rate": 3.4280549734199355e-05, + "loss": 2.2062, + "step": 6348000 + }, + { + "epoch": 31.45, + "learning_rate": 3.4279311147773265e-05, + "loss": 2.218, + "step": 6348500 + }, + { + "epoch": 31.46, + "learning_rate": 3.427807256134718e-05, + "loss": 2.2122, + "step": 6349000 + }, + { + "epoch": 31.46, + "learning_rate": 3.42768339749211e-05, + "loss": 2.2257, + "step": 6349500 + }, + { + "epoch": 31.46, + "learning_rate": 3.4275595388495016e-05, + "loss": 2.2072, + "step": 6350000 + }, + { + "epoch": 31.46, + "learning_rate": 3.427435927924179e-05, + "loss": 2.2411, + "step": 6350500 + }, + { + "epoch": 31.47, + "learning_rate": 3.427312316998856e-05, + "loss": 2.2063, + "step": 6351000 + }, + { + "epoch": 31.47, + "learning_rate": 3.427188458356248e-05, + "loss": 2.2173, + "step": 6351500 + }, + { + "epoch": 31.47, + "learning_rate": 3.4270645997136394e-05, + "loss": 2.2306, + "step": 6352000 + }, + { + "epoch": 31.47, + "learning_rate": 3.426940741071031e-05, + "loss": 2.2277, + "step": 6352500 + }, + { + "epoch": 31.47, + "learning_rate": 3.426816882428422e-05, + "loss": 2.229, + "step": 6353000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426693023785814e-05, + "loss": 2.2136, + "step": 6353500 + }, + { + "epoch": 31.48, + "learning_rate": 3.4265691651432055e-05, + "loss": 2.2245, + "step": 6354000 + }, + { + "epoch": 31.48, + "learning_rate": 3.426445306500597e-05, + "loss": 2.2111, + "step": 6354500 + }, + { + "epoch": 31.48, + "learning_rate": 3.426321447857989e-05, + "loss": 2.2525, + "step": 6355000 + }, + { + "epoch": 31.49, + "learning_rate": 3.42619758921538e-05, + "loss": 2.2219, + "step": 6355500 + }, + { + "epoch": 31.49, + "learning_rate": 3.4260737305727716e-05, + "loss": 2.2086, + "step": 6356000 + }, + { + "epoch": 31.49, + "learning_rate": 3.425949871930163e-05, + "loss": 2.2263, + "step": 6356500 + }, + { + "epoch": 31.49, + "learning_rate": 3.425826013287555e-05, + "loss": 2.2265, + "step": 6357000 + }, + { + "epoch": 31.5, + "learning_rate": 3.425702154644947e-05, + "loss": 2.2005, + "step": 6357500 + }, + { + "epoch": 31.5, + "learning_rate": 3.4255782960023384e-05, + "loss": 2.2232, + "step": 6358000 + }, + { + "epoch": 31.5, + "learning_rate": 3.42545443735973e-05, + "loss": 2.2305, + "step": 6358500 + }, + { + "epoch": 31.5, + "learning_rate": 3.425330826434407e-05, + "loss": 2.1797, + "step": 6359000 + }, + { + "epoch": 31.51, + "learning_rate": 3.425206967791799e-05, + "loss": 2.2228, + "step": 6359500 + }, + { + "epoch": 31.51, + "learning_rate": 3.4250831091491904e-05, + "loss": 2.2035, + "step": 6360000 + }, + { + "epoch": 31.51, + "learning_rate": 3.424959498223867e-05, + "loss": 2.2232, + "step": 6360500 + }, + { + "epoch": 31.51, + "learning_rate": 3.424835639581259e-05, + "loss": 2.2096, + "step": 6361000 + }, + { + "epoch": 31.52, + "learning_rate": 3.4247117809386506e-05, + "loss": 2.203, + "step": 6361500 + }, + { + "epoch": 31.52, + "learning_rate": 3.4245879222960416e-05, + "loss": 2.2527, + "step": 6362000 + }, + { + "epoch": 31.52, + "learning_rate": 3.424464063653433e-05, + "loss": 2.2428, + "step": 6362500 + }, + { + "epoch": 31.52, + "learning_rate": 3.424340452728111e-05, + "loss": 2.1951, + "step": 6363000 + }, + { + "epoch": 31.53, + "learning_rate": 3.424216594085502e-05, + "loss": 2.2294, + "step": 6363500 + }, + { + "epoch": 31.53, + "learning_rate": 3.4240927354428936e-05, + "loss": 2.2303, + "step": 6364000 + }, + { + "epoch": 31.53, + "learning_rate": 3.423968876800285e-05, + "loss": 2.2402, + "step": 6364500 + }, + { + "epoch": 31.53, + "learning_rate": 3.423845265874963e-05, + "loss": 2.2149, + "step": 6365000 + }, + { + "epoch": 31.54, + "learning_rate": 3.423721654949639e-05, + "loss": 2.2285, + "step": 6365500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423597796307031e-05, + "loss": 2.2355, + "step": 6366000 + }, + { + "epoch": 31.54, + "learning_rate": 3.4234739376644224e-05, + "loss": 2.2315, + "step": 6366500 + }, + { + "epoch": 31.54, + "learning_rate": 3.423350079021814e-05, + "loss": 2.2164, + "step": 6367000 + }, + { + "epoch": 31.55, + "learning_rate": 3.423226220379206e-05, + "loss": 2.2494, + "step": 6367500 + }, + { + "epoch": 31.55, + "learning_rate": 3.4231023617365975e-05, + "loss": 2.21, + "step": 6368000 + }, + { + "epoch": 31.55, + "learning_rate": 3.4229787508112744e-05, + "loss": 2.2279, + "step": 6368500 + }, + { + "epoch": 31.55, + "learning_rate": 3.422854892168666e-05, + "loss": 2.2203, + "step": 6369000 + }, + { + "epoch": 31.56, + "learning_rate": 3.422731033526058e-05, + "loss": 2.2472, + "step": 6369500 + }, + { + "epoch": 31.56, + "learning_rate": 3.4226071748834495e-05, + "loss": 2.2179, + "step": 6370000 + }, + { + "epoch": 31.56, + "learning_rate": 3.422483316240841e-05, + "loss": 2.2375, + "step": 6370500 + }, + { + "epoch": 31.56, + "learning_rate": 3.422359457598233e-05, + "loss": 2.2469, + "step": 6371000 + }, + { + "epoch": 31.57, + "learning_rate": 3.4222355989556246e-05, + "loss": 2.2507, + "step": 6371500 + }, + { + "epoch": 31.57, + "learning_rate": 3.422111740313016e-05, + "loss": 2.2195, + "step": 6372000 + }, + { + "epoch": 31.57, + "learning_rate": 3.421987881670408e-05, + "loss": 2.2346, + "step": 6372500 + }, + { + "epoch": 31.57, + "learning_rate": 3.421864023027799e-05, + "loss": 2.2264, + "step": 6373000 + }, + { + "epoch": 31.58, + "learning_rate": 3.4217401643851906e-05, + "loss": 2.2479, + "step": 6373500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421616305742582e-05, + "loss": 2.2097, + "step": 6374000 + }, + { + "epoch": 31.58, + "learning_rate": 3.421492694817259e-05, + "loss": 2.23, + "step": 6374500 + }, + { + "epoch": 31.58, + "learning_rate": 3.421368836174651e-05, + "loss": 2.2047, + "step": 6375000 + }, + { + "epoch": 31.59, + "learning_rate": 3.4212449775320426e-05, + "loss": 2.2001, + "step": 6375500 + }, + { + "epoch": 31.59, + "learning_rate": 3.4211211188894336e-05, + "loss": 2.1968, + "step": 6376000 + }, + { + "epoch": 31.59, + "learning_rate": 3.420997507964111e-05, + "loss": 2.219, + "step": 6376500 + }, + { + "epoch": 31.59, + "learning_rate": 3.420873649321503e-05, + "loss": 2.2418, + "step": 6377000 + }, + { + "epoch": 31.6, + "learning_rate": 3.4207497906788946e-05, + "loss": 2.2385, + "step": 6377500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420625932036286e-05, + "loss": 2.2359, + "step": 6378000 + }, + { + "epoch": 31.6, + "learning_rate": 3.420502073393678e-05, + "loss": 2.2245, + "step": 6378500 + }, + { + "epoch": 31.6, + "learning_rate": 3.420378462468354e-05, + "loss": 2.231, + "step": 6379000 + }, + { + "epoch": 31.61, + "learning_rate": 3.420254603825746e-05, + "loss": 2.1829, + "step": 6379500 + }, + { + "epoch": 31.61, + "learning_rate": 3.420130992900423e-05, + "loss": 2.2169, + "step": 6380000 + }, + { + "epoch": 31.61, + "learning_rate": 3.4200071342578144e-05, + "loss": 2.2441, + "step": 6380500 + }, + { + "epoch": 31.61, + "learning_rate": 3.419883275615206e-05, + "loss": 2.2208, + "step": 6381000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419759416972598e-05, + "loss": 2.2438, + "step": 6381500 + }, + { + "epoch": 31.62, + "learning_rate": 3.4196355583299895e-05, + "loss": 2.2072, + "step": 6382000 + }, + { + "epoch": 31.62, + "learning_rate": 3.419511699687381e-05, + "loss": 2.2324, + "step": 6382500 + }, + { + "epoch": 31.62, + "learning_rate": 3.419387841044773e-05, + "loss": 2.2065, + "step": 6383000 + }, + { + "epoch": 31.63, + "learning_rate": 3.4192639824021646e-05, + "loss": 2.2306, + "step": 6383500 + }, + { + "epoch": 31.63, + "learning_rate": 3.419140123759556e-05, + "loss": 2.2126, + "step": 6384000 + }, + { + "epoch": 31.63, + "learning_rate": 3.419016265116948e-05, + "loss": 2.2266, + "step": 6384500 + }, + { + "epoch": 31.63, + "learning_rate": 3.4188924064743397e-05, + "loss": 2.2249, + "step": 6385000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418768547831731e-05, + "loss": 2.221, + "step": 6385500 + }, + { + "epoch": 31.64, + "learning_rate": 3.4186446891891224e-05, + "loss": 2.245, + "step": 6386000 + }, + { + "epoch": 31.64, + "learning_rate": 3.418520830546514e-05, + "loss": 2.2187, + "step": 6386500 + }, + { + "epoch": 31.64, + "learning_rate": 3.418396971903906e-05, + "loss": 2.2144, + "step": 6387000 + }, + { + "epoch": 31.65, + "learning_rate": 3.4182731132612974e-05, + "loss": 2.2362, + "step": 6387500 + }, + { + "epoch": 31.65, + "learning_rate": 3.418149254618689e-05, + "loss": 2.1898, + "step": 6388000 + }, + { + "epoch": 31.65, + "learning_rate": 3.418025643693365e-05, + "loss": 2.2426, + "step": 6388500 + }, + { + "epoch": 31.65, + "learning_rate": 3.417901785050757e-05, + "loss": 2.2156, + "step": 6389000 + }, + { + "epoch": 31.66, + "learning_rate": 3.417777926408149e-05, + "loss": 2.2327, + "step": 6389500 + }, + { + "epoch": 31.66, + "learning_rate": 3.4176540677655404e-05, + "loss": 2.2054, + "step": 6390000 + }, + { + "epoch": 31.66, + "learning_rate": 3.417530209122932e-05, + "loss": 2.2216, + "step": 6390500 + }, + { + "epoch": 31.66, + "learning_rate": 3.417406350480324e-05, + "loss": 2.2077, + "step": 6391000 + }, + { + "epoch": 31.67, + "learning_rate": 3.4172824918377155e-05, + "loss": 2.2122, + "step": 6391500 + }, + { + "epoch": 31.67, + "learning_rate": 3.417158633195107e-05, + "loss": 2.2206, + "step": 6392000 + }, + { + "epoch": 31.67, + "learning_rate": 3.417034774552499e-05, + "loss": 2.2276, + "step": 6392500 + }, + { + "epoch": 31.67, + "learning_rate": 3.4169109159098906e-05, + "loss": 2.2207, + "step": 6393000 + }, + { + "epoch": 31.68, + "learning_rate": 3.416787057267282e-05, + "loss": 2.2294, + "step": 6393500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416663446341959e-05, + "loss": 2.2208, + "step": 6394000 + }, + { + "epoch": 31.68, + "learning_rate": 3.416539835416636e-05, + "loss": 2.2432, + "step": 6394500 + }, + { + "epoch": 31.68, + "learning_rate": 3.416415976774027e-05, + "loss": 2.2231, + "step": 6395000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416292118131419e-05, + "loss": 2.2351, + "step": 6395500 + }, + { + "epoch": 31.69, + "learning_rate": 3.4161682594888104e-05, + "loss": 2.219, + "step": 6396000 + }, + { + "epoch": 31.69, + "learning_rate": 3.416044400846202e-05, + "loss": 2.2291, + "step": 6396500 + }, + { + "epoch": 31.69, + "learning_rate": 3.415920542203594e-05, + "loss": 2.2375, + "step": 6397000 + }, + { + "epoch": 31.7, + "learning_rate": 3.4157966835609855e-05, + "loss": 2.2292, + "step": 6397500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415672824918377e-05, + "loss": 2.2253, + "step": 6398000 + }, + { + "epoch": 31.7, + "learning_rate": 3.415548966275769e-05, + "loss": 2.2333, + "step": 6398500 + }, + { + "epoch": 31.7, + "learning_rate": 3.415425603067731e-05, + "loss": 2.2152, + "step": 6399000 + }, + { + "epoch": 31.71, + "learning_rate": 3.415301992142408e-05, + "loss": 2.2123, + "step": 6399500 + }, + { + "epoch": 31.71, + "learning_rate": 3.415178381217085e-05, + "loss": 2.235, + "step": 6400000 + }, + { + "epoch": 31.71, + "learning_rate": 3.4150545225744764e-05, + "loss": 2.2219, + "step": 6400500 + }, + { + "epoch": 31.71, + "learning_rate": 3.414930663931868e-05, + "loss": 2.2168, + "step": 6401000 + }, + { + "epoch": 31.72, + "learning_rate": 3.41480680528926e-05, + "loss": 2.2214, + "step": 6401500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4146829466466515e-05, + "loss": 2.2568, + "step": 6402000 + }, + { + "epoch": 31.72, + "learning_rate": 3.414559088004043e-05, + "loss": 2.1955, + "step": 6402500 + }, + { + "epoch": 31.72, + "learning_rate": 3.4144354770787194e-05, + "loss": 2.2304, + "step": 6403000 + }, + { + "epoch": 31.73, + "learning_rate": 3.414311618436111e-05, + "loss": 2.2254, + "step": 6403500 + }, + { + "epoch": 31.73, + "learning_rate": 3.414187759793503e-05, + "loss": 2.2343, + "step": 6404000 + }, + { + "epoch": 31.73, + "learning_rate": 3.4140639011508945e-05, + "loss": 2.2122, + "step": 6404500 + }, + { + "epoch": 31.73, + "learning_rate": 3.413940042508286e-05, + "loss": 2.2378, + "step": 6405000 + }, + { + "epoch": 31.74, + "learning_rate": 3.413816431582963e-05, + "loss": 2.2572, + "step": 6405500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413692572940355e-05, + "loss": 2.2494, + "step": 6406000 + }, + { + "epoch": 31.74, + "learning_rate": 3.4135687142977464e-05, + "loss": 2.2201, + "step": 6406500 + }, + { + "epoch": 31.74, + "learning_rate": 3.413444855655138e-05, + "loss": 2.2259, + "step": 6407000 + }, + { + "epoch": 31.74, + "learning_rate": 3.41332099701253e-05, + "loss": 2.2272, + "step": 6407500 + }, + { + "epoch": 31.75, + "learning_rate": 3.4131971383699215e-05, + "loss": 2.235, + "step": 6408000 + }, + { + "epoch": 31.75, + "learning_rate": 3.413073279727313e-05, + "loss": 2.2403, + "step": 6408500 + }, + { + "epoch": 31.75, + "learning_rate": 3.412949421084705e-05, + "loss": 2.246, + "step": 6409000 + }, + { + "epoch": 31.75, + "learning_rate": 3.4128255624420966e-05, + "loss": 2.2335, + "step": 6409500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412701703799488e-05, + "loss": 2.247, + "step": 6410000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412577845156879e-05, + "loss": 2.2347, + "step": 6410500 + }, + { + "epoch": 31.76, + "learning_rate": 3.412453986514271e-05, + "loss": 2.2364, + "step": 6411000 + }, + { + "epoch": 31.76, + "learning_rate": 3.412330127871663e-05, + "loss": 2.2555, + "step": 6411500 + }, + { + "epoch": 31.77, + "learning_rate": 3.4122062692290544e-05, + "loss": 2.2152, + "step": 6412000 + }, + { + "epoch": 31.77, + "learning_rate": 3.412082658303731e-05, + "loss": 2.2121, + "step": 6412500 + }, + { + "epoch": 31.77, + "learning_rate": 3.411959047378408e-05, + "loss": 2.2103, + "step": 6413000 + }, + { + "epoch": 31.77, + "learning_rate": 3.411835436453085e-05, + "loss": 2.211, + "step": 6413500 + }, + { + "epoch": 31.78, + "learning_rate": 3.411711577810477e-05, + "loss": 2.222, + "step": 6414000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411587719167868e-05, + "loss": 2.2353, + "step": 6414500 + }, + { + "epoch": 31.78, + "learning_rate": 3.4114638605252594e-05, + "loss": 2.2561, + "step": 6415000 + }, + { + "epoch": 31.78, + "learning_rate": 3.411340001882651e-05, + "loss": 2.2272, + "step": 6415500 + }, + { + "epoch": 31.79, + "learning_rate": 3.411216143240043e-05, + "loss": 2.2308, + "step": 6416000 + }, + { + "epoch": 31.79, + "learning_rate": 3.4110922845974345e-05, + "loss": 2.2271, + "step": 6416500 + }, + { + "epoch": 31.79, + "learning_rate": 3.410968425954826e-05, + "loss": 2.203, + "step": 6417000 + }, + { + "epoch": 31.79, + "learning_rate": 3.410844567312218e-05, + "loss": 2.2542, + "step": 6417500 + }, + { + "epoch": 31.8, + "learning_rate": 3.4107207086696096e-05, + "loss": 2.2169, + "step": 6418000 + }, + { + "epoch": 31.8, + "learning_rate": 3.410596850027001e-05, + "loss": 2.225, + "step": 6418500 + }, + { + "epoch": 31.8, + "learning_rate": 3.410473239101678e-05, + "loss": 2.225, + "step": 6419000 + }, + { + "epoch": 31.8, + "learning_rate": 3.41034938045907e-05, + "loss": 2.24, + "step": 6419500 + }, + { + "epoch": 31.81, + "learning_rate": 3.4102255218164615e-05, + "loss": 2.2394, + "step": 6420000 + }, + { + "epoch": 31.81, + "learning_rate": 3.410101663173853e-05, + "loss": 2.255, + "step": 6420500 + }, + { + "epoch": 31.81, + "learning_rate": 3.409977804531245e-05, + "loss": 2.2297, + "step": 6421000 + }, + { + "epoch": 31.81, + "learning_rate": 3.4098539458886366e-05, + "loss": 2.2249, + "step": 6421500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409730087246028e-05, + "loss": 2.2136, + "step": 6422000 + }, + { + "epoch": 31.82, + "learning_rate": 3.40960622860342e-05, + "loss": 2.252, + "step": 6422500 + }, + { + "epoch": 31.82, + "learning_rate": 3.409482369960812e-05, + "loss": 2.213, + "step": 6423000 + }, + { + "epoch": 31.82, + "learning_rate": 3.4093585113182034e-05, + "loss": 2.2281, + "step": 6423500 + }, + { + "epoch": 31.83, + "learning_rate": 3.4092346526755944e-05, + "loss": 2.2149, + "step": 6424000 + }, + { + "epoch": 31.83, + "learning_rate": 3.409110794032986e-05, + "loss": 2.2163, + "step": 6424500 + }, + { + "epoch": 31.83, + "learning_rate": 3.408987183107663e-05, + "loss": 2.2166, + "step": 6425000 + }, + { + "epoch": 31.83, + "learning_rate": 3.408863324465055e-05, + "loss": 2.2333, + "step": 6425500 + }, + { + "epoch": 31.84, + "learning_rate": 3.4087397135397315e-05, + "loss": 2.2312, + "step": 6426000 + }, + { + "epoch": 31.84, + "learning_rate": 3.4086161026144084e-05, + "loss": 2.226, + "step": 6426500 + }, + { + "epoch": 31.84, + "learning_rate": 3.4084922439718e-05, + "loss": 2.2535, + "step": 6427000 + }, + { + "epoch": 31.84, + "learning_rate": 3.408368385329191e-05, + "loss": 2.2257, + "step": 6427500 + }, + { + "epoch": 31.85, + "learning_rate": 3.408244526686583e-05, + "loss": 2.232, + "step": 6428000 + }, + { + "epoch": 31.85, + "learning_rate": 3.4081206680439745e-05, + "loss": 2.2277, + "step": 6428500 + }, + { + "epoch": 31.85, + "learning_rate": 3.407996809401366e-05, + "loss": 2.215, + "step": 6429000 + }, + { + "epoch": 31.85, + "learning_rate": 3.407872950758758e-05, + "loss": 2.2315, + "step": 6429500 + }, + { + "epoch": 31.86, + "learning_rate": 3.4077490921161496e-05, + "loss": 2.2276, + "step": 6430000 + }, + { + "epoch": 31.86, + "learning_rate": 3.4076254811908265e-05, + "loss": 2.2121, + "step": 6430500 + }, + { + "epoch": 31.86, + "learning_rate": 3.407501622548218e-05, + "loss": 2.2239, + "step": 6431000 + }, + { + "epoch": 31.86, + "learning_rate": 3.40737776390561e-05, + "loss": 2.2147, + "step": 6431500 + }, + { + "epoch": 31.87, + "learning_rate": 3.4072539052630016e-05, + "loss": 2.2294, + "step": 6432000 + }, + { + "epoch": 31.87, + "learning_rate": 3.407130046620393e-05, + "loss": 2.2209, + "step": 6432500 + }, + { + "epoch": 31.87, + "learning_rate": 3.407006187977785e-05, + "loss": 2.2195, + "step": 6433000 + }, + { + "epoch": 31.87, + "learning_rate": 3.4068823293351766e-05, + "loss": 2.231, + "step": 6433500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406758470692568e-05, + "loss": 2.2281, + "step": 6434000 + }, + { + "epoch": 31.88, + "learning_rate": 3.40663461204996e-05, + "loss": 2.2155, + "step": 6434500 + }, + { + "epoch": 31.88, + "learning_rate": 3.406511248841922e-05, + "loss": 2.2162, + "step": 6435000 + }, + { + "epoch": 31.88, + "learning_rate": 3.406387390199314e-05, + "loss": 2.217, + "step": 6435500 + }, + { + "epoch": 31.89, + "learning_rate": 3.406263531556705e-05, + "loss": 2.1886, + "step": 6436000 + }, + { + "epoch": 31.89, + "learning_rate": 3.4061396729140965e-05, + "loss": 2.2471, + "step": 6436500 + }, + { + "epoch": 31.89, + "learning_rate": 3.406015814271488e-05, + "loss": 2.2321, + "step": 6437000 + }, + { + "epoch": 31.89, + "learning_rate": 3.40589195562888e-05, + "loss": 2.2454, + "step": 6437500 + }, + { + "epoch": 31.9, + "learning_rate": 3.4057680969862716e-05, + "loss": 2.2335, + "step": 6438000 + }, + { + "epoch": 31.9, + "learning_rate": 3.4056444860609485e-05, + "loss": 2.2461, + "step": 6438500 + }, + { + "epoch": 31.9, + "learning_rate": 3.40552062741834e-05, + "loss": 2.2311, + "step": 6439000 + }, + { + "epoch": 31.9, + "learning_rate": 3.405396768775732e-05, + "loss": 2.2144, + "step": 6439500 + }, + { + "epoch": 31.91, + "learning_rate": 3.4052729101331235e-05, + "loss": 2.2356, + "step": 6440000 + }, + { + "epoch": 31.91, + "learning_rate": 3.405149051490515e-05, + "loss": 2.2321, + "step": 6440500 + }, + { + "epoch": 31.91, + "learning_rate": 3.405025440565192e-05, + "loss": 2.2336, + "step": 6441000 + }, + { + "epoch": 31.91, + "learning_rate": 3.404901581922584e-05, + "loss": 2.2452, + "step": 6441500 + }, + { + "epoch": 31.92, + "learning_rate": 3.4047777232799755e-05, + "loss": 2.2066, + "step": 6442000 + }, + { + "epoch": 31.92, + "learning_rate": 3.4046538646373665e-05, + "loss": 2.2472, + "step": 6442500 + }, + { + "epoch": 31.92, + "learning_rate": 3.404530005994758e-05, + "loss": 2.2411, + "step": 6443000 + }, + { + "epoch": 31.92, + "learning_rate": 3.40440664278672e-05, + "loss": 2.2134, + "step": 6443500 + }, + { + "epoch": 31.93, + "learning_rate": 3.404282784144112e-05, + "loss": 2.2387, + "step": 6444000 + }, + { + "epoch": 31.93, + "learning_rate": 3.4041589255015037e-05, + "loss": 2.2511, + "step": 6444500 + }, + { + "epoch": 31.93, + "learning_rate": 3.4040350668588953e-05, + "loss": 2.1958, + "step": 6445000 + }, + { + "epoch": 31.93, + "learning_rate": 3.403911208216287e-05, + "loss": 2.2224, + "step": 6445500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403787349573679e-05, + "loss": 2.1971, + "step": 6446000 + }, + { + "epoch": 31.94, + "learning_rate": 3.4036634909310704e-05, + "loss": 2.2369, + "step": 6446500 + }, + { + "epoch": 31.94, + "learning_rate": 3.403539880005747e-05, + "loss": 2.2274, + "step": 6447000 + }, + { + "epoch": 31.94, + "learning_rate": 3.403416021363139e-05, + "loss": 2.2175, + "step": 6447500 + }, + { + "epoch": 31.95, + "learning_rate": 3.403292162720531e-05, + "loss": 2.2479, + "step": 6448000 + }, + { + "epoch": 31.95, + "learning_rate": 3.403168551795207e-05, + "loss": 2.2391, + "step": 6448500 + }, + { + "epoch": 31.95, + "learning_rate": 3.4030449408698845e-05, + "loss": 2.2423, + "step": 6449000 + }, + { + "epoch": 31.95, + "learning_rate": 3.4029210822272755e-05, + "loss": 2.2215, + "step": 6449500 + }, + { + "epoch": 31.96, + "learning_rate": 3.402797223584667e-05, + "loss": 2.2542, + "step": 6450000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402673364942059e-05, + "loss": 2.2291, + "step": 6450500 + }, + { + "epoch": 31.96, + "learning_rate": 3.4025495062994505e-05, + "loss": 2.2185, + "step": 6451000 + }, + { + "epoch": 31.96, + "learning_rate": 3.402425895374128e-05, + "loss": 2.2083, + "step": 6451500 + }, + { + "epoch": 31.97, + "learning_rate": 3.40230203673152e-05, + "loss": 2.2151, + "step": 6452000 + }, + { + "epoch": 31.97, + "learning_rate": 3.402178178088911e-05, + "loss": 2.2216, + "step": 6452500 + }, + { + "epoch": 31.97, + "learning_rate": 3.4020543194463025e-05, + "loss": 2.2025, + "step": 6453000 + }, + { + "epoch": 31.97, + "learning_rate": 3.401930460803694e-05, + "loss": 2.2272, + "step": 6453500 + }, + { + "epoch": 31.98, + "learning_rate": 3.401806602161086e-05, + "loss": 2.2315, + "step": 6454000 + }, + { + "epoch": 31.98, + "learning_rate": 3.4016827435184776e-05, + "loss": 2.2421, + "step": 6454500 + }, + { + "epoch": 31.98, + "learning_rate": 3.4015588848758686e-05, + "loss": 2.229, + "step": 6455000 + }, + { + "epoch": 31.98, + "learning_rate": 3.40143502623326e-05, + "loss": 2.2382, + "step": 6455500 + }, + { + "epoch": 31.99, + "learning_rate": 3.401311167590652e-05, + "loss": 2.2209, + "step": 6456000 + }, + { + "epoch": 31.99, + "learning_rate": 3.401187308948044e-05, + "loss": 2.2126, + "step": 6456500 + }, + { + "epoch": 31.99, + "learning_rate": 3.4010634503054354e-05, + "loss": 2.2322, + "step": 6457000 + }, + { + "epoch": 31.99, + "learning_rate": 3.400939591662827e-05, + "loss": 2.2372, + "step": 6457500 + }, + { + "epoch": 32.0, + "learning_rate": 3.400815733020219e-05, + "loss": 2.236, + "step": 6458000 + }, + { + "epoch": 32.0, + "learning_rate": 3.4006918743776104e-05, + "loss": 2.2363, + "step": 6458500 + }, + { + "epoch": 32.0, + "eval_accuracy": 0.6593229081640353, + "eval_accuracy_mlm": 0.6153610829620848, + "eval_accuracy_nsp": 0.8667393580928698, + "eval_loss": 2.3164103031158447, + "eval_runtime": 145.6685, + "eval_samples_per_second": 1750.269, + "eval_steps_per_second": 72.933, + "step": 6458976 + }, + { + "epoch": 32.0, + "learning_rate": 3.400568015735002e-05, + "loss": 2.2388, + "step": 6459000 + }, + { + "epoch": 32.0, + "learning_rate": 3.400444652526964e-05, + "loss": 2.2307, + "step": 6459500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400320793884356e-05, + "loss": 2.203, + "step": 6460000 + }, + { + "epoch": 32.01, + "learning_rate": 3.4001969352417476e-05, + "loss": 2.1939, + "step": 6460500 + }, + { + "epoch": 32.01, + "learning_rate": 3.400073076599139e-05, + "loss": 2.1834, + "step": 6461000 + }, + { + "epoch": 32.01, + "learning_rate": 3.399949217956531e-05, + "loss": 2.2082, + "step": 6461500 + }, + { + "epoch": 32.01, + "learning_rate": 3.399825607031207e-05, + "loss": 2.2135, + "step": 6462000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399701748388599e-05, + "loss": 2.2041, + "step": 6462500 + }, + { + "epoch": 32.02, + "learning_rate": 3.3995778897459906e-05, + "loss": 2.1927, + "step": 6463000 + }, + { + "epoch": 32.02, + "learning_rate": 3.399454031103382e-05, + "loss": 2.1848, + "step": 6463500 + }, + { + "epoch": 32.02, + "learning_rate": 3.399330172460774e-05, + "loss": 2.2194, + "step": 6464000 + }, + { + "epoch": 32.03, + "learning_rate": 3.3992063138181656e-05, + "loss": 2.1935, + "step": 6464500 + }, + { + "epoch": 32.03, + "learning_rate": 3.3990824551755573e-05, + "loss": 2.22, + "step": 6465000 + }, + { + "epoch": 32.03, + "learning_rate": 3.398958596532949e-05, + "loss": 2.1954, + "step": 6465500 + }, + { + "epoch": 32.03, + "learning_rate": 3.398834737890341e-05, + "loss": 2.2214, + "step": 6466000 + }, + { + "epoch": 32.04, + "learning_rate": 3.3987111269650176e-05, + "loss": 2.1903, + "step": 6466500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398587268322409e-05, + "loss": 2.1986, + "step": 6467000 + }, + { + "epoch": 32.04, + "learning_rate": 3.398463409679801e-05, + "loss": 2.1881, + "step": 6467500 + }, + { + "epoch": 32.04, + "learning_rate": 3.398339798754477e-05, + "loss": 2.2115, + "step": 6468000 + }, + { + "epoch": 32.05, + "learning_rate": 3.398215940111869e-05, + "loss": 2.2028, + "step": 6468500 + }, + { + "epoch": 32.05, + "learning_rate": 3.3980923291865464e-05, + "loss": 2.1965, + "step": 6469000 + }, + { + "epoch": 32.05, + "learning_rate": 3.3979687182612227e-05, + "loss": 2.2209, + "step": 6469500 + }, + { + "epoch": 32.05, + "learning_rate": 3.3978448596186143e-05, + "loss": 2.1943, + "step": 6470000 + }, + { + "epoch": 32.06, + "learning_rate": 3.397721000976006e-05, + "loss": 2.2164, + "step": 6470500 + }, + { + "epoch": 32.06, + "learning_rate": 3.397597390050683e-05, + "loss": 2.1946, + "step": 6471000 + }, + { + "epoch": 32.06, + "learning_rate": 3.3974735314080746e-05, + "loss": 2.2251, + "step": 6471500 + }, + { + "epoch": 32.06, + "learning_rate": 3.397349672765466e-05, + "loss": 2.1968, + "step": 6472000 + }, + { + "epoch": 32.07, + "learning_rate": 3.397225814122858e-05, + "loss": 2.212, + "step": 6472500 + }, + { + "epoch": 32.07, + "learning_rate": 3.39710195548025e-05, + "loss": 2.2174, + "step": 6473000 + }, + { + "epoch": 32.07, + "learning_rate": 3.3969780968376414e-05, + "loss": 2.2001, + "step": 6473500 + }, + { + "epoch": 32.07, + "learning_rate": 3.396854238195033e-05, + "loss": 2.2193, + "step": 6474000 + }, + { + "epoch": 32.08, + "learning_rate": 3.396730379552425e-05, + "loss": 2.2152, + "step": 6474500 + }, + { + "epoch": 32.08, + "learning_rate": 3.3966065209098165e-05, + "loss": 2.1806, + "step": 6475000 + }, + { + "epoch": 32.08, + "learning_rate": 3.3964829099844933e-05, + "loss": 2.2087, + "step": 6475500 + }, + { + "epoch": 32.08, + "learning_rate": 3.396359051341885e-05, + "loss": 2.1978, + "step": 6476000 + }, + { + "epoch": 32.09, + "learning_rate": 3.396235192699276e-05, + "loss": 2.2147, + "step": 6476500 + }, + { + "epoch": 32.09, + "learning_rate": 3.396111334056668e-05, + "loss": 2.2109, + "step": 6477000 + }, + { + "epoch": 32.09, + "learning_rate": 3.3959874754140594e-05, + "loss": 2.2149, + "step": 6477500 + }, + { + "epoch": 32.09, + "learning_rate": 3.395863616771451e-05, + "loss": 2.2203, + "step": 6478000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395739758128843e-05, + "loss": 2.208, + "step": 6478500 + }, + { + "epoch": 32.1, + "learning_rate": 3.3956158994862345e-05, + "loss": 2.2123, + "step": 6479000 + }, + { + "epoch": 32.1, + "learning_rate": 3.395492040843626e-05, + "loss": 2.1831, + "step": 6479500 + }, + { + "epoch": 32.1, + "learning_rate": 3.395368182201018e-05, + "loss": 2.2049, + "step": 6480000 + }, + { + "epoch": 32.11, + "learning_rate": 3.395244323558409e-05, + "loss": 2.2029, + "step": 6480500 + }, + { + "epoch": 32.11, + "learning_rate": 3.3951207126330865e-05, + "loss": 2.1977, + "step": 6481000 + }, + { + "epoch": 32.11, + "learning_rate": 3.394996853990478e-05, + "loss": 2.2033, + "step": 6481500 + }, + { + "epoch": 32.11, + "learning_rate": 3.39487299534787e-05, + "loss": 2.2213, + "step": 6482000 + }, + { + "epoch": 32.12, + "learning_rate": 3.394749384422547e-05, + "loss": 2.2247, + "step": 6482500 + }, + { + "epoch": 32.12, + "learning_rate": 3.394625525779938e-05, + "loss": 2.2051, + "step": 6483000 + }, + { + "epoch": 32.12, + "learning_rate": 3.3945016671373294e-05, + "loss": 2.1888, + "step": 6483500 + }, + { + "epoch": 32.12, + "learning_rate": 3.394377808494721e-05, + "loss": 2.2, + "step": 6484000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394253949852113e-05, + "loss": 2.2133, + "step": 6484500 + }, + { + "epoch": 32.13, + "learning_rate": 3.3941300912095045e-05, + "loss": 2.207, + "step": 6485000 + }, + { + "epoch": 32.13, + "learning_rate": 3.394006232566896e-05, + "loss": 2.2215, + "step": 6485500 + }, + { + "epoch": 32.13, + "learning_rate": 3.393882373924288e-05, + "loss": 2.2094, + "step": 6486000 + }, + { + "epoch": 32.14, + "learning_rate": 3.3937585152816796e-05, + "loss": 2.1998, + "step": 6486500 + }, + { + "epoch": 32.14, + "learning_rate": 3.3936346566390706e-05, + "loss": 2.1855, + "step": 6487000 + }, + { + "epoch": 32.14, + "learning_rate": 3.393510797996462e-05, + "loss": 2.2094, + "step": 6487500 + }, + { + "epoch": 32.14, + "learning_rate": 3.393386939353854e-05, + "loss": 2.1967, + "step": 6488000 + }, + { + "epoch": 32.15, + "learning_rate": 3.393263080711246e-05, + "loss": 2.2105, + "step": 6488500 + }, + { + "epoch": 32.15, + "learning_rate": 3.3931392220686374e-05, + "loss": 2.2006, + "step": 6489000 + }, + { + "epoch": 32.15, + "learning_rate": 3.393015363426029e-05, + "loss": 2.1944, + "step": 6489500 + }, + { + "epoch": 32.15, + "learning_rate": 3.392891504783421e-05, + "loss": 2.1904, + "step": 6490000 + }, + { + "epoch": 32.16, + "learning_rate": 3.3927676461408125e-05, + "loss": 2.1949, + "step": 6490500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392643787498204e-05, + "loss": 2.23, + "step": 6491000 + }, + { + "epoch": 32.16, + "learning_rate": 3.392519928855596e-05, + "loss": 2.2266, + "step": 6491500 + }, + { + "epoch": 32.16, + "learning_rate": 3.392396317930273e-05, + "loss": 2.2044, + "step": 6492000 + }, + { + "epoch": 32.17, + "learning_rate": 3.3922724592876644e-05, + "loss": 2.2013, + "step": 6492500 + }, + { + "epoch": 32.17, + "learning_rate": 3.392148600645056e-05, + "loss": 2.2037, + "step": 6493000 + }, + { + "epoch": 32.17, + "learning_rate": 3.392024742002448e-05, + "loss": 2.213, + "step": 6493500 + }, + { + "epoch": 32.17, + "learning_rate": 3.3919008833598395e-05, + "loss": 2.2009, + "step": 6494000 + }, + { + "epoch": 32.18, + "learning_rate": 3.391777272434516e-05, + "loss": 2.1952, + "step": 6494500 + }, + { + "epoch": 32.18, + "learning_rate": 3.3916534137919074e-05, + "loss": 2.2143, + "step": 6495000 + }, + { + "epoch": 32.18, + "learning_rate": 3.391529555149299e-05, + "loss": 2.208, + "step": 6495500 + }, + { + "epoch": 32.18, + "learning_rate": 3.391405696506691e-05, + "loss": 2.1843, + "step": 6496000 + }, + { + "epoch": 32.19, + "learning_rate": 3.3912818378640825e-05, + "loss": 2.1988, + "step": 6496500 + }, + { + "epoch": 32.19, + "learning_rate": 3.391157979221474e-05, + "loss": 2.2156, + "step": 6497000 + }, + { + "epoch": 32.19, + "learning_rate": 3.391034120578866e-05, + "loss": 2.1969, + "step": 6497500 + }, + { + "epoch": 32.19, + "learning_rate": 3.3909102619362576e-05, + "loss": 2.1982, + "step": 6498000 + }, + { + "epoch": 32.2, + "learning_rate": 3.3907866510109344e-05, + "loss": 2.1881, + "step": 6498500 + }, + { + "epoch": 32.2, + "learning_rate": 3.390663040085611e-05, + "loss": 2.198, + "step": 6499000 + }, + { + "epoch": 32.2, + "learning_rate": 3.390539181443002e-05, + "loss": 2.2107, + "step": 6499500 + }, + { + "epoch": 32.2, + "learning_rate": 3.390415322800394e-05, + "loss": 2.2016, + "step": 6500000 + }, + { + "epoch": 32.21, + "learning_rate": 3.390291464157786e-05, + "loss": 2.2143, + "step": 6500500 + }, + { + "epoch": 32.21, + "learning_rate": 3.390167853232463e-05, + "loss": 2.2103, + "step": 6501000 + }, + { + "epoch": 32.21, + "learning_rate": 3.390043994589855e-05, + "loss": 2.1963, + "step": 6501500 + }, + { + "epoch": 32.21, + "learning_rate": 3.389920383664532e-05, + "loss": 2.2025, + "step": 6502000 + }, + { + "epoch": 32.22, + "learning_rate": 3.3897965250219235e-05, + "loss": 2.2115, + "step": 6502500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389672666379315e-05, + "loss": 2.2162, + "step": 6503000 + }, + { + "epoch": 32.22, + "learning_rate": 3.389548807736706e-05, + "loss": 2.2093, + "step": 6503500 + }, + { + "epoch": 32.22, + "learning_rate": 3.389424949094098e-05, + "loss": 2.2149, + "step": 6504000 + }, + { + "epoch": 32.23, + "learning_rate": 3.3893010904514896e-05, + "loss": 2.2041, + "step": 6504500 + }, + { + "epoch": 32.23, + "learning_rate": 3.389177231808881e-05, + "loss": 2.1998, + "step": 6505000 + }, + { + "epoch": 32.23, + "learning_rate": 3.389053373166273e-05, + "loss": 2.1898, + "step": 6505500 + }, + { + "epoch": 32.23, + "learning_rate": 3.388929514523664e-05, + "loss": 2.2154, + "step": 6506000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388805655881056e-05, + "loss": 2.2269, + "step": 6506500 + }, + { + "epoch": 32.24, + "learning_rate": 3.3886817972384474e-05, + "loss": 2.2098, + "step": 6507000 + }, + { + "epoch": 32.24, + "learning_rate": 3.388557938595839e-05, + "loss": 2.2206, + "step": 6507500 + }, + { + "epoch": 32.24, + "learning_rate": 3.388434079953231e-05, + "loss": 2.2283, + "step": 6508000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388310469027908e-05, + "loss": 2.2005, + "step": 6508500 + }, + { + "epoch": 32.25, + "learning_rate": 3.3881866103852994e-05, + "loss": 2.2254, + "step": 6509000 + }, + { + "epoch": 32.25, + "learning_rate": 3.388062751742691e-05, + "loss": 2.1927, + "step": 6509500 + }, + { + "epoch": 32.25, + "learning_rate": 3.387938893100083e-05, + "loss": 2.201, + "step": 6510000 + }, + { + "epoch": 32.26, + "learning_rate": 3.3878150344574745e-05, + "loss": 2.1961, + "step": 6510500 + }, + { + "epoch": 32.26, + "learning_rate": 3.387691175814866e-05, + "loss": 2.1983, + "step": 6511000 + }, + { + "epoch": 32.26, + "learning_rate": 3.387567317172258e-05, + "loss": 2.2046, + "step": 6511500 + }, + { + "epoch": 32.26, + "learning_rate": 3.3874434585296495e-05, + "loss": 2.2265, + "step": 6512000 + }, + { + "epoch": 32.27, + "learning_rate": 3.387319599887041e-05, + "loss": 2.214, + "step": 6512500 + }, + { + "epoch": 32.27, + "learning_rate": 3.387195741244433e-05, + "loss": 2.194, + "step": 6513000 + }, + { + "epoch": 32.27, + "learning_rate": 3.3870718826018246e-05, + "loss": 2.2226, + "step": 6513500 + }, + { + "epoch": 32.27, + "learning_rate": 3.386948271676501e-05, + "loss": 2.1941, + "step": 6514000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3868246607511784e-05, + "loss": 2.1788, + "step": 6514500 + }, + { + "epoch": 32.28, + "learning_rate": 3.3867008021085694e-05, + "loss": 2.205, + "step": 6515000 + }, + { + "epoch": 32.28, + "learning_rate": 3.386576943465961e-05, + "loss": 2.2079, + "step": 6515500 + }, + { + "epoch": 32.28, + "learning_rate": 3.386453084823353e-05, + "loss": 2.1918, + "step": 6516000 + }, + { + "epoch": 32.28, + "learning_rate": 3.3863294738980303e-05, + "loss": 2.2067, + "step": 6516500 + }, + { + "epoch": 32.29, + "learning_rate": 3.3862056152554214e-05, + "loss": 2.1896, + "step": 6517000 + }, + { + "epoch": 32.29, + "learning_rate": 3.386081756612813e-05, + "loss": 2.1909, + "step": 6517500 + }, + { + "epoch": 32.29, + "learning_rate": 3.385957897970205e-05, + "loss": 2.2219, + "step": 6518000 + }, + { + "epoch": 32.29, + "learning_rate": 3.3858342870448816e-05, + "loss": 2.2144, + "step": 6518500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385710428402273e-05, + "loss": 2.2162, + "step": 6519000 + }, + { + "epoch": 32.3, + "learning_rate": 3.385586569759665e-05, + "loss": 2.2064, + "step": 6519500 + }, + { + "epoch": 32.3, + "learning_rate": 3.385462958834342e-05, + "loss": 2.2063, + "step": 6520000 + }, + { + "epoch": 32.3, + "learning_rate": 3.3853391001917336e-05, + "loss": 2.2012, + "step": 6520500 + }, + { + "epoch": 32.31, + "learning_rate": 3.385215241549125e-05, + "loss": 2.2028, + "step": 6521000 + }, + { + "epoch": 32.31, + "learning_rate": 3.385091382906517e-05, + "loss": 2.1945, + "step": 6521500 + }, + { + "epoch": 32.31, + "learning_rate": 3.3849680196984784e-05, + "loss": 2.2054, + "step": 6522000 + }, + { + "epoch": 32.31, + "learning_rate": 3.38484416105587e-05, + "loss": 2.2139, + "step": 6522500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384720302413262e-05, + "loss": 2.203, + "step": 6523000 + }, + { + "epoch": 32.32, + "learning_rate": 3.3845964437706534e-05, + "loss": 2.2034, + "step": 6523500 + }, + { + "epoch": 32.32, + "learning_rate": 3.384472585128045e-05, + "loss": 2.2103, + "step": 6524000 + }, + { + "epoch": 32.32, + "learning_rate": 3.384348726485437e-05, + "loss": 2.2098, + "step": 6524500 + }, + { + "epoch": 32.33, + "learning_rate": 3.3842248678428285e-05, + "loss": 2.1964, + "step": 6525000 + }, + { + "epoch": 32.33, + "learning_rate": 3.38410100920022e-05, + "loss": 2.1948, + "step": 6525500 + }, + { + "epoch": 32.33, + "learning_rate": 3.383977150557612e-05, + "loss": 2.2176, + "step": 6526000 + }, + { + "epoch": 32.33, + "learning_rate": 3.3838532919150036e-05, + "loss": 2.2094, + "step": 6526500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383729433272395e-05, + "loss": 2.2111, + "step": 6527000 + }, + { + "epoch": 32.34, + "learning_rate": 3.383605574629787e-05, + "loss": 2.2003, + "step": 6527500 + }, + { + "epoch": 32.34, + "learning_rate": 3.383481715987179e-05, + "loss": 2.2115, + "step": 6528000 + }, + { + "epoch": 32.34, + "learning_rate": 3.3833578573445704e-05, + "loss": 2.2292, + "step": 6528500 + }, + { + "epoch": 32.35, + "learning_rate": 3.3832342464192466e-05, + "loss": 2.2087, + "step": 6529000 + }, + { + "epoch": 32.35, + "learning_rate": 3.383110387776638e-05, + "loss": 2.2061, + "step": 6529500 + }, + { + "epoch": 32.35, + "learning_rate": 3.38298652913403e-05, + "loss": 2.2259, + "step": 6530000 + }, + { + "epoch": 32.35, + "learning_rate": 3.3828626704914217e-05, + "loss": 2.203, + "step": 6530500 + }, + { + "epoch": 32.36, + "learning_rate": 3.3827388118488133e-05, + "loss": 2.2353, + "step": 6531000 + }, + { + "epoch": 32.36, + "learning_rate": 3.382614953206205e-05, + "loss": 2.2215, + "step": 6531500 + }, + { + "epoch": 32.36, + "learning_rate": 3.382491342280882e-05, + "loss": 2.2307, + "step": 6532000 + }, + { + "epoch": 32.36, + "learning_rate": 3.3823674836382736e-05, + "loss": 2.2312, + "step": 6532500 + }, + { + "epoch": 32.37, + "learning_rate": 3.382243624995665e-05, + "loss": 2.2237, + "step": 6533000 + }, + { + "epoch": 32.37, + "learning_rate": 3.382119766353057e-05, + "loss": 2.2023, + "step": 6533500 + }, + { + "epoch": 32.37, + "learning_rate": 3.381996155427733e-05, + "loss": 2.2172, + "step": 6534000 + }, + { + "epoch": 32.37, + "learning_rate": 3.381872296785125e-05, + "loss": 2.2087, + "step": 6534500 + }, + { + "epoch": 32.38, + "learning_rate": 3.3817484381425166e-05, + "loss": 2.2205, + "step": 6535000 + }, + { + "epoch": 32.38, + "learning_rate": 3.381624579499908e-05, + "loss": 2.2083, + "step": 6535500 + }, + { + "epoch": 32.38, + "learning_rate": 3.3815007208573e-05, + "loss": 2.2052, + "step": 6536000 + }, + { + "epoch": 32.38, + "learning_rate": 3.381376862214692e-05, + "loss": 2.2252, + "step": 6536500 + }, + { + "epoch": 32.39, + "learning_rate": 3.3812530035720834e-05, + "loss": 2.1906, + "step": 6537000 + }, + { + "epoch": 32.39, + "learning_rate": 3.381129144929475e-05, + "loss": 2.2277, + "step": 6537500 + }, + { + "epoch": 32.39, + "learning_rate": 3.381005286286867e-05, + "loss": 2.2075, + "step": 6538000 + }, + { + "epoch": 32.39, + "learning_rate": 3.3808816753615436e-05, + "loss": 2.2181, + "step": 6538500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380757816718935e-05, + "loss": 2.2153, + "step": 6539000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380634205793612e-05, + "loss": 2.1944, + "step": 6539500 + }, + { + "epoch": 32.4, + "learning_rate": 3.380510842585574e-05, + "loss": 2.2176, + "step": 6540000 + }, + { + "epoch": 32.4, + "learning_rate": 3.380387231660251e-05, + "loss": 2.2233, + "step": 6540500 + }, + { + "epoch": 32.41, + "learning_rate": 3.380263373017643e-05, + "loss": 2.2152, + "step": 6541000 + }, + { + "epoch": 32.41, + "learning_rate": 3.380139514375034e-05, + "loss": 2.2154, + "step": 6541500 + }, + { + "epoch": 32.41, + "learning_rate": 3.3800156557324255e-05, + "loss": 2.2299, + "step": 6542000 + }, + { + "epoch": 32.41, + "learning_rate": 3.379891797089817e-05, + "loss": 2.2184, + "step": 6542500 + }, + { + "epoch": 32.42, + "learning_rate": 3.379767938447209e-05, + "loss": 2.2195, + "step": 6543000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379644327521886e-05, + "loss": 2.2001, + "step": 6543500 + }, + { + "epoch": 32.42, + "learning_rate": 3.3795204688792775e-05, + "loss": 2.2322, + "step": 6544000 + }, + { + "epoch": 32.42, + "learning_rate": 3.379396610236669e-05, + "loss": 2.207, + "step": 6544500 + }, + { + "epoch": 32.43, + "learning_rate": 3.379272751594061e-05, + "loss": 2.2361, + "step": 6545000 + }, + { + "epoch": 32.43, + "learning_rate": 3.3791488929514526e-05, + "loss": 2.2226, + "step": 6545500 + }, + { + "epoch": 32.43, + "learning_rate": 3.379025034308844e-05, + "loss": 2.2174, + "step": 6546000 + }, + { + "epoch": 32.43, + "learning_rate": 3.378901175666236e-05, + "loss": 2.2221, + "step": 6546500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378777317023628e-05, + "loss": 2.1688, + "step": 6547000 + }, + { + "epoch": 32.44, + "learning_rate": 3.3786534583810194e-05, + "loss": 2.2214, + "step": 6547500 + }, + { + "epoch": 32.44, + "learning_rate": 3.378529599738411e-05, + "loss": 2.2168, + "step": 6548000 + }, + { + "epoch": 32.44, + "learning_rate": 3.378405741095803e-05, + "loss": 2.2249, + "step": 6548500 + }, + { + "epoch": 32.45, + "learning_rate": 3.3782818824531944e-05, + "loss": 2.2091, + "step": 6549000 + }, + { + "epoch": 32.45, + "learning_rate": 3.378158023810586e-05, + "loss": 2.2079, + "step": 6549500 + }, + { + "epoch": 32.45, + "learning_rate": 3.378034165167977e-05, + "loss": 2.236, + "step": 6550000 + }, + { + "epoch": 32.45, + "learning_rate": 3.377910306525369e-05, + "loss": 2.219, + "step": 6550500 + }, + { + "epoch": 32.46, + "learning_rate": 3.3777864478827605e-05, + "loss": 2.212, + "step": 6551000 + }, + { + "epoch": 32.46, + "learning_rate": 3.377662589240152e-05, + "loss": 2.2248, + "step": 6551500 + }, + { + "epoch": 32.46, + "learning_rate": 3.377538730597544e-05, + "loss": 2.2177, + "step": 6552000 + }, + { + "epoch": 32.46, + "learning_rate": 3.3774148719549356e-05, + "loss": 2.2114, + "step": 6552500 + }, + { + "epoch": 32.47, + "learning_rate": 3.377291013312327e-05, + "loss": 2.2005, + "step": 6553000 + }, + { + "epoch": 32.47, + "learning_rate": 3.377167154669719e-05, + "loss": 2.2251, + "step": 6553500 + }, + { + "epoch": 32.47, + "learning_rate": 3.37704329602711e-05, + "loss": 2.2158, + "step": 6554000 + }, + { + "epoch": 32.47, + "learning_rate": 3.376919685101787e-05, + "loss": 2.1986, + "step": 6554500 + }, + { + "epoch": 32.48, + "learning_rate": 3.3767960741764644e-05, + "loss": 2.2362, + "step": 6555000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376672215533856e-05, + "loss": 2.2072, + "step": 6555500 + }, + { + "epoch": 32.48, + "learning_rate": 3.3765488523258175e-05, + "loss": 2.2369, + "step": 6556000 + }, + { + "epoch": 32.48, + "learning_rate": 3.376424993683209e-05, + "loss": 2.2121, + "step": 6556500 + }, + { + "epoch": 32.49, + "learning_rate": 3.376301135040601e-05, + "loss": 2.2245, + "step": 6557000 + }, + { + "epoch": 32.49, + "learning_rate": 3.3761772763979926e-05, + "loss": 2.2273, + "step": 6557500 + }, + { + "epoch": 32.49, + "learning_rate": 3.376053417755384e-05, + "loss": 2.2315, + "step": 6558000 + }, + { + "epoch": 32.49, + "learning_rate": 3.375929559112776e-05, + "loss": 2.221, + "step": 6558500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375805700470168e-05, + "loss": 2.2076, + "step": 6559000 + }, + { + "epoch": 32.5, + "learning_rate": 3.3756820895448446e-05, + "loss": 2.2375, + "step": 6559500 + }, + { + "epoch": 32.5, + "learning_rate": 3.375558230902236e-05, + "loss": 2.233, + "step": 6560000 + }, + { + "epoch": 32.5, + "learning_rate": 3.375434372259628e-05, + "loss": 2.2013, + "step": 6560500 + }, + { + "epoch": 32.51, + "learning_rate": 3.3753105136170196e-05, + "loss": 2.224, + "step": 6561000 + }, + { + "epoch": 32.51, + "learning_rate": 3.375186654974411e-05, + "loss": 2.2265, + "step": 6561500 + }, + { + "epoch": 32.51, + "learning_rate": 3.3750627963318024e-05, + "loss": 2.2233, + "step": 6562000 + }, + { + "epoch": 32.51, + "learning_rate": 3.374938937689194e-05, + "loss": 2.22, + "step": 6562500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374815079046586e-05, + "loss": 2.2073, + "step": 6563000 + }, + { + "epoch": 32.52, + "learning_rate": 3.3746912204039774e-05, + "loss": 2.2204, + "step": 6563500 + }, + { + "epoch": 32.52, + "learning_rate": 3.374567609478654e-05, + "loss": 2.1987, + "step": 6564000 + }, + { + "epoch": 32.52, + "learning_rate": 3.374443750836046e-05, + "loss": 2.252, + "step": 6564500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374319892193438e-05, + "loss": 2.2045, + "step": 6565000 + }, + { + "epoch": 32.53, + "learning_rate": 3.3741960335508294e-05, + "loss": 2.2147, + "step": 6565500 + }, + { + "epoch": 32.53, + "learning_rate": 3.374072174908221e-05, + "loss": 2.2299, + "step": 6566000 + }, + { + "epoch": 32.53, + "learning_rate": 3.373948316265613e-05, + "loss": 2.2087, + "step": 6566500 + }, + { + "epoch": 32.54, + "learning_rate": 3.3738244576230045e-05, + "loss": 2.2143, + "step": 6567000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373700598980396e-05, + "loss": 2.2074, + "step": 6567500 + }, + { + "epoch": 32.54, + "learning_rate": 3.373576740337788e-05, + "loss": 2.2112, + "step": 6568000 + }, + { + "epoch": 32.54, + "learning_rate": 3.373453129412464e-05, + "loss": 2.1904, + "step": 6568500 + }, + { + "epoch": 32.55, + "learning_rate": 3.373329518487141e-05, + "loss": 2.1953, + "step": 6569000 + }, + { + "epoch": 32.55, + "learning_rate": 3.3732056598445326e-05, + "loss": 2.2283, + "step": 6569500 + }, + { + "epoch": 32.55, + "learning_rate": 3.373081801201924e-05, + "loss": 2.2039, + "step": 6570000 + }, + { + "epoch": 32.55, + "learning_rate": 3.372957942559316e-05, + "loss": 2.2101, + "step": 6570500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372834083916708e-05, + "loss": 2.2203, + "step": 6571000 + }, + { + "epoch": 32.56, + "learning_rate": 3.3727102252740994e-05, + "loss": 2.1973, + "step": 6571500 + }, + { + "epoch": 32.56, + "learning_rate": 3.372586366631491e-05, + "loss": 2.212, + "step": 6572000 + }, + { + "epoch": 32.56, + "learning_rate": 3.372462755706168e-05, + "loss": 2.2298, + "step": 6572500 + }, + { + "epoch": 32.56, + "learning_rate": 3.37233889706356e-05, + "loss": 2.201, + "step": 6573000 + }, + { + "epoch": 32.57, + "learning_rate": 3.372215286138236e-05, + "loss": 2.2432, + "step": 6573500 + }, + { + "epoch": 32.57, + "learning_rate": 3.3720914274956276e-05, + "loss": 2.2075, + "step": 6574000 + }, + { + "epoch": 32.57, + "learning_rate": 3.371967568853019e-05, + "loss": 2.2052, + "step": 6574500 + }, + { + "epoch": 32.57, + "learning_rate": 3.371843710210411e-05, + "loss": 2.201, + "step": 6575000 + }, + { + "epoch": 32.58, + "learning_rate": 3.3717198515678026e-05, + "loss": 2.2347, + "step": 6575500 + }, + { + "epoch": 32.58, + "learning_rate": 3.3715959929251943e-05, + "loss": 2.2136, + "step": 6576000 + }, + { + "epoch": 32.58, + "learning_rate": 3.371472134282586e-05, + "loss": 2.2285, + "step": 6576500 + }, + { + "epoch": 32.58, + "learning_rate": 3.371348275639978e-05, + "loss": 2.2092, + "step": 6577000 + }, + { + "epoch": 32.59, + "learning_rate": 3.3712244169973694e-05, + "loss": 2.2303, + "step": 6577500 + }, + { + "epoch": 32.59, + "learning_rate": 3.371100806072046e-05, + "loss": 2.2372, + "step": 6578000 + }, + { + "epoch": 32.59, + "learning_rate": 3.370976947429438e-05, + "loss": 2.2142, + "step": 6578500 + }, + { + "epoch": 32.59, + "learning_rate": 3.37085308878683e-05, + "loss": 2.2163, + "step": 6579000 + }, + { + "epoch": 32.6, + "learning_rate": 3.3707292301442214e-05, + "loss": 2.2067, + "step": 6579500 + }, + { + "epoch": 32.6, + "learning_rate": 3.370605371501613e-05, + "loss": 2.2277, + "step": 6580000 + }, + { + "epoch": 32.6, + "learning_rate": 3.370481512859005e-05, + "loss": 2.2055, + "step": 6580500 + }, + { + "epoch": 32.6, + "learning_rate": 3.3703576542163965e-05, + "loss": 2.1938, + "step": 6581000 + }, + { + "epoch": 32.61, + "learning_rate": 3.370233795573788e-05, + "loss": 2.2145, + "step": 6581500 + }, + { + "epoch": 32.61, + "learning_rate": 3.370109936931179e-05, + "loss": 2.1997, + "step": 6582000 + }, + { + "epoch": 32.61, + "learning_rate": 3.369986078288571e-05, + "loss": 2.2109, + "step": 6582500 + }, + { + "epoch": 32.61, + "learning_rate": 3.3698622196459625e-05, + "loss": 2.2218, + "step": 6583000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369738361003354e-05, + "loss": 2.2179, + "step": 6583500 + }, + { + "epoch": 32.62, + "learning_rate": 3.369614502360746e-05, + "loss": 2.2124, + "step": 6584000 + }, + { + "epoch": 32.62, + "learning_rate": 3.369490891435423e-05, + "loss": 2.218, + "step": 6584500 + }, + { + "epoch": 32.62, + "learning_rate": 3.3693670327928145e-05, + "loss": 2.2309, + "step": 6585000 + }, + { + "epoch": 32.63, + "learning_rate": 3.369243174150206e-05, + "loss": 2.1999, + "step": 6585500 + }, + { + "epoch": 32.63, + "learning_rate": 3.369119315507598e-05, + "loss": 2.2156, + "step": 6586000 + }, + { + "epoch": 32.63, + "learning_rate": 3.368995704582275e-05, + "loss": 2.2088, + "step": 6586500 + }, + { + "epoch": 32.63, + "learning_rate": 3.3688718459396665e-05, + "loss": 2.2409, + "step": 6587000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368748235014343e-05, + "loss": 2.2263, + "step": 6587500 + }, + { + "epoch": 32.64, + "learning_rate": 3.3686243763717344e-05, + "loss": 2.2184, + "step": 6588000 + }, + { + "epoch": 32.64, + "learning_rate": 3.368500517729126e-05, + "loss": 2.2329, + "step": 6588500 + }, + { + "epoch": 32.64, + "learning_rate": 3.368376659086518e-05, + "loss": 2.2595, + "step": 6589000 + }, + { + "epoch": 32.65, + "learning_rate": 3.3682528004439094e-05, + "loss": 2.2049, + "step": 6589500 + }, + { + "epoch": 32.65, + "learning_rate": 3.368128941801301e-05, + "loss": 2.2098, + "step": 6590000 + }, + { + "epoch": 32.65, + "learning_rate": 3.368005083158693e-05, + "loss": 2.2009, + "step": 6590500 + }, + { + "epoch": 32.65, + "learning_rate": 3.3678812245160845e-05, + "loss": 2.2141, + "step": 6591000 + }, + { + "epoch": 32.66, + "learning_rate": 3.367757365873476e-05, + "loss": 2.2486, + "step": 6591500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367633507230868e-05, + "loss": 2.1941, + "step": 6592000 + }, + { + "epoch": 32.66, + "learning_rate": 3.3675096485882596e-05, + "loss": 2.2165, + "step": 6592500 + }, + { + "epoch": 32.66, + "learning_rate": 3.367385789945651e-05, + "loss": 2.1934, + "step": 6593000 + }, + { + "epoch": 32.67, + "learning_rate": 3.367261931303042e-05, + "loss": 2.2103, + "step": 6593500 + }, + { + "epoch": 32.67, + "learning_rate": 3.367138072660434e-05, + "loss": 2.2143, + "step": 6594000 + }, + { + "epoch": 32.67, + "learning_rate": 3.367014214017826e-05, + "loss": 2.2099, + "step": 6594500 + }, + { + "epoch": 32.67, + "learning_rate": 3.366890603092503e-05, + "loss": 2.1974, + "step": 6595000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366766744449894e-05, + "loss": 2.2372, + "step": 6595500 + }, + { + "epoch": 32.68, + "learning_rate": 3.366643133524571e-05, + "loss": 2.2234, + "step": 6596000 + }, + { + "epoch": 32.68, + "learning_rate": 3.366519274881963e-05, + "loss": 2.2023, + "step": 6596500 + }, + { + "epoch": 32.68, + "learning_rate": 3.3663954162393545e-05, + "loss": 2.2142, + "step": 6597000 + }, + { + "epoch": 32.69, + "learning_rate": 3.366271557596746e-05, + "loss": 2.2005, + "step": 6597500 + }, + { + "epoch": 32.69, + "learning_rate": 3.366147698954138e-05, + "loss": 2.219, + "step": 6598000 + }, + { + "epoch": 32.69, + "learning_rate": 3.366024088028815e-05, + "loss": 2.2181, + "step": 6598500 + }, + { + "epoch": 32.69, + "learning_rate": 3.3659002293862065e-05, + "loss": 2.2129, + "step": 6599000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365776618460883e-05, + "loss": 2.2157, + "step": 6599500 + }, + { + "epoch": 32.7, + "learning_rate": 3.3656527598182744e-05, + "loss": 2.2164, + "step": 6600000 + }, + { + "epoch": 32.7, + "learning_rate": 3.365528901175666e-05, + "loss": 2.1974, + "step": 6600500 + }, + { + "epoch": 32.7, + "learning_rate": 3.365405042533058e-05, + "loss": 2.2306, + "step": 6601000 + }, + { + "epoch": 32.71, + "learning_rate": 3.3652814316077347e-05, + "loss": 2.2077, + "step": 6601500 + }, + { + "epoch": 32.71, + "learning_rate": 3.3651575729651263e-05, + "loss": 2.2149, + "step": 6602000 + }, + { + "epoch": 32.71, + "learning_rate": 3.365033714322518e-05, + "loss": 2.2067, + "step": 6602500 + }, + { + "epoch": 32.71, + "learning_rate": 3.36490985567991e-05, + "loss": 2.2259, + "step": 6603000 + }, + { + "epoch": 32.72, + "learning_rate": 3.3647859970373014e-05, + "loss": 2.2163, + "step": 6603500 + }, + { + "epoch": 32.72, + "learning_rate": 3.364662138394693e-05, + "loss": 2.2088, + "step": 6604000 + }, + { + "epoch": 32.72, + "learning_rate": 3.364538279752085e-05, + "loss": 2.2304, + "step": 6604500 + }, + { + "epoch": 32.72, + "learning_rate": 3.3644144211094765e-05, + "loss": 2.2071, + "step": 6605000 + }, + { + "epoch": 32.73, + "learning_rate": 3.364290562466868e-05, + "loss": 2.2017, + "step": 6605500 + }, + { + "epoch": 32.73, + "learning_rate": 3.36416670382426e-05, + "loss": 2.2221, + "step": 6606000 + }, + { + "epoch": 32.73, + "learning_rate": 3.3640428451816516e-05, + "loss": 2.2516, + "step": 6606500 + }, + { + "epoch": 32.73, + "learning_rate": 3.363918986539043e-05, + "loss": 2.2249, + "step": 6607000 + }, + { + "epoch": 32.74, + "learning_rate": 3.363795127896435e-05, + "loss": 2.2237, + "step": 6607500 + }, + { + "epoch": 32.74, + "learning_rate": 3.363671269253827e-05, + "loss": 2.2048, + "step": 6608000 + }, + { + "epoch": 32.74, + "learning_rate": 3.3635474106112184e-05, + "loss": 2.2148, + "step": 6608500 + }, + { + "epoch": 32.74, + "learning_rate": 3.3634235519686094e-05, + "loss": 2.2256, + "step": 6609000 + }, + { + "epoch": 32.75, + "learning_rate": 3.363299693326001e-05, + "loss": 2.2463, + "step": 6609500 + }, + { + "epoch": 32.75, + "learning_rate": 3.363175834683393e-05, + "loss": 2.2072, + "step": 6610000 + }, + { + "epoch": 32.75, + "learning_rate": 3.3630519760407844e-05, + "loss": 2.2111, + "step": 6610500 + }, + { + "epoch": 32.75, + "learning_rate": 3.362928365115461e-05, + "loss": 2.2146, + "step": 6611000 + }, + { + "epoch": 32.76, + "learning_rate": 3.362804506472853e-05, + "loss": 2.252, + "step": 6611500 + }, + { + "epoch": 32.76, + "learning_rate": 3.362680647830245e-05, + "loss": 2.2347, + "step": 6612000 + }, + { + "epoch": 32.76, + "learning_rate": 3.362556789187636e-05, + "loss": 2.2109, + "step": 6612500 + }, + { + "epoch": 32.76, + "learning_rate": 3.3624329305450274e-05, + "loss": 2.2187, + "step": 6613000 + }, + { + "epoch": 32.77, + "learning_rate": 3.362309319619705e-05, + "loss": 2.2218, + "step": 6613500 + }, + { + "epoch": 32.77, + "learning_rate": 3.362185460977097e-05, + "loss": 2.226, + "step": 6614000 + }, + { + "epoch": 32.77, + "learning_rate": 3.3620616023344884e-05, + "loss": 2.2319, + "step": 6614500 + }, + { + "epoch": 32.77, + "learning_rate": 3.36193774369188e-05, + "loss": 2.2312, + "step": 6615000 + }, + { + "epoch": 32.78, + "learning_rate": 3.361814132766556e-05, + "loss": 2.224, + "step": 6615500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361690274123948e-05, + "loss": 2.242, + "step": 6616000 + }, + { + "epoch": 32.78, + "learning_rate": 3.3615664154813396e-05, + "loss": 2.2007, + "step": 6616500 + }, + { + "epoch": 32.78, + "learning_rate": 3.361443052273302e-05, + "loss": 2.2512, + "step": 6617000 + }, + { + "epoch": 32.79, + "learning_rate": 3.3613191936306934e-05, + "loss": 2.2141, + "step": 6617500 + }, + { + "epoch": 32.79, + "learning_rate": 3.361195334988085e-05, + "loss": 2.221, + "step": 6618000 + }, + { + "epoch": 32.79, + "learning_rate": 3.361071476345477e-05, + "loss": 2.2071, + "step": 6618500 + }, + { + "epoch": 32.79, + "learning_rate": 3.360947617702868e-05, + "loss": 2.2425, + "step": 6619000 + }, + { + "epoch": 32.8, + "learning_rate": 3.3608237590602595e-05, + "loss": 2.223, + "step": 6619500 + }, + { + "epoch": 32.8, + "learning_rate": 3.360699900417651e-05, + "loss": 2.2144, + "step": 6620000 + }, + { + "epoch": 32.8, + "learning_rate": 3.360576041775043e-05, + "loss": 2.2007, + "step": 6620500 + }, + { + "epoch": 32.8, + "learning_rate": 3.3604521831324346e-05, + "loss": 2.204, + "step": 6621000 + }, + { + "epoch": 32.81, + "learning_rate": 3.3603285722071115e-05, + "loss": 2.2528, + "step": 6621500 + }, + { + "epoch": 32.81, + "learning_rate": 3.360204713564503e-05, + "loss": 2.21, + "step": 6622000 + }, + { + "epoch": 32.81, + "learning_rate": 3.360080854921895e-05, + "loss": 2.2036, + "step": 6622500 + }, + { + "epoch": 32.81, + "learning_rate": 3.3599569962792865e-05, + "loss": 2.2467, + "step": 6623000 + }, + { + "epoch": 32.82, + "learning_rate": 3.3598336330712486e-05, + "loss": 2.214, + "step": 6623500 + }, + { + "epoch": 32.82, + "learning_rate": 3.35970977442864e-05, + "loss": 2.2221, + "step": 6624000 + }, + { + "epoch": 32.82, + "learning_rate": 3.359585915786032e-05, + "loss": 2.2069, + "step": 6624500 + }, + { + "epoch": 32.82, + "learning_rate": 3.359462057143424e-05, + "loss": 2.228, + "step": 6625000 + }, + { + "epoch": 32.83, + "learning_rate": 3.359338198500815e-05, + "loss": 2.2318, + "step": 6625500 + }, + { + "epoch": 32.83, + "learning_rate": 3.359214587575492e-05, + "loss": 2.2248, + "step": 6626000 + }, + { + "epoch": 32.83, + "learning_rate": 3.359090728932884e-05, + "loss": 2.2005, + "step": 6626500 + }, + { + "epoch": 32.83, + "learning_rate": 3.3589673657248453e-05, + "loss": 2.2361, + "step": 6627000 + }, + { + "epoch": 32.83, + "learning_rate": 3.358843507082237e-05, + "loss": 2.2029, + "step": 6627500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358719648439629e-05, + "loss": 2.2289, + "step": 6628000 + }, + { + "epoch": 32.84, + "learning_rate": 3.3585957897970204e-05, + "loss": 2.2083, + "step": 6628500 + }, + { + "epoch": 32.84, + "learning_rate": 3.358471931154412e-05, + "loss": 2.2152, + "step": 6629000 + }, + { + "epoch": 32.84, + "learning_rate": 3.358348072511804e-05, + "loss": 2.2215, + "step": 6629500 + }, + { + "epoch": 32.85, + "learning_rate": 3.3582242138691955e-05, + "loss": 2.1913, + "step": 6630000 + }, + { + "epoch": 32.85, + "learning_rate": 3.358100355226587e-05, + "loss": 2.2186, + "step": 6630500 + }, + { + "epoch": 32.85, + "learning_rate": 3.357976496583979e-05, + "loss": 2.2363, + "step": 6631000 + }, + { + "epoch": 32.85, + "learning_rate": 3.3578526379413706e-05, + "loss": 2.2145, + "step": 6631500 + }, + { + "epoch": 32.86, + "learning_rate": 3.357728779298762e-05, + "loss": 2.2131, + "step": 6632000 + }, + { + "epoch": 32.86, + "learning_rate": 3.357604920656154e-05, + "loss": 2.2287, + "step": 6632500 + }, + { + "epoch": 32.86, + "learning_rate": 3.357481062013546e-05, + "loss": 2.2282, + "step": 6633000 + }, + { + "epoch": 32.86, + "learning_rate": 3.3573572033709374e-05, + "loss": 2.222, + "step": 6633500 + }, + { + "epoch": 32.87, + "learning_rate": 3.357233344728329e-05, + "loss": 2.2288, + "step": 6634000 + }, + { + "epoch": 32.87, + "learning_rate": 3.357109486085721e-05, + "loss": 2.2067, + "step": 6634500 + }, + { + "epoch": 32.87, + "learning_rate": 3.356985627443112e-05, + "loss": 2.2245, + "step": 6635000 + }, + { + "epoch": 32.87, + "learning_rate": 3.3568620165177886e-05, + "loss": 2.2202, + "step": 6635500 + }, + { + "epoch": 32.88, + "learning_rate": 3.35673815787518e-05, + "loss": 2.2116, + "step": 6636000 + }, + { + "epoch": 32.88, + "learning_rate": 3.356614546949857e-05, + "loss": 2.2298, + "step": 6636500 + }, + { + "epoch": 32.88, + "learning_rate": 3.356490688307249e-05, + "loss": 2.2243, + "step": 6637000 + }, + { + "epoch": 32.88, + "learning_rate": 3.3563668296646406e-05, + "loss": 2.2296, + "step": 6637500 + }, + { + "epoch": 32.89, + "learning_rate": 3.356242971022032e-05, + "loss": 2.2225, + "step": 6638000 + }, + { + "epoch": 32.89, + "learning_rate": 3.356119112379424e-05, + "loss": 2.1984, + "step": 6638500 + }, + { + "epoch": 32.89, + "learning_rate": 3.355995253736816e-05, + "loss": 2.2297, + "step": 6639000 + }, + { + "epoch": 32.89, + "learning_rate": 3.3558713950942074e-05, + "loss": 2.2113, + "step": 6639500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355747536451599e-05, + "loss": 2.2124, + "step": 6640000 + }, + { + "epoch": 32.9, + "learning_rate": 3.355623677808991e-05, + "loss": 2.213, + "step": 6640500 + }, + { + "epoch": 32.9, + "learning_rate": 3.355500066883667e-05, + "loss": 2.1908, + "step": 6641000 + }, + { + "epoch": 32.9, + "learning_rate": 3.3553762082410586e-05, + "loss": 2.2046, + "step": 6641500 + }, + { + "epoch": 32.91, + "learning_rate": 3.3552523495984503e-05, + "loss": 2.2032, + "step": 6642000 + }, + { + "epoch": 32.91, + "learning_rate": 3.355128738673127e-05, + "loss": 2.1987, + "step": 6642500 + }, + { + "epoch": 32.91, + "learning_rate": 3.355004880030519e-05, + "loss": 2.2311, + "step": 6643000 + }, + { + "epoch": 32.91, + "learning_rate": 3.3548810213879106e-05, + "loss": 2.2356, + "step": 6643500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354757162745302e-05, + "loss": 2.2262, + "step": 6644000 + }, + { + "epoch": 32.92, + "learning_rate": 3.354633304102694e-05, + "loss": 2.196, + "step": 6644500 + }, + { + "epoch": 32.92, + "learning_rate": 3.354509445460086e-05, + "loss": 2.2177, + "step": 6645000 + }, + { + "epoch": 32.92, + "learning_rate": 3.3543855868174774e-05, + "loss": 2.2002, + "step": 6645500 + }, + { + "epoch": 32.93, + "learning_rate": 3.354261728174869e-05, + "loss": 2.2402, + "step": 6646000 + }, + { + "epoch": 32.93, + "learning_rate": 3.354137869532261e-05, + "loss": 2.2201, + "step": 6646500 + }, + { + "epoch": 32.93, + "learning_rate": 3.3540140108896525e-05, + "loss": 2.2249, + "step": 6647000 + }, + { + "epoch": 32.93, + "learning_rate": 3.3538901522470435e-05, + "loss": 2.2081, + "step": 6647500 + }, + { + "epoch": 32.94, + "learning_rate": 3.3537665413217204e-05, + "loss": 2.2199, + "step": 6648000 + }, + { + "epoch": 32.94, + "learning_rate": 3.353642682679112e-05, + "loss": 2.2042, + "step": 6648500 + }, + { + "epoch": 32.94, + "learning_rate": 3.353518824036504e-05, + "loss": 2.2251, + "step": 6649000 + }, + { + "epoch": 32.94, + "learning_rate": 3.3533952131111806e-05, + "loss": 2.2405, + "step": 6649500 + }, + { + "epoch": 32.95, + "learning_rate": 3.353271354468572e-05, + "loss": 2.2288, + "step": 6650000 + }, + { + "epoch": 32.95, + "learning_rate": 3.353147495825964e-05, + "loss": 2.2055, + "step": 6650500 + }, + { + "epoch": 32.95, + "learning_rate": 3.353023637183356e-05, + "loss": 2.2243, + "step": 6651000 + }, + { + "epoch": 32.95, + "learning_rate": 3.3528997785407474e-05, + "loss": 2.2378, + "step": 6651500 + }, + { + "epoch": 32.96, + "learning_rate": 3.352775919898139e-05, + "loss": 2.2031, + "step": 6652000 + }, + { + "epoch": 32.96, + "learning_rate": 3.352652061255531e-05, + "loss": 2.2246, + "step": 6652500 + }, + { + "epoch": 32.96, + "learning_rate": 3.3525282026129225e-05, + "loss": 2.1836, + "step": 6653000 + }, + { + "epoch": 32.96, + "learning_rate": 3.3524045916875994e-05, + "loss": 2.2215, + "step": 6653500 + }, + { + "epoch": 32.97, + "learning_rate": 3.3522807330449904e-05, + "loss": 2.2132, + "step": 6654000 + }, + { + "epoch": 32.97, + "learning_rate": 3.352157122119667e-05, + "loss": 2.2263, + "step": 6654500 + }, + { + "epoch": 32.97, + "learning_rate": 3.352033511194344e-05, + "loss": 2.2455, + "step": 6655000 + }, + { + "epoch": 32.97, + "learning_rate": 3.351909652551736e-05, + "loss": 2.2298, + "step": 6655500 + }, + { + "epoch": 32.98, + "learning_rate": 3.3517857939091275e-05, + "loss": 2.2223, + "step": 6656000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351661935266519e-05, + "loss": 2.2317, + "step": 6656500 + }, + { + "epoch": 32.98, + "learning_rate": 3.351538324341196e-05, + "loss": 2.2343, + "step": 6657000 + }, + { + "epoch": 32.98, + "learning_rate": 3.351414465698587e-05, + "loss": 2.2235, + "step": 6657500 + }, + { + "epoch": 32.99, + "learning_rate": 3.351290607055979e-05, + "loss": 2.2268, + "step": 6658000 + }, + { + "epoch": 32.99, + "learning_rate": 3.3511667484133705e-05, + "loss": 2.2339, + "step": 6658500 + }, + { + "epoch": 32.99, + "learning_rate": 3.351042889770762e-05, + "loss": 2.2136, + "step": 6659000 + }, + { + "epoch": 32.99, + "learning_rate": 3.350919031128154e-05, + "loss": 2.1893, + "step": 6659500 + }, + { + "epoch": 33.0, + "learning_rate": 3.3507951724855456e-05, + "loss": 2.2058, + "step": 6660000 + }, + { + "epoch": 33.0, + "learning_rate": 3.350671313842937e-05, + "loss": 2.2326, + "step": 6660500 + }, + { + "epoch": 33.0, + "eval_accuracy": 0.660817023764832, + "eval_accuracy_mlm": 0.6167854855212918, + "eval_accuracy_nsp": 0.8684219815735079, + "eval_loss": 2.316751480102539, + "eval_runtime": 145.7034, + "eval_samples_per_second": 1749.849, + "eval_steps_per_second": 72.915, + "step": 6660819 + }, + { + "epoch": 33.0, + "learning_rate": 3.350547455200329e-05, + "loss": 2.1953, + "step": 6661000 + }, + { + "epoch": 33.0, + "learning_rate": 3.3504235965577206e-05, + "loss": 2.1906, + "step": 6661500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350299737915112e-05, + "loss": 2.1715, + "step": 6662000 + }, + { + "epoch": 33.01, + "learning_rate": 3.350175879272504e-05, + "loss": 2.1679, + "step": 6662500 + }, + { + "epoch": 33.01, + "learning_rate": 3.350052020629896e-05, + "loss": 2.2096, + "step": 6663000 + }, + { + "epoch": 33.01, + "learning_rate": 3.3499281619872874e-05, + "loss": 2.1684, + "step": 6663500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349804551061964e-05, + "loss": 2.1789, + "step": 6664000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349680692419356e-05, + "loss": 2.1883, + "step": 6664500 + }, + { + "epoch": 33.02, + "learning_rate": 3.349557081494032e-05, + "loss": 2.1976, + "step": 6665000 + }, + { + "epoch": 33.02, + "learning_rate": 3.349433222851424e-05, + "loss": 2.1609, + "step": 6665500 + }, + { + "epoch": 33.03, + "learning_rate": 3.3493093642088156e-05, + "loss": 2.1907, + "step": 6666000 + }, + { + "epoch": 33.03, + "learning_rate": 3.349185505566207e-05, + "loss": 2.1754, + "step": 6666500 + }, + { + "epoch": 33.03, + "learning_rate": 3.349061894640884e-05, + "loss": 2.1871, + "step": 6667000 + }, + { + "epoch": 33.03, + "learning_rate": 3.348938035998276e-05, + "loss": 2.1858, + "step": 6667500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348814425072953e-05, + "loss": 2.1992, + "step": 6668000 + }, + { + "epoch": 33.04, + "learning_rate": 3.3486905664303444e-05, + "loss": 2.1737, + "step": 6668500 + }, + { + "epoch": 33.04, + "learning_rate": 3.348566707787736e-05, + "loss": 2.1944, + "step": 6669000 + }, + { + "epoch": 33.04, + "learning_rate": 3.348442849145128e-05, + "loss": 2.1859, + "step": 6669500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348319238219805e-05, + "loss": 2.1786, + "step": 6670000 + }, + { + "epoch": 33.05, + "learning_rate": 3.3481953795771964e-05, + "loss": 2.1959, + "step": 6670500 + }, + { + "epoch": 33.05, + "learning_rate": 3.348071520934588e-05, + "loss": 2.2086, + "step": 6671000 + }, + { + "epoch": 33.05, + "learning_rate": 3.34794766229198e-05, + "loss": 2.1758, + "step": 6671500 + }, + { + "epoch": 33.06, + "learning_rate": 3.3478238036493715e-05, + "loss": 2.1904, + "step": 6672000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347699945006763e-05, + "loss": 2.1964, + "step": 6672500 + }, + { + "epoch": 33.06, + "learning_rate": 3.347576086364154e-05, + "loss": 2.1856, + "step": 6673000 + }, + { + "epoch": 33.06, + "learning_rate": 3.347452227721546e-05, + "loss": 2.1881, + "step": 6673500 + }, + { + "epoch": 33.07, + "learning_rate": 3.3473283690789376e-05, + "loss": 2.1798, + "step": 6674000 + }, + { + "epoch": 33.07, + "learning_rate": 3.347204510436329e-05, + "loss": 2.1917, + "step": 6674500 + }, + { + "epoch": 33.07, + "learning_rate": 3.347080651793721e-05, + "loss": 2.1751, + "step": 6675000 + }, + { + "epoch": 33.07, + "learning_rate": 3.3469567931511126e-05, + "loss": 2.2065, + "step": 6675500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346832934508504e-05, + "loss": 2.1857, + "step": 6676000 + }, + { + "epoch": 33.08, + "learning_rate": 3.346709075865896e-05, + "loss": 2.1952, + "step": 6676500 + }, + { + "epoch": 33.08, + "learning_rate": 3.346585217223288e-05, + "loss": 2.1908, + "step": 6677000 + }, + { + "epoch": 33.08, + "learning_rate": 3.3464613585806794e-05, + "loss": 2.1971, + "step": 6677500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346337499938071e-05, + "loss": 2.194, + "step": 6678000 + }, + { + "epoch": 33.09, + "learning_rate": 3.346213889012747e-05, + "loss": 2.164, + "step": 6678500 + }, + { + "epoch": 33.09, + "learning_rate": 3.346090030370139e-05, + "loss": 2.1589, + "step": 6679000 + }, + { + "epoch": 33.09, + "learning_rate": 3.345966171727531e-05, + "loss": 2.216, + "step": 6679500 + }, + { + "epoch": 33.1, + "learning_rate": 3.3458423130849224e-05, + "loss": 2.1777, + "step": 6680000 + }, + { + "epoch": 33.1, + "learning_rate": 3.345718454442314e-05, + "loss": 2.2036, + "step": 6680500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345594595799706e-05, + "loss": 2.1821, + "step": 6681000 + }, + { + "epoch": 33.1, + "learning_rate": 3.3454707371570975e-05, + "loss": 2.1714, + "step": 6681500 + }, + { + "epoch": 33.1, + "learning_rate": 3.345346878514489e-05, + "loss": 2.199, + "step": 6682000 + }, + { + "epoch": 33.11, + "learning_rate": 3.345223019871881e-05, + "loss": 2.1905, + "step": 6682500 + }, + { + "epoch": 33.11, + "learning_rate": 3.3450991612292725e-05, + "loss": 2.1875, + "step": 6683000 + }, + { + "epoch": 33.11, + "learning_rate": 3.344975302586664e-05, + "loss": 2.1735, + "step": 6683500 + }, + { + "epoch": 33.11, + "learning_rate": 3.344851443944056e-05, + "loss": 2.2194, + "step": 6684000 + }, + { + "epoch": 33.12, + "learning_rate": 3.3447275853014476e-05, + "loss": 2.1537, + "step": 6684500 + }, + { + "epoch": 33.12, + "learning_rate": 3.3446037266588386e-05, + "loss": 2.209, + "step": 6685000 + }, + { + "epoch": 33.12, + "learning_rate": 3.344480115733516e-05, + "loss": 2.1662, + "step": 6685500 + }, + { + "epoch": 33.12, + "learning_rate": 3.344356257090908e-05, + "loss": 2.1802, + "step": 6686000 + }, + { + "epoch": 33.13, + "learning_rate": 3.344232646165584e-05, + "loss": 2.211, + "step": 6686500 + }, + { + "epoch": 33.13, + "learning_rate": 3.344108787522976e-05, + "loss": 2.1945, + "step": 6687000 + }, + { + "epoch": 33.13, + "learning_rate": 3.3439849288803675e-05, + "loss": 2.1963, + "step": 6687500 + }, + { + "epoch": 33.13, + "learning_rate": 3.343861070237759e-05, + "loss": 2.1963, + "step": 6688000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343737211595151e-05, + "loss": 2.1822, + "step": 6688500 + }, + { + "epoch": 33.14, + "learning_rate": 3.3436133529525425e-05, + "loss": 2.1843, + "step": 6689000 + }, + { + "epoch": 33.14, + "learning_rate": 3.343489494309934e-05, + "loss": 2.1904, + "step": 6689500 + }, + { + "epoch": 33.14, + "learning_rate": 3.343365635667326e-05, + "loss": 2.1861, + "step": 6690000 + }, + { + "epoch": 33.15, + "learning_rate": 3.3432417770247176e-05, + "loss": 2.2003, + "step": 6690500 + }, + { + "epoch": 33.15, + "learning_rate": 3.3431181660993945e-05, + "loss": 2.1901, + "step": 6691000 + }, + { + "epoch": 33.15, + "learning_rate": 3.342994307456786e-05, + "loss": 2.2043, + "step": 6691500 + }, + { + "epoch": 33.15, + "learning_rate": 3.342870448814178e-05, + "loss": 2.198, + "step": 6692000 + }, + { + "epoch": 33.16, + "learning_rate": 3.342746837888854e-05, + "loss": 2.1838, + "step": 6692500 + }, + { + "epoch": 33.16, + "learning_rate": 3.342622979246246e-05, + "loss": 2.196, + "step": 6693000 + }, + { + "epoch": 33.16, + "learning_rate": 3.342499368320923e-05, + "loss": 2.1899, + "step": 6693500 + }, + { + "epoch": 33.16, + "learning_rate": 3.3423757573956e-05, + "loss": 2.1982, + "step": 6694000 + }, + { + "epoch": 33.17, + "learning_rate": 3.342251898752992e-05, + "loss": 2.1961, + "step": 6694500 + }, + { + "epoch": 33.17, + "learning_rate": 3.342128040110383e-05, + "loss": 2.2007, + "step": 6695000 + }, + { + "epoch": 33.17, + "learning_rate": 3.3420041814677746e-05, + "loss": 2.1791, + "step": 6695500 + }, + { + "epoch": 33.17, + "learning_rate": 3.341880322825166e-05, + "loss": 2.2048, + "step": 6696000 + }, + { + "epoch": 33.18, + "learning_rate": 3.341756464182558e-05, + "loss": 2.197, + "step": 6696500 + }, + { + "epoch": 33.18, + "learning_rate": 3.34163260553995e-05, + "loss": 2.2261, + "step": 6697000 + }, + { + "epoch": 33.18, + "learning_rate": 3.3415089946146266e-05, + "loss": 2.1916, + "step": 6697500 + }, + { + "epoch": 33.18, + "learning_rate": 3.3413851359720176e-05, + "loss": 2.1948, + "step": 6698000 + }, + { + "epoch": 33.19, + "learning_rate": 3.341261277329409e-05, + "loss": 2.1977, + "step": 6698500 + }, + { + "epoch": 33.19, + "learning_rate": 3.341137418686801e-05, + "loss": 2.1964, + "step": 6699000 + }, + { + "epoch": 33.19, + "learning_rate": 3.341013560044193e-05, + "loss": 2.2078, + "step": 6699500 + }, + { + "epoch": 33.19, + "learning_rate": 3.3408897014015844e-05, + "loss": 2.1995, + "step": 6700000 + }, + { + "epoch": 33.2, + "learning_rate": 3.340765842758976e-05, + "loss": 2.1763, + "step": 6700500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340641984116368e-05, + "loss": 2.1989, + "step": 6701000 + }, + { + "epoch": 33.2, + "learning_rate": 3.3405181254737595e-05, + "loss": 2.1885, + "step": 6701500 + }, + { + "epoch": 33.2, + "learning_rate": 3.340394266831151e-05, + "loss": 2.1716, + "step": 6702000 + }, + { + "epoch": 33.21, + "learning_rate": 3.340270408188543e-05, + "loss": 2.2069, + "step": 6702500 + }, + { + "epoch": 33.21, + "learning_rate": 3.3401465495459345e-05, + "loss": 2.1995, + "step": 6703000 + }, + { + "epoch": 33.21, + "learning_rate": 3.3400229386206114e-05, + "loss": 2.2251, + "step": 6703500 + }, + { + "epoch": 33.21, + "learning_rate": 3.339899079978003e-05, + "loss": 2.2079, + "step": 6704000 + }, + { + "epoch": 33.22, + "learning_rate": 3.339775221335394e-05, + "loss": 2.1914, + "step": 6704500 + }, + { + "epoch": 33.22, + "learning_rate": 3.339651610410071e-05, + "loss": 2.1867, + "step": 6705000 + }, + { + "epoch": 33.22, + "learning_rate": 3.339527751767463e-05, + "loss": 2.2132, + "step": 6705500 + }, + { + "epoch": 33.22, + "learning_rate": 3.33940414084214e-05, + "loss": 2.1824, + "step": 6706000 + }, + { + "epoch": 33.23, + "learning_rate": 3.339280282199532e-05, + "loss": 2.1999, + "step": 6706500 + }, + { + "epoch": 33.23, + "learning_rate": 3.3391564235569236e-05, + "loss": 2.1785, + "step": 6707000 + }, + { + "epoch": 33.23, + "learning_rate": 3.3390325649143147e-05, + "loss": 2.2085, + "step": 6707500 + }, + { + "epoch": 33.23, + "learning_rate": 3.3389087062717063e-05, + "loss": 2.2031, + "step": 6708000 + }, + { + "epoch": 33.24, + "learning_rate": 3.338784847629098e-05, + "loss": 2.181, + "step": 6708500 + }, + { + "epoch": 33.24, + "learning_rate": 3.33866098898649e-05, + "loss": 2.2132, + "step": 6709000 + }, + { + "epoch": 33.24, + "learning_rate": 3.3385371303438814e-05, + "loss": 2.1918, + "step": 6709500 + }, + { + "epoch": 33.24, + "learning_rate": 3.338413271701273e-05, + "loss": 2.1732, + "step": 6710000 + }, + { + "epoch": 33.25, + "learning_rate": 3.338289413058665e-05, + "loss": 2.1797, + "step": 6710500 + }, + { + "epoch": 33.25, + "learning_rate": 3.3381655544160565e-05, + "loss": 2.2188, + "step": 6711000 + }, + { + "epoch": 33.25, + "learning_rate": 3.3380416957734475e-05, + "loss": 2.1945, + "step": 6711500 + }, + { + "epoch": 33.25, + "learning_rate": 3.3379180848481244e-05, + "loss": 2.1775, + "step": 6712000 + }, + { + "epoch": 33.26, + "learning_rate": 3.337794226205516e-05, + "loss": 2.2078, + "step": 6712500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337670367562908e-05, + "loss": 2.1919, + "step": 6713000 + }, + { + "epoch": 33.26, + "learning_rate": 3.3375465089202995e-05, + "loss": 2.2168, + "step": 6713500 + }, + { + "epoch": 33.26, + "learning_rate": 3.337422650277691e-05, + "loss": 2.1926, + "step": 6714000 + }, + { + "epoch": 33.27, + "learning_rate": 3.337299039352368e-05, + "loss": 2.2111, + "step": 6714500 + }, + { + "epoch": 33.27, + "learning_rate": 3.33717518070976e-05, + "loss": 2.1717, + "step": 6715000 + }, + { + "epoch": 33.27, + "learning_rate": 3.3370515697844366e-05, + "loss": 2.1892, + "step": 6715500 + }, + { + "epoch": 33.27, + "learning_rate": 3.336927711141828e-05, + "loss": 2.1819, + "step": 6716000 + }, + { + "epoch": 33.28, + "learning_rate": 3.33680385249922e-05, + "loss": 2.1656, + "step": 6716500 + }, + { + "epoch": 33.28, + "learning_rate": 3.336680241573897e-05, + "loss": 2.1817, + "step": 6717000 + }, + { + "epoch": 33.28, + "learning_rate": 3.3365563829312886e-05, + "loss": 2.2084, + "step": 6717500 + }, + { + "epoch": 33.28, + "learning_rate": 3.33643252428868e-05, + "loss": 2.1876, + "step": 6718000 + }, + { + "epoch": 33.29, + "learning_rate": 3.336308665646072e-05, + "loss": 2.2138, + "step": 6718500 + }, + { + "epoch": 33.29, + "learning_rate": 3.3361848070034637e-05, + "loss": 2.2026, + "step": 6719000 + }, + { + "epoch": 33.29, + "learning_rate": 3.3360609483608554e-05, + "loss": 2.1882, + "step": 6719500 + }, + { + "epoch": 33.29, + "learning_rate": 3.3359370897182464e-05, + "loss": 2.2171, + "step": 6720000 + }, + { + "epoch": 33.3, + "learning_rate": 3.335813231075638e-05, + "loss": 2.1742, + "step": 6720500 + }, + { + "epoch": 33.3, + "learning_rate": 3.33568937243303e-05, + "loss": 2.2045, + "step": 6721000 + }, + { + "epoch": 33.3, + "learning_rate": 3.3355655137904214e-05, + "loss": 2.2029, + "step": 6721500 + }, + { + "epoch": 33.3, + "learning_rate": 3.335441655147813e-05, + "loss": 2.1792, + "step": 6722000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335317796505205e-05, + "loss": 2.1961, + "step": 6722500 + }, + { + "epoch": 33.31, + "learning_rate": 3.3351939378625965e-05, + "loss": 2.2094, + "step": 6723000 + }, + { + "epoch": 33.31, + "learning_rate": 3.335070079219988e-05, + "loss": 2.2066, + "step": 6723500 + }, + { + "epoch": 33.31, + "learning_rate": 3.33494622057738e-05, + "loss": 2.1974, + "step": 6724000 + }, + { + "epoch": 33.32, + "learning_rate": 3.3348223619347716e-05, + "loss": 2.1959, + "step": 6724500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334698998726734e-05, + "loss": 2.2206, + "step": 6725000 + }, + { + "epoch": 33.32, + "learning_rate": 3.3345751400841254e-05, + "loss": 2.1876, + "step": 6725500 + }, + { + "epoch": 33.32, + "learning_rate": 3.334451281441517e-05, + "loss": 2.1978, + "step": 6726000 + }, + { + "epoch": 33.33, + "learning_rate": 3.334327422798908e-05, + "loss": 2.1713, + "step": 6726500 + }, + { + "epoch": 33.33, + "learning_rate": 3.3342035641563e-05, + "loss": 2.2117, + "step": 6727000 + }, + { + "epoch": 33.33, + "learning_rate": 3.3340799532309766e-05, + "loss": 2.2119, + "step": 6727500 + }, + { + "epoch": 33.33, + "learning_rate": 3.3339560945883683e-05, + "loss": 2.191, + "step": 6728000 + }, + { + "epoch": 33.34, + "learning_rate": 3.33383223594576e-05, + "loss": 2.1935, + "step": 6728500 + }, + { + "epoch": 33.34, + "learning_rate": 3.333708377303152e-05, + "loss": 2.1783, + "step": 6729000 + }, + { + "epoch": 33.34, + "learning_rate": 3.333584518660543e-05, + "loss": 2.2009, + "step": 6729500 + }, + { + "epoch": 33.34, + "learning_rate": 3.3334606600179344e-05, + "loss": 2.1902, + "step": 6730000 + }, + { + "epoch": 33.35, + "learning_rate": 3.333336801375326e-05, + "loss": 2.2013, + "step": 6730500 + }, + { + "epoch": 33.35, + "learning_rate": 3.333212942732718e-05, + "loss": 2.2053, + "step": 6731000 + }, + { + "epoch": 33.35, + "learning_rate": 3.3330893318073954e-05, + "loss": 2.2068, + "step": 6731500 + }, + { + "epoch": 33.35, + "learning_rate": 3.332965473164787e-05, + "loss": 2.1893, + "step": 6732000 + }, + { + "epoch": 33.36, + "learning_rate": 3.332841614522178e-05, + "loss": 2.1971, + "step": 6732500 + }, + { + "epoch": 33.36, + "learning_rate": 3.33271775587957e-05, + "loss": 2.2038, + "step": 6733000 + }, + { + "epoch": 33.36, + "learning_rate": 3.3325938972369615e-05, + "loss": 2.1941, + "step": 6733500 + }, + { + "epoch": 33.36, + "learning_rate": 3.3324705340289235e-05, + "loss": 2.2214, + "step": 6734000 + }, + { + "epoch": 33.37, + "learning_rate": 3.332346675386315e-05, + "loss": 2.1986, + "step": 6734500 + }, + { + "epoch": 33.37, + "learning_rate": 3.332222816743707e-05, + "loss": 2.2197, + "step": 6735000 + }, + { + "epoch": 33.37, + "learning_rate": 3.332099205818384e-05, + "loss": 2.198, + "step": 6735500 + }, + { + "epoch": 33.37, + "learning_rate": 3.3319753471757755e-05, + "loss": 2.2016, + "step": 6736000 + }, + { + "epoch": 33.37, + "learning_rate": 3.331851488533167e-05, + "loss": 2.2062, + "step": 6736500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331727629890559e-05, + "loss": 2.2213, + "step": 6737000 + }, + { + "epoch": 33.38, + "learning_rate": 3.3316037712479506e-05, + "loss": 2.2173, + "step": 6737500 + }, + { + "epoch": 33.38, + "learning_rate": 3.331479912605342e-05, + "loss": 2.196, + "step": 6738000 + }, + { + "epoch": 33.38, + "learning_rate": 3.331356053962734e-05, + "loss": 2.2056, + "step": 6738500 + }, + { + "epoch": 33.39, + "learning_rate": 3.331232195320125e-05, + "loss": 2.2122, + "step": 6739000 + }, + { + "epoch": 33.39, + "learning_rate": 3.331108336677517e-05, + "loss": 2.1967, + "step": 6739500 + }, + { + "epoch": 33.39, + "learning_rate": 3.3309844780349084e-05, + "loss": 2.2116, + "step": 6740000 + }, + { + "epoch": 33.39, + "learning_rate": 3.3308606193923e-05, + "loss": 2.2288, + "step": 6740500 + }, + { + "epoch": 33.4, + "learning_rate": 3.330736760749692e-05, + "loss": 2.1971, + "step": 6741000 + }, + { + "epoch": 33.4, + "learning_rate": 3.3306129021070834e-05, + "loss": 2.2274, + "step": 6741500 + }, + { + "epoch": 33.4, + "learning_rate": 3.33048929118176e-05, + "loss": 2.2, + "step": 6742000 + }, + { + "epoch": 33.4, + "learning_rate": 3.330365432539152e-05, + "loss": 2.2053, + "step": 6742500 + }, + { + "epoch": 33.41, + "learning_rate": 3.330241573896544e-05, + "loss": 2.2427, + "step": 6743000 + }, + { + "epoch": 33.41, + "learning_rate": 3.3301177152539354e-05, + "loss": 2.2006, + "step": 6743500 + }, + { + "epoch": 33.41, + "learning_rate": 3.329994104328612e-05, + "loss": 2.1955, + "step": 6744000 + }, + { + "epoch": 33.41, + "learning_rate": 3.329870245686004e-05, + "loss": 2.2025, + "step": 6744500 + }, + { + "epoch": 33.42, + "learning_rate": 3.329746387043396e-05, + "loss": 2.2052, + "step": 6745000 + }, + { + "epoch": 33.42, + "learning_rate": 3.3296225284007874e-05, + "loss": 2.199, + "step": 6745500 + }, + { + "epoch": 33.42, + "learning_rate": 3.3294986697581784e-05, + "loss": 2.2119, + "step": 6746000 + }, + { + "epoch": 33.42, + "learning_rate": 3.329375058832855e-05, + "loss": 2.1805, + "step": 6746500 + }, + { + "epoch": 33.43, + "learning_rate": 3.329251200190247e-05, + "loss": 2.2214, + "step": 6747000 + }, + { + "epoch": 33.43, + "learning_rate": 3.3291273415476386e-05, + "loss": 2.1932, + "step": 6747500 + }, + { + "epoch": 33.43, + "learning_rate": 3.32900348290503e-05, + "loss": 2.21, + "step": 6748000 + }, + { + "epoch": 33.43, + "learning_rate": 3.328879871979707e-05, + "loss": 2.1815, + "step": 6748500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328756013337099e-05, + "loss": 2.2231, + "step": 6749000 + }, + { + "epoch": 33.44, + "learning_rate": 3.3286321546944906e-05, + "loss": 2.1896, + "step": 6749500 + }, + { + "epoch": 33.44, + "learning_rate": 3.328508296051882e-05, + "loss": 2.1942, + "step": 6750000 + }, + { + "epoch": 33.44, + "learning_rate": 3.328384437409274e-05, + "loss": 2.2055, + "step": 6750500 + }, + { + "epoch": 33.45, + "learning_rate": 3.328260578766666e-05, + "loss": 2.2069, + "step": 6751000 + }, + { + "epoch": 33.45, + "learning_rate": 3.3281367201240574e-05, + "loss": 2.2045, + "step": 6751500 + }, + { + "epoch": 33.45, + "learning_rate": 3.3280131091987336e-05, + "loss": 2.2114, + "step": 6752000 + }, + { + "epoch": 33.45, + "learning_rate": 3.327889250556125e-05, + "loss": 2.2043, + "step": 6752500 + }, + { + "epoch": 33.46, + "learning_rate": 3.327765391913517e-05, + "loss": 2.2053, + "step": 6753000 + }, + { + "epoch": 33.46, + "learning_rate": 3.3276415332709087e-05, + "loss": 2.2035, + "step": 6753500 + }, + { + "epoch": 33.46, + "learning_rate": 3.3275179223455855e-05, + "loss": 2.1932, + "step": 6754000 + }, + { + "epoch": 33.46, + "learning_rate": 3.327394063702977e-05, + "loss": 2.2068, + "step": 6754500 + }, + { + "epoch": 33.47, + "learning_rate": 3.327270205060369e-05, + "loss": 2.1834, + "step": 6755000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3271463464177606e-05, + "loss": 2.1792, + "step": 6755500 + }, + { + "epoch": 33.47, + "learning_rate": 3.327022735492437e-05, + "loss": 2.1915, + "step": 6756000 + }, + { + "epoch": 33.47, + "learning_rate": 3.3268988768498285e-05, + "loss": 2.2222, + "step": 6756500 + }, + { + "epoch": 33.48, + "learning_rate": 3.32677501820722e-05, + "loss": 2.2125, + "step": 6757000 + }, + { + "epoch": 33.48, + "learning_rate": 3.326651159564612e-05, + "loss": 2.1967, + "step": 6757500 + }, + { + "epoch": 33.48, + "learning_rate": 3.3265273009220036e-05, + "loss": 2.2272, + "step": 6758000 + }, + { + "epoch": 33.48, + "learning_rate": 3.326403442279395e-05, + "loss": 2.2209, + "step": 6758500 + }, + { + "epoch": 33.49, + "learning_rate": 3.326279831354072e-05, + "loss": 2.2223, + "step": 6759000 + }, + { + "epoch": 33.49, + "learning_rate": 3.326155972711464e-05, + "loss": 2.21, + "step": 6759500 + }, + { + "epoch": 33.49, + "learning_rate": 3.3260321140688556e-05, + "loss": 2.2063, + "step": 6760000 + }, + { + "epoch": 33.49, + "learning_rate": 3.325908255426247e-05, + "loss": 2.2272, + "step": 6760500 + }, + { + "epoch": 33.5, + "learning_rate": 3.325784396783639e-05, + "loss": 2.1844, + "step": 6761000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3256605381410306e-05, + "loss": 2.2001, + "step": 6761500 + }, + { + "epoch": 33.5, + "learning_rate": 3.3255369272157075e-05, + "loss": 2.1846, + "step": 6762000 + }, + { + "epoch": 33.5, + "learning_rate": 3.3254133162903844e-05, + "loss": 2.2296, + "step": 6762500 + }, + { + "epoch": 33.51, + "learning_rate": 3.325289457647776e-05, + "loss": 2.2138, + "step": 6763000 + }, + { + "epoch": 33.51, + "learning_rate": 3.325165599005168e-05, + "loss": 2.2029, + "step": 6763500 + }, + { + "epoch": 33.51, + "learning_rate": 3.3250417403625595e-05, + "loss": 2.1842, + "step": 6764000 + }, + { + "epoch": 33.51, + "learning_rate": 3.3249181294372364e-05, + "loss": 2.216, + "step": 6764500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324794270794628e-05, + "loss": 2.2137, + "step": 6765000 + }, + { + "epoch": 33.52, + "learning_rate": 3.32467041215202e-05, + "loss": 2.1953, + "step": 6765500 + }, + { + "epoch": 33.52, + "learning_rate": 3.324546801226696e-05, + "loss": 2.209, + "step": 6766000 + }, + { + "epoch": 33.52, + "learning_rate": 3.3244229425840876e-05, + "loss": 2.2116, + "step": 6766500 + }, + { + "epoch": 33.53, + "learning_rate": 3.324299083941479e-05, + "loss": 2.2116, + "step": 6767000 + }, + { + "epoch": 33.53, + "learning_rate": 3.324175225298871e-05, + "loss": 2.2068, + "step": 6767500 + }, + { + "epoch": 33.53, + "learning_rate": 3.324051366656263e-05, + "loss": 2.1877, + "step": 6768000 + }, + { + "epoch": 33.53, + "learning_rate": 3.3239275080136544e-05, + "loss": 2.2073, + "step": 6768500 + }, + { + "epoch": 33.54, + "learning_rate": 3.323803649371046e-05, + "loss": 2.1873, + "step": 6769000 + }, + { + "epoch": 33.54, + "learning_rate": 3.323679790728438e-05, + "loss": 2.2067, + "step": 6769500 + }, + { + "epoch": 33.54, + "learning_rate": 3.3235559320858295e-05, + "loss": 2.1925, + "step": 6770000 + }, + { + "epoch": 33.54, + "learning_rate": 3.3234320734432205e-05, + "loss": 2.2227, + "step": 6770500 + }, + { + "epoch": 33.55, + "learning_rate": 3.323308214800612e-05, + "loss": 2.1975, + "step": 6771000 + }, + { + "epoch": 33.55, + "learning_rate": 3.323184356158004e-05, + "loss": 2.1838, + "step": 6771500 + }, + { + "epoch": 33.55, + "learning_rate": 3.3230604975153956e-05, + "loss": 2.2219, + "step": 6772000 + }, + { + "epoch": 33.55, + "learning_rate": 3.322936638872787e-05, + "loss": 2.1908, + "step": 6772500 + }, + { + "epoch": 33.56, + "learning_rate": 3.322812780230179e-05, + "loss": 2.1813, + "step": 6773000 + }, + { + "epoch": 33.56, + "learning_rate": 3.3226889215875707e-05, + "loss": 2.1919, + "step": 6773500 + }, + { + "epoch": 33.56, + "learning_rate": 3.3225650629449623e-05, + "loss": 2.2287, + "step": 6774000 + }, + { + "epoch": 33.56, + "learning_rate": 3.322441204302354e-05, + "loss": 2.2185, + "step": 6774500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322317345659746e-05, + "loss": 2.1899, + "step": 6775000 + }, + { + "epoch": 33.57, + "learning_rate": 3.3221934870171374e-05, + "loss": 2.204, + "step": 6775500 + }, + { + "epoch": 33.57, + "learning_rate": 3.322069628374529e-05, + "loss": 2.2152, + "step": 6776000 + }, + { + "epoch": 33.57, + "learning_rate": 3.321945769731921e-05, + "loss": 2.1904, + "step": 6776500 + }, + { + "epoch": 33.58, + "learning_rate": 3.3218219110893125e-05, + "loss": 2.2197, + "step": 6777000 + }, + { + "epoch": 33.58, + "learning_rate": 3.321698052446704e-05, + "loss": 2.1919, + "step": 6777500 + }, + { + "epoch": 33.58, + "learning_rate": 3.321574193804096e-05, + "loss": 2.2174, + "step": 6778000 + }, + { + "epoch": 33.58, + "learning_rate": 3.3214503351614876e-05, + "loss": 2.1984, + "step": 6778500 + }, + { + "epoch": 33.59, + "learning_rate": 3.321326971953449e-05, + "loss": 2.2013, + "step": 6779000 + }, + { + "epoch": 33.59, + "learning_rate": 3.3212033610281265e-05, + "loss": 2.2408, + "step": 6779500 + }, + { + "epoch": 33.59, + "learning_rate": 3.3210795023855175e-05, + "loss": 2.2103, + "step": 6780000 + }, + { + "epoch": 33.59, + "learning_rate": 3.320955643742909e-05, + "loss": 2.1746, + "step": 6780500 + }, + { + "epoch": 33.6, + "learning_rate": 3.320831785100301e-05, + "loss": 2.2278, + "step": 6781000 + }, + { + "epoch": 33.6, + "learning_rate": 3.3207079264576926e-05, + "loss": 2.1785, + "step": 6781500 + }, + { + "epoch": 33.6, + "learning_rate": 3.320584067815084e-05, + "loss": 2.2099, + "step": 6782000 + }, + { + "epoch": 33.6, + "learning_rate": 3.320460209172476e-05, + "loss": 2.1942, + "step": 6782500 + }, + { + "epoch": 33.61, + "learning_rate": 3.320336845964438e-05, + "loss": 2.2025, + "step": 6783000 + }, + { + "epoch": 33.61, + "learning_rate": 3.32021298732183e-05, + "loss": 2.2365, + "step": 6783500 + }, + { + "epoch": 33.61, + "learning_rate": 3.3200891286792215e-05, + "loss": 2.2065, + "step": 6784000 + }, + { + "epoch": 33.61, + "learning_rate": 3.319965270036613e-05, + "loss": 2.1954, + "step": 6784500 + }, + { + "epoch": 33.62, + "learning_rate": 3.319841411394005e-05, + "loss": 2.2004, + "step": 6785000 + }, + { + "epoch": 33.62, + "learning_rate": 3.3197175527513965e-05, + "loss": 2.211, + "step": 6785500 + }, + { + "epoch": 33.62, + "learning_rate": 3.319593694108788e-05, + "loss": 2.2276, + "step": 6786000 + }, + { + "epoch": 33.62, + "learning_rate": 3.319469835466179e-05, + "loss": 2.2108, + "step": 6786500 + }, + { + "epoch": 33.63, + "learning_rate": 3.319345976823571e-05, + "loss": 2.2176, + "step": 6787000 + }, + { + "epoch": 33.63, + "learning_rate": 3.3192221181809626e-05, + "loss": 2.2349, + "step": 6787500 + }, + { + "epoch": 33.63, + "learning_rate": 3.3190985072556395e-05, + "loss": 2.2048, + "step": 6788000 + }, + { + "epoch": 33.63, + "learning_rate": 3.318974648613031e-05, + "loss": 2.1891, + "step": 6788500 + }, + { + "epoch": 33.64, + "learning_rate": 3.318850789970423e-05, + "loss": 2.2304, + "step": 6789000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318726931327814e-05, + "loss": 2.21, + "step": 6789500 + }, + { + "epoch": 33.64, + "learning_rate": 3.3186030726852056e-05, + "loss": 2.1899, + "step": 6790000 + }, + { + "epoch": 33.64, + "learning_rate": 3.318479214042597e-05, + "loss": 2.2241, + "step": 6790500 + }, + { + "epoch": 33.64, + "learning_rate": 3.318355355399989e-05, + "loss": 2.2191, + "step": 6791000 + }, + { + "epoch": 33.65, + "learning_rate": 3.318231496757381e-05, + "loss": 2.2102, + "step": 6791500 + }, + { + "epoch": 33.65, + "learning_rate": 3.318107885832058e-05, + "loss": 2.1883, + "step": 6792000 + }, + { + "epoch": 33.65, + "learning_rate": 3.317984027189449e-05, + "loss": 2.1964, + "step": 6792500 + }, + { + "epoch": 33.65, + "learning_rate": 3.317860168546841e-05, + "loss": 2.2054, + "step": 6793000 + }, + { + "epoch": 33.66, + "learning_rate": 3.3177363099042327e-05, + "loss": 2.1987, + "step": 6793500 + }, + { + "epoch": 33.66, + "learning_rate": 3.3176126989789095e-05, + "loss": 2.2227, + "step": 6794000 + }, + { + "epoch": 33.66, + "learning_rate": 3.317488840336301e-05, + "loss": 2.217, + "step": 6794500 + }, + { + "epoch": 33.66, + "learning_rate": 3.317364981693693e-05, + "loss": 2.1929, + "step": 6795000 + }, + { + "epoch": 33.67, + "learning_rate": 3.317241123051084e-05, + "loss": 2.2129, + "step": 6795500 + }, + { + "epoch": 33.67, + "learning_rate": 3.3171172644084756e-05, + "loss": 2.2122, + "step": 6796000 + }, + { + "epoch": 33.67, + "learning_rate": 3.316993405765867e-05, + "loss": 2.2232, + "step": 6796500 + }, + { + "epoch": 33.67, + "learning_rate": 3.316869547123259e-05, + "loss": 2.2027, + "step": 6797000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316745688480651e-05, + "loss": 2.1981, + "step": 6797500 + }, + { + "epoch": 33.68, + "learning_rate": 3.3166218298380424e-05, + "loss": 2.1982, + "step": 6798000 + }, + { + "epoch": 33.68, + "learning_rate": 3.316497971195434e-05, + "loss": 2.1927, + "step": 6798500 + }, + { + "epoch": 33.68, + "learning_rate": 3.316374112552826e-05, + "loss": 2.1843, + "step": 6799000 + }, + { + "epoch": 33.69, + "learning_rate": 3.3162502539102175e-05, + "loss": 2.2093, + "step": 6799500 + }, + { + "epoch": 33.69, + "learning_rate": 3.3161266429848944e-05, + "loss": 2.1841, + "step": 6800000 + }, + { + "epoch": 33.69, + "learning_rate": 3.316003032059571e-05, + "loss": 2.2062, + "step": 6800500 + }, + { + "epoch": 33.69, + "learning_rate": 3.315879173416963e-05, + "loss": 2.1887, + "step": 6801000 + }, + { + "epoch": 33.7, + "learning_rate": 3.3157553147743546e-05, + "loss": 2.2124, + "step": 6801500 + }, + { + "epoch": 33.7, + "learning_rate": 3.3156314561317456e-05, + "loss": 2.18, + "step": 6802000 + }, + { + "epoch": 33.7, + "learning_rate": 3.315507597489137e-05, + "loss": 2.2071, + "step": 6802500 + }, + { + "epoch": 33.7, + "learning_rate": 3.315383986563815e-05, + "loss": 2.1993, + "step": 6803000 + }, + { + "epoch": 33.71, + "learning_rate": 3.315260375638492e-05, + "loss": 2.1961, + "step": 6803500 + }, + { + "epoch": 33.71, + "learning_rate": 3.3151365169958835e-05, + "loss": 2.1915, + "step": 6804000 + }, + { + "epoch": 33.71, + "learning_rate": 3.3150126583532745e-05, + "loss": 2.1792, + "step": 6804500 + }, + { + "epoch": 33.71, + "learning_rate": 3.314888799710666e-05, + "loss": 2.192, + "step": 6805000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314764941068058e-05, + "loss": 2.1913, + "step": 6805500 + }, + { + "epoch": 33.72, + "learning_rate": 3.3146410824254496e-05, + "loss": 2.1953, + "step": 6806000 + }, + { + "epoch": 33.72, + "learning_rate": 3.314517223782841e-05, + "loss": 2.1918, + "step": 6806500 + }, + { + "epoch": 33.72, + "learning_rate": 3.314393365140233e-05, + "loss": 2.189, + "step": 6807000 + }, + { + "epoch": 33.73, + "learning_rate": 3.31426975421491e-05, + "loss": 2.1915, + "step": 6807500 + }, + { + "epoch": 33.73, + "learning_rate": 3.314146143289587e-05, + "loss": 2.2173, + "step": 6808000 + }, + { + "epoch": 33.73, + "learning_rate": 3.3140222846469784e-05, + "loss": 2.2431, + "step": 6808500 + }, + { + "epoch": 33.73, + "learning_rate": 3.31389842600437e-05, + "loss": 2.2125, + "step": 6809000 + }, + { + "epoch": 33.74, + "learning_rate": 3.313774815079046e-05, + "loss": 2.224, + "step": 6809500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313651204153724e-05, + "loss": 2.2055, + "step": 6810000 + }, + { + "epoch": 33.74, + "learning_rate": 3.3135273455111155e-05, + "loss": 2.21, + "step": 6810500 + }, + { + "epoch": 33.74, + "learning_rate": 3.313403486868507e-05, + "loss": 2.2161, + "step": 6811000 + }, + { + "epoch": 33.75, + "learning_rate": 3.313279628225899e-05, + "loss": 2.1847, + "step": 6811500 + }, + { + "epoch": 33.75, + "learning_rate": 3.31315576958329e-05, + "loss": 2.2189, + "step": 6812000 + }, + { + "epoch": 33.75, + "learning_rate": 3.3130319109406816e-05, + "loss": 2.2207, + "step": 6812500 + }, + { + "epoch": 33.75, + "learning_rate": 3.312908052298073e-05, + "loss": 2.1814, + "step": 6813000 + }, + { + "epoch": 33.76, + "learning_rate": 3.312784193655465e-05, + "loss": 2.2075, + "step": 6813500 + }, + { + "epoch": 33.76, + "learning_rate": 3.312660335012857e-05, + "loss": 2.2068, + "step": 6814000 + }, + { + "epoch": 33.76, + "learning_rate": 3.3125364763702484e-05, + "loss": 2.2041, + "step": 6814500 + }, + { + "epoch": 33.76, + "learning_rate": 3.3124128654449246e-05, + "loss": 2.2088, + "step": 6815000 + }, + { + "epoch": 33.77, + "learning_rate": 3.312289006802316e-05, + "loss": 2.2029, + "step": 6815500 + }, + { + "epoch": 33.77, + "learning_rate": 3.312165148159708e-05, + "loss": 2.1921, + "step": 6816000 + }, + { + "epoch": 33.77, + "learning_rate": 3.3120412895171e-05, + "loss": 2.216, + "step": 6816500 + }, + { + "epoch": 33.77, + "learning_rate": 3.3119174308744914e-05, + "loss": 2.195, + "step": 6817000 + }, + { + "epoch": 33.78, + "learning_rate": 3.311793572231883e-05, + "loss": 2.2238, + "step": 6817500 + }, + { + "epoch": 33.78, + "learning_rate": 3.311669713589275e-05, + "loss": 2.2144, + "step": 6818000 + }, + { + "epoch": 33.78, + "learning_rate": 3.3115458549466665e-05, + "loss": 2.2092, + "step": 6818500 + }, + { + "epoch": 33.78, + "learning_rate": 3.3114222440213433e-05, + "loss": 2.2108, + "step": 6819000 + }, + { + "epoch": 33.79, + "learning_rate": 3.311298385378735e-05, + "loss": 2.2043, + "step": 6819500 + }, + { + "epoch": 33.79, + "learning_rate": 3.311174526736127e-05, + "loss": 2.1984, + "step": 6820000 + }, + { + "epoch": 33.79, + "learning_rate": 3.3110509158108036e-05, + "loss": 2.1975, + "step": 6820500 + }, + { + "epoch": 33.79, + "learning_rate": 3.310927057168195e-05, + "loss": 2.1769, + "step": 6821000 + }, + { + "epoch": 33.8, + "learning_rate": 3.310803198525586e-05, + "loss": 2.1828, + "step": 6821500 + }, + { + "epoch": 33.8, + "learning_rate": 3.310679339882978e-05, + "loss": 2.2122, + "step": 6822000 + }, + { + "epoch": 33.8, + "learning_rate": 3.31055548124037e-05, + "loss": 2.1925, + "step": 6822500 + }, + { + "epoch": 33.8, + "learning_rate": 3.3104316225977614e-05, + "loss": 2.1946, + "step": 6823000 + }, + { + "epoch": 33.81, + "learning_rate": 3.310307763955153e-05, + "loss": 2.2058, + "step": 6823500 + }, + { + "epoch": 33.81, + "learning_rate": 3.310183905312545e-05, + "loss": 2.2011, + "step": 6824000 + }, + { + "epoch": 33.81, + "learning_rate": 3.3100600466699365e-05, + "loss": 2.2053, + "step": 6824500 + }, + { + "epoch": 33.81, + "learning_rate": 3.3099364357446134e-05, + "loss": 2.2091, + "step": 6825000 + }, + { + "epoch": 33.82, + "learning_rate": 3.309812577102005e-05, + "loss": 2.2024, + "step": 6825500 + }, + { + "epoch": 33.82, + "learning_rate": 3.309688718459397e-05, + "loss": 2.1989, + "step": 6826000 + }, + { + "epoch": 33.82, + "learning_rate": 3.3095648598167884e-05, + "loss": 2.197, + "step": 6826500 + }, + { + "epoch": 33.82, + "learning_rate": 3.30944100117418e-05, + "loss": 2.2224, + "step": 6827000 + }, + { + "epoch": 33.83, + "learning_rate": 3.309317142531572e-05, + "loss": 2.2263, + "step": 6827500 + }, + { + "epoch": 33.83, + "learning_rate": 3.3091932838889635e-05, + "loss": 2.2002, + "step": 6828000 + }, + { + "epoch": 33.83, + "learning_rate": 3.309069425246355e-05, + "loss": 2.2009, + "step": 6828500 + }, + { + "epoch": 33.83, + "learning_rate": 3.308945566603747e-05, + "loss": 2.2113, + "step": 6829000 + }, + { + "epoch": 33.84, + "learning_rate": 3.3088217079611386e-05, + "loss": 2.2273, + "step": 6829500 + }, + { + "epoch": 33.84, + "learning_rate": 3.30869784931853e-05, + "loss": 2.2292, + "step": 6830000 + }, + { + "epoch": 33.84, + "learning_rate": 3.308573990675922e-05, + "loss": 2.1992, + "step": 6830500 + }, + { + "epoch": 33.84, + "learning_rate": 3.308450132033314e-05, + "loss": 2.217, + "step": 6831000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308326273390705e-05, + "loss": 2.2222, + "step": 6831500 + }, + { + "epoch": 33.85, + "learning_rate": 3.3082026624653816e-05, + "loss": 2.1777, + "step": 6832000 + }, + { + "epoch": 33.85, + "learning_rate": 3.308078803822773e-05, + "loss": 2.1957, + "step": 6832500 + }, + { + "epoch": 33.85, + "learning_rate": 3.30795519289745e-05, + "loss": 2.2035, + "step": 6833000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307831334254842e-05, + "loss": 2.2322, + "step": 6833500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3077074756122335e-05, + "loss": 2.2006, + "step": 6834000 + }, + { + "epoch": 33.86, + "learning_rate": 3.307583616969625e-05, + "loss": 2.2075, + "step": 6834500 + }, + { + "epoch": 33.86, + "learning_rate": 3.3074600060443014e-05, + "loss": 2.2022, + "step": 6835000 + }, + { + "epoch": 33.87, + "learning_rate": 3.307336147401693e-05, + "loss": 2.239, + "step": 6835500 + }, + { + "epoch": 33.87, + "learning_rate": 3.307212288759085e-05, + "loss": 2.2284, + "step": 6836000 + }, + { + "epoch": 33.87, + "learning_rate": 3.3070884301164765e-05, + "loss": 2.2016, + "step": 6836500 + }, + { + "epoch": 33.87, + "learning_rate": 3.3069648191911534e-05, + "loss": 2.2318, + "step": 6837000 + }, + { + "epoch": 33.88, + "learning_rate": 3.306840960548545e-05, + "loss": 2.2189, + "step": 6837500 + }, + { + "epoch": 33.88, + "learning_rate": 3.306717101905937e-05, + "loss": 2.2088, + "step": 6838000 + }, + { + "epoch": 33.88, + "learning_rate": 3.3065932432633285e-05, + "loss": 2.215, + "step": 6838500 + }, + { + "epoch": 33.88, + "learning_rate": 3.30646938462072e-05, + "loss": 2.2143, + "step": 6839000 + }, + { + "epoch": 33.89, + "learning_rate": 3.306345525978112e-05, + "loss": 2.2258, + "step": 6839500 + }, + { + "epoch": 33.89, + "learning_rate": 3.3062216673355035e-05, + "loss": 2.2073, + "step": 6840000 + }, + { + "epoch": 33.89, + "learning_rate": 3.306097808692895e-05, + "loss": 2.2141, + "step": 6840500 + }, + { + "epoch": 33.89, + "learning_rate": 3.305973950050287e-05, + "loss": 2.2215, + "step": 6841000 + }, + { + "epoch": 33.9, + "learning_rate": 3.3058500914076786e-05, + "loss": 2.2281, + "step": 6841500 + }, + { + "epoch": 33.9, + "learning_rate": 3.30572623276507e-05, + "loss": 2.1636, + "step": 6842000 + }, + { + "epoch": 33.9, + "learning_rate": 3.305602374122462e-05, + "loss": 2.2143, + "step": 6842500 + }, + { + "epoch": 33.9, + "learning_rate": 3.305478515479854e-05, + "loss": 2.2349, + "step": 6843000 + }, + { + "epoch": 33.91, + "learning_rate": 3.30535490455453e-05, + "loss": 2.2016, + "step": 6843500 + }, + { + "epoch": 33.91, + "learning_rate": 3.305231293629207e-05, + "loss": 2.1975, + "step": 6844000 + }, + { + "epoch": 33.91, + "learning_rate": 3.3051074349865985e-05, + "loss": 2.2012, + "step": 6844500 + }, + { + "epoch": 33.91, + "learning_rate": 3.30498357634399e-05, + "loss": 2.2201, + "step": 6845000 + }, + { + "epoch": 33.91, + "learning_rate": 3.304859717701382e-05, + "loss": 2.1905, + "step": 6845500 + }, + { + "epoch": 33.92, + "learning_rate": 3.3047358590587735e-05, + "loss": 2.2118, + "step": 6846000 + }, + { + "epoch": 33.92, + "learning_rate": 3.304612000416165e-05, + "loss": 2.2002, + "step": 6846500 + }, + { + "epoch": 33.92, + "learning_rate": 3.304488141773557e-05, + "loss": 2.2046, + "step": 6847000 + }, + { + "epoch": 33.92, + "learning_rate": 3.3043642831309486e-05, + "loss": 2.192, + "step": 6847500 + }, + { + "epoch": 33.93, + "learning_rate": 3.304240672205625e-05, + "loss": 2.2318, + "step": 6848000 + }, + { + "epoch": 33.93, + "learning_rate": 3.3041168135630165e-05, + "loss": 2.2128, + "step": 6848500 + }, + { + "epoch": 33.93, + "learning_rate": 3.303992954920408e-05, + "loss": 2.2345, + "step": 6849000 + }, + { + "epoch": 33.93, + "learning_rate": 3.303869343995085e-05, + "loss": 2.2106, + "step": 6849500 + }, + { + "epoch": 33.94, + "learning_rate": 3.303745485352477e-05, + "loss": 2.2147, + "step": 6850000 + }, + { + "epoch": 33.94, + "learning_rate": 3.3036216267098685e-05, + "loss": 2.2091, + "step": 6850500 + }, + { + "epoch": 33.94, + "learning_rate": 3.30349776806726e-05, + "loss": 2.1914, + "step": 6851000 + }, + { + "epoch": 33.94, + "learning_rate": 3.303373909424652e-05, + "loss": 2.2098, + "step": 6851500 + }, + { + "epoch": 33.95, + "learning_rate": 3.3032500507820436e-05, + "loss": 2.1919, + "step": 6852000 + }, + { + "epoch": 33.95, + "learning_rate": 3.303126192139435e-05, + "loss": 2.1861, + "step": 6852500 + }, + { + "epoch": 33.95, + "learning_rate": 3.303002828931397e-05, + "loss": 2.202, + "step": 6853000 + }, + { + "epoch": 33.95, + "learning_rate": 3.302878970288789e-05, + "loss": 2.2175, + "step": 6853500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302755111646181e-05, + "loss": 2.2061, + "step": 6854000 + }, + { + "epoch": 33.96, + "learning_rate": 3.3026312530035724e-05, + "loss": 2.1985, + "step": 6854500 + }, + { + "epoch": 33.96, + "learning_rate": 3.302507394360964e-05, + "loss": 2.2186, + "step": 6855000 + }, + { + "epoch": 33.96, + "learning_rate": 3.302383783435641e-05, + "loss": 2.2056, + "step": 6855500 + }, + { + "epoch": 33.97, + "learning_rate": 3.302259924793033e-05, + "loss": 2.2069, + "step": 6856000 + }, + { + "epoch": 33.97, + "learning_rate": 3.3021360661504244e-05, + "loss": 2.2249, + "step": 6856500 + }, + { + "epoch": 33.97, + "learning_rate": 3.302012207507816e-05, + "loss": 2.2189, + "step": 6857000 + }, + { + "epoch": 33.97, + "learning_rate": 3.301888348865208e-05, + "loss": 2.2219, + "step": 6857500 + }, + { + "epoch": 33.98, + "learning_rate": 3.3017644902225994e-05, + "loss": 2.1775, + "step": 6858000 + }, + { + "epoch": 33.98, + "learning_rate": 3.301640631579991e-05, + "loss": 2.1861, + "step": 6858500 + }, + { + "epoch": 33.98, + "learning_rate": 3.301516772937382e-05, + "loss": 2.2028, + "step": 6859000 + }, + { + "epoch": 33.98, + "learning_rate": 3.301393162012059e-05, + "loss": 2.1903, + "step": 6859500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301269303369451e-05, + "loss": 2.2199, + "step": 6860000 + }, + { + "epoch": 33.99, + "learning_rate": 3.3011454447268424e-05, + "loss": 2.2266, + "step": 6860500 + }, + { + "epoch": 33.99, + "learning_rate": 3.301021586084234e-05, + "loss": 2.223, + "step": 6861000 + }, + { + "epoch": 33.99, + "learning_rate": 3.300897975158911e-05, + "loss": 2.2017, + "step": 6861500 + }, + { + "epoch": 34.0, + "learning_rate": 3.300774116516303e-05, + "loss": 2.2275, + "step": 6862000 + }, + { + "epoch": 34.0, + "learning_rate": 3.3006502578736944e-05, + "loss": 2.1995, + "step": 6862500 + }, + { + "epoch": 34.0, + "eval_accuracy": 0.6615834582009188, + "eval_accuracy_mlm": 0.6176810786763514, + "eval_accuracy_nsp": 0.8686612357280975, + "eval_loss": 2.2982945442199707, + "eval_runtime": 145.6589, + "eval_samples_per_second": 1750.384, + "eval_steps_per_second": 72.938, + "step": 6862662 } ], "max_steps": 20184300, "num_train_epochs": 100, - "total_flos": 4.441905477808464e+18, + "total_flos": 8.884340001928284e+18, "trial_name": null, "trial_params": null }