{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997828447339848,
  "eval_steps": 500,
  "global_step": 1151,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008686210640608035,
      "grad_norm": 17.493449382349226,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 8.7443,
      "step": 1
    },
    {
      "epoch": 0.001737242128121607,
      "grad_norm": 14.990450099593053,
      "learning_rate": 6.666666666666667e-07,
      "loss": 8.6957,
      "step": 2
    },
    {
      "epoch": 0.0026058631921824105,
      "grad_norm": 16.423535165287575,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 8.692,
      "step": 3
    },
    {
      "epoch": 0.003474484256243214,
      "grad_norm": 13.143676841573155,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 8.6702,
      "step": 4
    },
    {
      "epoch": 0.004343105320304018,
      "grad_norm": 8.964338427609599,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 8.6436,
      "step": 5
    },
    {
      "epoch": 0.005211726384364821,
      "grad_norm": 5.711486770337647,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 8.6472,
      "step": 6
    },
    {
      "epoch": 0.006080347448425625,
      "grad_norm": 8.527323895494213,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 8.7054,
      "step": 7
    },
    {
      "epoch": 0.006948968512486428,
      "grad_norm": 9.12482767652379,
      "learning_rate": 2.666666666666667e-06,
      "loss": 8.7084,
      "step": 8
    },
    {
      "epoch": 0.007817589576547232,
      "grad_norm": 5.337796907422001,
      "learning_rate": 3e-06,
      "loss": 8.5973,
      "step": 9
    },
    {
      "epoch": 0.008686210640608035,
      "grad_norm": 3.563435103977848,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 8.594,
      "step": 10
    },
    {
      "epoch": 0.009554831704668839,
      "grad_norm": 2.8337150046011836,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 8.562,
      "step": 11
    },
    {
      "epoch": 0.010423452768729642,
      "grad_norm": 5.305490346258808,
      "learning_rate": 4.000000000000001e-06,
      "loss": 8.5552,
      "step": 12
    },
    {
      "epoch": 0.011292073832790446,
      "grad_norm": 3.8824803788805204,
      "learning_rate": 4.333333333333334e-06,
      "loss": 8.5319,
      "step": 13
    },
    {
      "epoch": 0.01216069489685125,
      "grad_norm": 2.779267764600503,
      "learning_rate": 4.666666666666667e-06,
      "loss": 8.5131,
      "step": 14
    },
    {
      "epoch": 0.013029315960912053,
      "grad_norm": 2.8850795560965943,
      "learning_rate": 5e-06,
      "loss": 8.4689,
      "step": 15
    },
    {
      "epoch": 0.013897937024972856,
      "grad_norm": 2.7572282850785186,
      "learning_rate": 5.333333333333334e-06,
      "loss": 8.574,
      "step": 16
    },
    {
      "epoch": 0.01476655808903366,
      "grad_norm": 2.573141583562948,
      "learning_rate": 5.666666666666667e-06,
      "loss": 8.4873,
      "step": 17
    },
    {
      "epoch": 0.015635179153094463,
      "grad_norm": 3.180322337220607,
      "learning_rate": 6e-06,
      "loss": 8.4521,
      "step": 18
    },
    {
      "epoch": 0.016503800217155265,
      "grad_norm": 3.491770308520874,
      "learning_rate": 6.333333333333333e-06,
      "loss": 8.3846,
      "step": 19
    },
    {
      "epoch": 0.01737242128121607,
      "grad_norm": 2.540124589419276,
      "learning_rate": 6.666666666666667e-06,
      "loss": 8.4451,
      "step": 20
    },
    {
      "epoch": 0.018241042345276872,
      "grad_norm": 3.121998583339167,
      "learning_rate": 7e-06,
      "loss": 8.2468,
      "step": 21
    },
    {
      "epoch": 0.019109663409337677,
      "grad_norm": 4.290808656004097,
      "learning_rate": 7.333333333333333e-06,
      "loss": 8.3109,
      "step": 22
    },
    {
      "epoch": 0.01997828447339848,
      "grad_norm": 4.450357829892193,
      "learning_rate": 7.666666666666667e-06,
      "loss": 8.2419,
      "step": 23
    },
    {
      "epoch": 0.020846905537459284,
      "grad_norm": 3.6215464038427188,
      "learning_rate": 8.000000000000001e-06,
      "loss": 8.2952,
      "step": 24
    },
    {
      "epoch": 0.021715526601520086,
      "grad_norm": 4.442035772364645,
      "learning_rate": 8.333333333333334e-06,
      "loss": 8.377,
      "step": 25
    },
    {
      "epoch": 0.02258414766558089,
      "grad_norm": 4.843618321464985,
      "learning_rate": 8.666666666666668e-06,
      "loss": 8.2759,
      "step": 26
    },
    {
      "epoch": 0.023452768729641693,
      "grad_norm": 2.3677599699211487,
      "learning_rate": 9e-06,
      "loss": 8.2335,
      "step": 27
    },
    {
      "epoch": 0.0243213897937025,
      "grad_norm": 2.7402708582612223,
      "learning_rate": 9.333333333333334e-06,
      "loss": 8.1205,
      "step": 28
    },
    {
      "epoch": 0.0251900108577633,
      "grad_norm": 3.367587996037935,
      "learning_rate": 9.666666666666667e-06,
      "loss": 8.1712,
      "step": 29
    },
    {
      "epoch": 0.026058631921824105,
      "grad_norm": 3.7838438130817647,
      "learning_rate": 1e-05,
      "loss": 8.155,
      "step": 30
    },
    {
      "epoch": 0.026927252985884907,
      "grad_norm": 4.375332206336344,
      "learning_rate": 9.999982328608275e-06,
      "loss": 8.143,
      "step": 31
    },
    {
      "epoch": 0.027795874049945712,
      "grad_norm": 6.575802280327978,
      "learning_rate": 9.999929314571892e-06,
      "loss": 8.1258,
      "step": 32
    },
    {
      "epoch": 0.028664495114006514,
      "grad_norm": 9.035297148846244,
      "learning_rate": 9.99984095830722e-06,
      "loss": 8.0348,
      "step": 33
    },
    {
      "epoch": 0.02953311617806732,
      "grad_norm": 6.634311793226191,
      "learning_rate": 9.999717260508202e-06,
      "loss": 8.0355,
      "step": 34
    },
    {
      "epoch": 0.03040173724212812,
      "grad_norm": 8.124508838108945,
      "learning_rate": 9.999558222146359e-06,
      "loss": 7.9736,
      "step": 35
    },
    {
      "epoch": 0.031270358306188926,
      "grad_norm": 5.8285745211892515,
      "learning_rate": 9.999363844470767e-06,
      "loss": 7.9337,
      "step": 36
    },
    {
      "epoch": 0.03213897937024973,
      "grad_norm": 9.094040714600302,
      "learning_rate": 9.999134129008061e-06,
      "loss": 7.8325,
      "step": 37
    },
    {
      "epoch": 0.03300760043431053,
      "grad_norm": 8.468740095393665,
      "learning_rate": 9.998869077562416e-06,
      "loss": 7.8086,
      "step": 38
    },
    {
      "epoch": 0.033876221498371335,
      "grad_norm": 6.732505858210668,
      "learning_rate": 9.998568692215532e-06,
      "loss": 7.6726,
      "step": 39
    },
    {
      "epoch": 0.03474484256243214,
      "grad_norm": 21.57246390769929,
      "learning_rate": 9.998232975326619e-06,
      "loss": 7.8017,
      "step": 40
    },
    {
      "epoch": 0.035613463626492946,
      "grad_norm": 12.672871887843293,
      "learning_rate": 9.997861929532384e-06,
      "loss": 7.6013,
      "step": 41
    },
    {
      "epoch": 0.036482084690553744,
      "grad_norm": 9.395510661131551,
      "learning_rate": 9.997455557747002e-06,
      "loss": 7.4226,
      "step": 42
    },
    {
      "epoch": 0.03735070575461455,
      "grad_norm": 12.735314205401677,
      "learning_rate": 9.9970138631621e-06,
      "loss": 7.3855,
      "step": 43
    },
    {
      "epoch": 0.038219326818675355,
      "grad_norm": 10.285907675522205,
      "learning_rate": 9.99653684924672e-06,
      "loss": 7.3198,
      "step": 44
    },
    {
      "epoch": 0.03908794788273615,
      "grad_norm": 9.965187203305367,
      "learning_rate": 9.996024519747312e-06,
      "loss": 7.1805,
      "step": 45
    },
    {
      "epoch": 0.03995656894679696,
      "grad_norm": 17.926836165323984,
      "learning_rate": 9.995476878687687e-06,
      "loss": 6.9857,
      "step": 46
    },
    {
      "epoch": 0.04082519001085776,
      "grad_norm": 23.24576845075726,
      "learning_rate": 9.994893930368987e-06,
      "loss": 7.0586,
      "step": 47
    },
    {
      "epoch": 0.04169381107491857,
      "grad_norm": 10.789226700023386,
      "learning_rate": 9.994275679369664e-06,
      "loss": 6.8635,
      "step": 48
    },
    {
      "epoch": 0.04256243213897937,
      "grad_norm": 20.819248588210748,
      "learning_rate": 9.99362213054543e-06,
      "loss": 6.8014,
      "step": 49
    },
    {
      "epoch": 0.04343105320304017,
      "grad_norm": 13.054140096829588,
      "learning_rate": 9.992933289029225e-06,
      "loss": 6.6718,
      "step": 50
    },
    {
      "epoch": 0.04429967426710098,
      "grad_norm": 31.11350513451145,
      "learning_rate": 9.992209160231182e-06,
      "loss": 6.7444,
      "step": 51
    },
    {
      "epoch": 0.04516829533116178,
      "grad_norm": 18.66011619667928,
      "learning_rate": 9.991449749838567e-06,
      "loss": 6.4907,
      "step": 52
    },
    {
      "epoch": 0.04603691639522258,
      "grad_norm": 30.236445483678942,
      "learning_rate": 9.990655063815758e-06,
      "loss": 6.4475,
      "step": 53
    },
    {
      "epoch": 0.046905537459283386,
      "grad_norm": 21.993099100665887,
      "learning_rate": 9.989825108404178e-06,
      "loss": 6.4676,
      "step": 54
    },
    {
      "epoch": 0.04777415852334419,
      "grad_norm": 19.83551768600084,
      "learning_rate": 9.988959890122257e-06,
      "loss": 6.2759,
      "step": 55
    },
    {
      "epoch": 0.048642779587405,
      "grad_norm": 13.170118097146572,
      "learning_rate": 9.988059415765378e-06,
      "loss": 6.1095,
      "step": 56
    },
    {
      "epoch": 0.049511400651465795,
      "grad_norm": 22.67994571341824,
      "learning_rate": 9.987123692405825e-06,
      "loss": 6.0883,
      "step": 57
    },
    {
      "epoch": 0.0503800217155266,
      "grad_norm": 13.038280028112723,
      "learning_rate": 9.986152727392721e-06,
      "loss": 6.0249,
      "step": 58
    },
    {
      "epoch": 0.051248642779587406,
      "grad_norm": 14.901505032251393,
      "learning_rate": 9.985146528351983e-06,
      "loss": 6.1762,
      "step": 59
    },
    {
      "epoch": 0.05211726384364821,
      "grad_norm": 26.832853386529305,
      "learning_rate": 9.984105103186245e-06,
      "loss": 6.08,
      "step": 60
    },
    {
      "epoch": 0.05298588490770901,
      "grad_norm": 15.17632623872157,
      "learning_rate": 9.983028460074811e-06,
      "loss": 5.9952,
      "step": 61
    },
    {
      "epoch": 0.053854505971769814,
      "grad_norm": 24.418647213861092,
      "learning_rate": 9.981916607473589e-06,
      "loss": 5.9961,
      "step": 62
    },
    {
      "epoch": 0.05472312703583062,
      "grad_norm": 18.746932536918226,
      "learning_rate": 9.98076955411501e-06,
      "loss": 5.8426,
      "step": 63
    },
    {
      "epoch": 0.055591748099891425,
      "grad_norm": 21.558720998266843,
      "learning_rate": 9.97958730900798e-06,
      "loss": 5.7789,
      "step": 64
    },
    {
      "epoch": 0.05646036916395222,
      "grad_norm": 28.510478407780514,
      "learning_rate": 9.97836988143779e-06,
      "loss": 5.7467,
      "step": 65
    },
    {
      "epoch": 0.05732899022801303,
      "grad_norm": 15.832636672012686,
      "learning_rate": 9.977117280966065e-06,
      "loss": 5.8336,
      "step": 66
    },
    {
      "epoch": 0.058197611292073834,
      "grad_norm": 29.18156046853244,
      "learning_rate": 9.975829517430662e-06,
      "loss": 5.8591,
      "step": 67
    },
    {
      "epoch": 0.05906623235613464,
      "grad_norm": 18.247831823792946,
      "learning_rate": 9.974506600945618e-06,
      "loss": 5.7356,
      "step": 68
    },
    {
      "epoch": 0.05993485342019544,
      "grad_norm": 23.96714109884484,
      "learning_rate": 9.973148541901053e-06,
      "loss": 5.7721,
      "step": 69
    },
    {
      "epoch": 0.06080347448425624,
      "grad_norm": 18.93369316823687,
      "learning_rate": 9.9717553509631e-06,
      "loss": 5.7324,
      "step": 70
    },
    {
      "epoch": 0.06167209554831705,
      "grad_norm": 22.87674608218244,
      "learning_rate": 9.97032703907381e-06,
      "loss": 5.7795,
      "step": 71
    },
    {
      "epoch": 0.06254071661237785,
      "grad_norm": 22.61900902318279,
      "learning_rate": 9.968863617451078e-06,
      "loss": 5.7367,
      "step": 72
    },
    {
      "epoch": 0.06340933767643865,
      "grad_norm": 17.381975236661205,
      "learning_rate": 9.967365097588548e-06,
      "loss": 5.4779,
      "step": 73
    },
    {
      "epoch": 0.06427795874049946,
      "grad_norm": 16.21661033530993,
      "learning_rate": 9.965831491255521e-06,
      "loss": 5.7334,
      "step": 74
    },
    {
      "epoch": 0.06514657980456026,
      "grad_norm": 13.791459531600026,
      "learning_rate": 9.964262810496867e-06,
      "loss": 5.5777,
      "step": 75
    },
    {
      "epoch": 0.06601520086862106,
      "grad_norm": 20.390991279830846,
      "learning_rate": 9.962659067632933e-06,
      "loss": 5.5614,
      "step": 76
    },
    {
      "epoch": 0.06688382193268187,
      "grad_norm": 16.095238532163904,
      "learning_rate": 9.961020275259433e-06,
      "loss": 5.6062,
      "step": 77
    },
    {
      "epoch": 0.06775244299674267,
      "grad_norm": 9.877965182839311,
      "learning_rate": 9.959346446247367e-06,
      "loss": 5.6125,
      "step": 78
    },
    {
      "epoch": 0.06862106406080347,
      "grad_norm": 17.028948309558636,
      "learning_rate": 9.957637593742905e-06,
      "loss": 5.5812,
      "step": 79
    },
    {
      "epoch": 0.06948968512486428,
      "grad_norm": 8.979583407927759,
      "learning_rate": 9.955893731167295e-06,
      "loss": 5.5746,
      "step": 80
    },
    {
      "epoch": 0.07035830618892508,
      "grad_norm": 12.03923638076693,
      "learning_rate": 9.95411487221675e-06,
      "loss": 5.6653,
      "step": 81
    },
    {
      "epoch": 0.07122692725298589,
      "grad_norm": 12.390003162693889,
      "learning_rate": 9.952301030862337e-06,
      "loss": 5.4735,
      "step": 82
    },
    {
      "epoch": 0.07209554831704669,
      "grad_norm": 9.315040920552768,
      "learning_rate": 9.950452221349887e-06,
      "loss": 5.5041,
      "step": 83
    },
    {
      "epoch": 0.07296416938110749,
      "grad_norm": 10.904257522944858,
      "learning_rate": 9.948568458199856e-06,
      "loss": 5.4586,
      "step": 84
    },
    {
      "epoch": 0.0738327904451683,
      "grad_norm": 12.975062616231535,
      "learning_rate": 9.94664975620723e-06,
      "loss": 5.3053,
      "step": 85
    },
    {
      "epoch": 0.0747014115092291,
      "grad_norm": 20.113827062713174,
      "learning_rate": 9.944696130441399e-06,
      "loss": 5.5913,
      "step": 86
    },
    {
      "epoch": 0.0755700325732899,
      "grad_norm": 12.517008621704704,
      "learning_rate": 9.942707596246051e-06,
      "loss": 5.337,
      "step": 87
    },
    {
      "epoch": 0.07643865363735071,
      "grad_norm": 9.758313745988033,
      "learning_rate": 9.940684169239035e-06,
      "loss": 5.3173,
      "step": 88
    },
    {
      "epoch": 0.07730727470141151,
      "grad_norm": 10.515920909580231,
      "learning_rate": 9.938625865312252e-06,
      "loss": 5.4537,
      "step": 89
    },
    {
      "epoch": 0.0781758957654723,
      "grad_norm": 14.20391683761756,
      "learning_rate": 9.936532700631518e-06,
      "loss": 5.5486,
      "step": 90
    },
    {
      "epoch": 0.07904451682953312,
      "grad_norm": 6.514270269870405,
      "learning_rate": 9.93440469163645e-06,
      "loss": 5.538,
      "step": 91
    },
    {
      "epoch": 0.07991313789359392,
      "grad_norm": 24.656805213333442,
      "learning_rate": 9.932241855040328e-06,
      "loss": 5.3992,
      "step": 92
    },
    {
      "epoch": 0.08078175895765473,
      "grad_norm": 15.24785447513823,
      "learning_rate": 9.930044207829966e-06,
      "loss": 5.4491,
      "step": 93
    },
    {
      "epoch": 0.08165038002171553,
      "grad_norm": 17.034838291407535,
      "learning_rate": 9.927811767265581e-06,
      "loss": 5.5654,
      "step": 94
    },
    {
      "epoch": 0.08251900108577633,
      "grad_norm": 16.66779749233834,
      "learning_rate": 9.925544550880653e-06,
      "loss": 5.3037,
      "step": 95
    },
    {
      "epoch": 0.08338762214983714,
      "grad_norm": 13.53801819272121,
      "learning_rate": 9.92324257648179e-06,
      "loss": 5.1831,
      "step": 96
    },
    {
      "epoch": 0.08425624321389794,
      "grad_norm": 13.925819342029348,
      "learning_rate": 9.920905862148586e-06,
      "loss": 5.3506,
      "step": 97
    },
    {
      "epoch": 0.08512486427795873,
      "grad_norm": 11.595896262884168,
      "learning_rate": 9.918534426233486e-06,
      "loss": 5.3379,
      "step": 98
    },
    {
      "epoch": 0.08599348534201955,
      "grad_norm": 10.793238449505045,
      "learning_rate": 9.916128287361634e-06,
      "loss": 5.2807,
      "step": 99
    },
    {
      "epoch": 0.08686210640608034,
      "grad_norm": 9.100694836617444,
      "learning_rate": 9.913687464430727e-06,
      "loss": 5.3927,
      "step": 100
    },
    {
      "epoch": 0.08773072747014116,
      "grad_norm": 10.570344665746836,
      "learning_rate": 9.91121197661087e-06,
      "loss": 5.3416,
      "step": 101
    },
    {
      "epoch": 0.08859934853420195,
      "grad_norm": 8.054172083905852,
      "learning_rate": 9.908701843344427e-06,
      "loss": 5.4551,
      "step": 102
    },
    {
      "epoch": 0.08946796959826275,
      "grad_norm": 9.349388343495786,
      "learning_rate": 9.906157084345865e-06,
      "loss": 5.3192,
      "step": 103
    },
    {
      "epoch": 0.09033659066232357,
      "grad_norm": 7.475662385329425,
      "learning_rate": 9.903577719601597e-06,
      "loss": 5.2733,
      "step": 104
    },
    {
      "epoch": 0.09120521172638436,
      "grad_norm": 7.943968157184266,
      "learning_rate": 9.900963769369827e-06,
      "loss": 5.3985,
      "step": 105
    },
    {
      "epoch": 0.09207383279044516,
      "grad_norm": 7.531045700331419,
      "learning_rate": 9.8983152541804e-06,
      "loss": 5.3622,
      "step": 106
    },
    {
      "epoch": 0.09294245385450597,
      "grad_norm": 15.575311318718667,
      "learning_rate": 9.895632194834625e-06,
      "loss": 5.1931,
      "step": 107
    },
    {
      "epoch": 0.09381107491856677,
      "grad_norm": 11.029594288829802,
      "learning_rate": 9.892914612405117e-06,
      "loss": 5.2938,
      "step": 108
    },
    {
      "epoch": 0.09467969598262758,
      "grad_norm": 11.455193260474546,
      "learning_rate": 9.890162528235641e-06,
      "loss": 5.1901,
      "step": 109
    },
    {
      "epoch": 0.09554831704668838,
      "grad_norm": 11.086046323655495,
      "learning_rate": 9.887375963940936e-06,
      "loss": 5.4614,
      "step": 110
    },
    {
      "epoch": 0.09641693811074918,
      "grad_norm": 7.999590835458638,
      "learning_rate": 9.884554941406539e-06,
      "loss": 5.2932,
      "step": 111
    },
    {
      "epoch": 0.09728555917481,
      "grad_norm": 11.326641474544164,
      "learning_rate": 9.881699482788627e-06,
      "loss": 5.1322,
      "step": 112
    },
    {
      "epoch": 0.09815418023887079,
      "grad_norm": 7.633538464939616,
      "learning_rate": 9.878809610513836e-06,
      "loss": 5.1649,
      "step": 113
    },
    {
      "epoch": 0.09902280130293159,
      "grad_norm": 9.626046071760667,
      "learning_rate": 9.87588534727908e-06,
      "loss": 5.3233,
      "step": 114
    },
    {
      "epoch": 0.0998914223669924,
      "grad_norm": 9.133312938258387,
      "learning_rate": 9.872926716051387e-06,
      "loss": 5.1132,
      "step": 115
    },
    {
      "epoch": 0.1007600434310532,
      "grad_norm": 9.505514994492872,
      "learning_rate": 9.869933740067703e-06,
      "loss": 5.1579,
      "step": 116
    },
    {
      "epoch": 0.10162866449511401,
      "grad_norm": 8.791259591754573,
      "learning_rate": 9.866906442834713e-06,
      "loss": 5.2137,
      "step": 117
    },
    {
      "epoch": 0.10249728555917481,
      "grad_norm": 10.053006259051758,
      "learning_rate": 9.863844848128668e-06,
      "loss": 5.2787,
      "step": 118
    },
    {
      "epoch": 0.10336590662323561,
      "grad_norm": 13.131490348069805,
      "learning_rate": 9.860748979995183e-06,
      "loss": 4.9206,
      "step": 119
    },
    {
      "epoch": 0.10423452768729642,
      "grad_norm": 6.631095589009018,
      "learning_rate": 9.857618862749062e-06,
      "loss": 5.2466,
      "step": 120
    },
    {
      "epoch": 0.10510314875135722,
      "grad_norm": 13.217875916209636,
      "learning_rate": 9.85445452097409e-06,
      "loss": 5.2414,
      "step": 121
    },
    {
      "epoch": 0.10597176981541802,
      "grad_norm": 8.029250043406146,
      "learning_rate": 9.851255979522856e-06,
      "loss": 5.2792,
      "step": 122
    },
    {
      "epoch": 0.10684039087947883,
      "grad_norm": 17.54497296410541,
      "learning_rate": 9.848023263516552e-06,
      "loss": 5.4227,
      "step": 123
    },
    {
      "epoch": 0.10770901194353963,
      "grad_norm": 11.789384484790114,
      "learning_rate": 9.844756398344773e-06,
      "loss": 5.2564,
      "step": 124
    },
    {
      "epoch": 0.10857763300760044,
      "grad_norm": 12.89494048195122,
      "learning_rate": 9.841455409665322e-06,
      "loss": 5.173,
      "step": 125
    },
    {
      "epoch": 0.10944625407166124,
      "grad_norm": 10.08813144239029,
      "learning_rate": 9.838120323404004e-06,
      "loss": 5.2173,
      "step": 126
    },
    {
      "epoch": 0.11031487513572204,
      "grad_norm": 8.840110668733463,
      "learning_rate": 9.834751165754428e-06,
      "loss": 5.237,
      "step": 127
    },
    {
      "epoch": 0.11118349619978285,
      "grad_norm": 10.088550995951381,
      "learning_rate": 9.831347963177794e-06,
      "loss": 5.2767,
      "step": 128
    },
    {
      "epoch": 0.11205211726384365,
      "grad_norm": 8.195942675919266,
      "learning_rate": 9.827910742402693e-06,
      "loss": 5.2096,
      "step": 129
    },
    {
      "epoch": 0.11292073832790445,
      "grad_norm": 11.245032884796654,
      "learning_rate": 9.824439530424888e-06,
      "loss": 5.1405,
      "step": 130
    },
    {
      "epoch": 0.11378935939196526,
      "grad_norm": 10.023313323111374,
      "learning_rate": 9.820934354507113e-06,
      "loss": 5.2706,
      "step": 131
    },
    {
      "epoch": 0.11465798045602606,
      "grad_norm": 5.266854203910289,
      "learning_rate": 9.817395242178854e-06,
      "loss": 5.2976,
      "step": 132
    },
    {
      "epoch": 0.11552660152008687,
      "grad_norm": 13.356121226198322,
      "learning_rate": 9.813822221236125e-06,
      "loss": 5.1784,
      "step": 133
    },
    {
      "epoch": 0.11639522258414767,
      "grad_norm": 7.209004502332046,
      "learning_rate": 9.810215319741262e-06,
      "loss": 5.2083,
      "step": 134
    },
    {
      "epoch": 0.11726384364820847,
      "grad_norm": 9.476007633968416,
      "learning_rate": 9.806574566022693e-06,
      "loss": 5.1428,
      "step": 135
    },
    {
      "epoch": 0.11813246471226928,
      "grad_norm": 8.330598001164512,
      "learning_rate": 9.802899988674728e-06,
      "loss": 5.0473,
      "step": 136
    },
    {
      "epoch": 0.11900108577633008,
      "grad_norm": 6.96008730119411,
      "learning_rate": 9.799191616557317e-06,
      "loss": 5.2599,
      "step": 137
    },
    {
      "epoch": 0.11986970684039087,
      "grad_norm": 7.736962765205919,
      "learning_rate": 9.795449478795836e-06,
      "loss": 5.2652,
      "step": 138
    },
    {
      "epoch": 0.12073832790445169,
      "grad_norm": 6.749695837587577,
      "learning_rate": 9.791673604780856e-06,
      "loss": 5.238,
      "step": 139
    },
    {
      "epoch": 0.12160694896851248,
      "grad_norm": 10.329369152037573,
      "learning_rate": 9.787864024167911e-06,
      "loss": 5.1537,
      "step": 140
    },
    {
      "epoch": 0.12247557003257328,
      "grad_norm": 6.905477249008306,
      "learning_rate": 9.78402076687726e-06,
      "loss": 5.2558,
      "step": 141
    },
    {
      "epoch": 0.1233441910966341,
      "grad_norm": 11.627003258310488,
      "learning_rate": 9.780143863093663e-06,
      "loss": 5.2708,
      "step": 142
    },
    {
      "epoch": 0.1242128121606949,
      "grad_norm": 7.001100481839455,
      "learning_rate": 9.776233343266138e-06,
      "loss": 5.1064,
      "step": 143
    },
    {
      "epoch": 0.1250814332247557,
      "grad_norm": 11.285622964490454,
      "learning_rate": 9.772289238107717e-06,
      "loss": 4.9967,
      "step": 144
    },
    {
      "epoch": 0.1259500542888165,
      "grad_norm": 7.568164194677143,
      "learning_rate": 9.768311578595212e-06,
      "loss": 5.014,
      "step": 145
    },
    {
      "epoch": 0.1268186753528773,
      "grad_norm": 9.381543499879124,
      "learning_rate": 9.764300395968969e-06,
      "loss": 5.0742,
      "step": 146
    },
    {
      "epoch": 0.1276872964169381,
      "grad_norm": 8.76903592042971,
      "learning_rate": 9.760255721732626e-06,
      "loss": 4.9568,
      "step": 147
    },
    {
      "epoch": 0.12855591748099893,
      "grad_norm": 7.998187265875589,
      "learning_rate": 9.756177587652857e-06,
      "loss": 5.1517,
      "step": 148
    },
    {
      "epoch": 0.12942453854505973,
      "grad_norm": 10.119399253599614,
      "learning_rate": 9.752066025759132e-06,
      "loss": 5.0338,
      "step": 149
    },
    {
      "epoch": 0.13029315960912052,
      "grad_norm": 6.547018882815891,
      "learning_rate": 9.74792106834346e-06,
      "loss": 5.0788,
      "step": 150
    },
    {
      "epoch": 0.13116178067318132,
      "grad_norm": 8.201656095763651,
      "learning_rate": 9.743742747960138e-06,
      "loss": 5.193,
      "step": 151
    },
    {
      "epoch": 0.13203040173724212,
      "grad_norm": 6.751595098502055,
      "learning_rate": 9.739531097425493e-06,
      "loss": 5.0545,
      "step": 152
    },
    {
      "epoch": 0.13289902280130292,
      "grad_norm": 11.073060727681726,
      "learning_rate": 9.735286149817623e-06,
      "loss": 4.9874,
      "step": 153
    },
    {
      "epoch": 0.13376764386536374,
      "grad_norm": 6.413004734395765,
      "learning_rate": 9.731007938476145e-06,
      "loss": 5.1605,
      "step": 154
    },
    {
      "epoch": 0.13463626492942454,
      "grad_norm": 9.902861982329972,
      "learning_rate": 9.726696497001923e-06,
      "loss": 5.0762,
      "step": 155
    },
    {
      "epoch": 0.13550488599348534,
      "grad_norm": 9.135620944817346,
      "learning_rate": 9.722351859256815e-06,
      "loss": 5.1326,
      "step": 156
    },
    {
      "epoch": 0.13637350705754614,
      "grad_norm": 5.094932628448521,
      "learning_rate": 9.717974059363392e-06,
      "loss": 5.0871,
      "step": 157
    },
    {
      "epoch": 0.13724212812160694,
      "grad_norm": 13.8159898342583,
      "learning_rate": 9.713563131704685e-06,
      "loss": 5.0887,
      "step": 158
    },
    {
      "epoch": 0.13811074918566776,
      "grad_norm": 7.776188941220426,
      "learning_rate": 9.709119110923911e-06,
      "loss": 5.0136,
      "step": 159
    },
    {
      "epoch": 0.13897937024972856,
      "grad_norm": 14.140298714913962,
      "learning_rate": 9.70464203192419e-06,
      "loss": 5.1441,
      "step": 160
    },
    {
      "epoch": 0.13984799131378936,
      "grad_norm": 7.397703190515995,
      "learning_rate": 9.700131929868289e-06,
      "loss": 5.0998,
      "step": 161
    },
    {
      "epoch": 0.14071661237785016,
      "grad_norm": 13.939460570037966,
      "learning_rate": 9.695588840178331e-06,
      "loss": 5.0197,
      "step": 162
    },
    {
      "epoch": 0.14158523344191096,
      "grad_norm": 10.270022957942269,
      "learning_rate": 9.691012798535524e-06,
      "loss": 4.8067,
      "step": 163
    },
    {
      "epoch": 0.14245385450597178,
      "grad_norm": 10.393393548384978,
      "learning_rate": 9.686403840879877e-06,
      "loss": 5.1063,
      "step": 164
    },
    {
      "epoch": 0.14332247557003258,
      "grad_norm": 9.993662702844784,
      "learning_rate": 9.681762003409926e-06,
      "loss": 5.1959,
      "step": 165
    },
    {
      "epoch": 0.14419109663409338,
      "grad_norm": 7.693407344731637,
      "learning_rate": 9.677087322582434e-06,
      "loss": 5.0605,
      "step": 166
    },
    {
      "epoch": 0.14505971769815418,
      "grad_norm": 9.965954257327015,
      "learning_rate": 9.672379835112124e-06,
      "loss": 5.1052,
      "step": 167
    },
    {
      "epoch": 0.14592833876221498,
      "grad_norm": 6.623907098912022,
      "learning_rate": 9.667639577971372e-06,
      "loss": 5.2321,
      "step": 168
    },
    {
      "epoch": 0.14679695982627577,
      "grad_norm": 12.30970638465487,
      "learning_rate": 9.662866588389931e-06,
      "loss": 4.9307,
      "step": 169
    },
    {
      "epoch": 0.1476655808903366,
      "grad_norm": 9.854445105895712,
      "learning_rate": 9.658060903854633e-06,
      "loss": 5.2573,
      "step": 170
    },
    {
      "epoch": 0.1485342019543974,
      "grad_norm": 9.733745744797229,
      "learning_rate": 9.653222562109093e-06,
      "loss": 5.1312,
      "step": 171
    },
    {
      "epoch": 0.1494028230184582,
      "grad_norm": 7.148106476376709,
      "learning_rate": 9.64835160115341e-06,
      "loss": 4.9459,
      "step": 172
    },
    {
      "epoch": 0.150271444082519,
      "grad_norm": 9.910271107591148,
      "learning_rate": 9.643448059243881e-06,
      "loss": 4.9896,
      "step": 173
    },
    {
      "epoch": 0.1511400651465798,
      "grad_norm": 6.743151416523807,
      "learning_rate": 9.638511974892689e-06,
      "loss": 5.0992,
      "step": 174
    },
    {
      "epoch": 0.15200868621064062,
      "grad_norm": 9.278378594694823,
      "learning_rate": 9.633543386867599e-06,
      "loss": 5.0205,
      "step": 175
    },
    {
      "epoch": 0.15287730727470142,
      "grad_norm": 8.006244545055509,
      "learning_rate": 9.628542334191665e-06,
      "loss": 5.1466,
      "step": 176
    },
    {
      "epoch": 0.15374592833876222,
      "grad_norm": 6.372417255435819,
      "learning_rate": 9.623508856142914e-06,
      "loss": 5.078,
      "step": 177
    },
    {
      "epoch": 0.15461454940282301,
      "grad_norm": 8.38636496529513,
      "learning_rate": 9.61844299225404e-06,
      "loss": 5.1724,
      "step": 178
    },
    {
      "epoch": 0.1554831704668838,
      "grad_norm": 7.43768649121031,
      "learning_rate": 9.613344782312093e-06,
      "loss": 4.8699,
      "step": 179
    },
    {
      "epoch": 0.1563517915309446,
      "grad_norm": 12.808038040940353,
      "learning_rate": 9.608214266358171e-06,
      "loss": 5.0929,
      "step": 180
    },
    {
      "epoch": 0.15722041259500544,
      "grad_norm": 10.021409419376392,
      "learning_rate": 9.603051484687096e-06,
      "loss": 4.9412,
      "step": 181
    },
    {
      "epoch": 0.15808903365906624,
      "grad_norm": 8.383987107054914,
      "learning_rate": 9.597856477847111e-06,
      "loss": 5.0407,
      "step": 182
    },
    {
      "epoch": 0.15895765472312703,
      "grad_norm": 11.839795913237127,
      "learning_rate": 9.592629286639545e-06,
      "loss": 4.9284,
      "step": 183
    },
    {
      "epoch": 0.15982627578718783,
      "grad_norm": 7.10734457748747,
      "learning_rate": 9.58736995211851e-06,
      "loss": 5.058,
      "step": 184
    },
    {
      "epoch": 0.16069489685124863,
      "grad_norm": 9.11261944633933,
      "learning_rate": 9.582078515590565e-06,
      "loss": 4.8771,
      "step": 185
    },
    {
      "epoch": 0.16156351791530946,
      "grad_norm": 9.824025734718676,
      "learning_rate": 9.5767550186144e-06,
      "loss": 4.8962,
      "step": 186
    },
    {
      "epoch": 0.16243213897937026,
      "grad_norm": 8.437193122982974,
      "learning_rate": 9.571399503000499e-06,
      "loss": 4.9084,
      "step": 187
    },
    {
      "epoch": 0.16330076004343105,
      "grad_norm": 5.687318397304989,
      "learning_rate": 9.566012010810826e-06,
      "loss": 5.08,
      "step": 188
    },
    {
      "epoch": 0.16416938110749185,
      "grad_norm": 14.433519675550484,
      "learning_rate": 9.560592584358489e-06,
      "loss": 4.8611,
      "step": 189
    },
    {
      "epoch": 0.16503800217155265,
      "grad_norm": 6.794400971865543,
      "learning_rate": 9.555141266207398e-06,
      "loss": 4.936,
      "step": 190
    },
    {
      "epoch": 0.16590662323561348,
      "grad_norm": 14.402134569349881,
      "learning_rate": 9.549658099171945e-06,
      "loss": 4.8904,
      "step": 191
    },
    {
      "epoch": 0.16677524429967427,
      "grad_norm": 9.083194622310913,
      "learning_rate": 9.544143126316661e-06,
      "loss": 5.0294,
      "step": 192
    },
    {
      "epoch": 0.16764386536373507,
      "grad_norm": 10.639070515809655,
      "learning_rate": 9.538596390955876e-06,
      "loss": 4.9204,
      "step": 193
    },
    {
      "epoch": 0.16851248642779587,
      "grad_norm": 7.267347749331012,
      "learning_rate": 9.533017936653385e-06,
      "loss": 5.0803,
      "step": 194
    },
    {
      "epoch": 0.16938110749185667,
      "grad_norm": 8.011322892386996,
      "learning_rate": 9.527407807222096e-06,
      "loss": 5.0817,
      "step": 195
    },
    {
      "epoch": 0.17024972855591747,
      "grad_norm": 8.33354841480774,
      "learning_rate": 9.521766046723699e-06,
      "loss": 5.0201,
      "step": 196
    },
    {
      "epoch": 0.1711183496199783,
      "grad_norm": 4.646575968820735,
      "learning_rate": 9.516092699468305e-06,
      "loss": 5.1351,
      "step": 197
    },
    {
      "epoch": 0.1719869706840391,
      "grad_norm": 9.81795639706777,
      "learning_rate": 9.510387810014115e-06,
      "loss": 4.8496,
      "step": 198
    },
    {
      "epoch": 0.1728555917480999,
      "grad_norm": 8.118086500928438,
      "learning_rate": 9.504651423167056e-06,
      "loss": 4.7814,
      "step": 199
    },
    {
      "epoch": 0.1737242128121607,
      "grad_norm": 8.90163622203598,
      "learning_rate": 9.49888358398043e-06,
      "loss": 4.9746,
      "step": 200
    },
    {
      "epoch": 0.1745928338762215,
      "grad_norm": 9.581229012117431,
      "learning_rate": 9.493084337754573e-06,
      "loss": 4.9412,
      "step": 201
    },
    {
      "epoch": 0.1754614549402823,
      "grad_norm": 5.4797873048345185,
      "learning_rate": 9.487253730036484e-06,
      "loss": 5.0215,
      "step": 202
    },
    {
      "epoch": 0.1763300760043431,
      "grad_norm": 13.799396637492386,
      "learning_rate": 9.481391806619475e-06,
      "loss": 5.1105,
      "step": 203
    },
    {
      "epoch": 0.1771986970684039,
      "grad_norm": 10.196454355029717,
      "learning_rate": 9.475498613542808e-06,
      "loss": 5.0285,
      "step": 204
    },
    {
      "epoch": 0.1780673181324647,
      "grad_norm": 13.961082886424126,
      "learning_rate": 9.469574197091345e-06,
      "loss": 4.986,
      "step": 205
    },
    {
      "epoch": 0.1789359391965255,
      "grad_norm": 9.989829598419863,
      "learning_rate": 9.46361860379516e-06,
      "loss": 5.021,
      "step": 206
    },
    {
      "epoch": 0.17980456026058633,
      "grad_norm": 12.997591407480371,
      "learning_rate": 9.4576318804292e-06,
      "loss": 4.9919,
      "step": 207
    },
    {
      "epoch": 0.18067318132464713,
      "grad_norm": 9.372234356116795,
      "learning_rate": 9.451614074012905e-06,
      "loss": 5.1485,
      "step": 208
    },
    {
      "epoch": 0.18154180238870793,
      "grad_norm": 10.142709831697546,
      "learning_rate": 9.445565231809832e-06,
      "loss": 5.0171,
      "step": 209
    },
    {
      "epoch": 0.18241042345276873,
      "grad_norm": 9.226181067395016,
      "learning_rate": 9.439485401327296e-06,
      "loss": 5.0061,
      "step": 210
    },
    {
      "epoch": 0.18327904451682953,
      "grad_norm": 8.279353289891953,
      "learning_rate": 9.433374630315997e-06,
      "loss": 4.9115,
      "step": 211
    },
    {
      "epoch": 0.18414766558089032,
      "grad_norm": 11.929279731379703,
      "learning_rate": 9.427232966769634e-06,
      "loss": 4.6847,
      "step": 212
    },
    {
      "epoch": 0.18501628664495115,
      "grad_norm": 5.482572771970624,
      "learning_rate": 9.421060458924539e-06,
      "loss": 4.9443,
      "step": 213
    },
    {
      "epoch": 0.18588490770901195,
      "grad_norm": 7.427326598312704,
      "learning_rate": 9.414857155259289e-06,
      "loss": 4.9023,
      "step": 214
    },
    {
      "epoch": 0.18675352877307275,
      "grad_norm": 8.360757486256176,
      "learning_rate": 9.408623104494336e-06,
      "loss": 4.984,
      "step": 215
    },
    {
      "epoch": 0.18762214983713354,
      "grad_norm": 7.208982202085643,
      "learning_rate": 9.402358355591609e-06,
      "loss": 4.963,
      "step": 216
    },
    {
      "epoch": 0.18849077090119434,
      "grad_norm": 5.301087365446961,
      "learning_rate": 9.39606295775415e-06,
      "loss": 4.998,
      "step": 217
    },
    {
      "epoch": 0.18935939196525517,
      "grad_norm": 7.799837692221157,
      "learning_rate": 9.389736960425708e-06,
      "loss": 4.9741,
      "step": 218
    },
    {
      "epoch": 0.19022801302931597,
      "grad_norm": 7.151033616802984,
      "learning_rate": 9.383380413290361e-06,
      "loss": 4.8871,
      "step": 219
    },
    {
      "epoch": 0.19109663409337677,
      "grad_norm": 5.787982552484058,
      "learning_rate": 9.376993366272128e-06,
      "loss": 4.9864,
      "step": 220
    },
    {
      "epoch": 0.19196525515743756,
      "grad_norm": 7.905445175850379,
      "learning_rate": 9.370575869534565e-06,
      "loss": 5.0129,
      "step": 221
    },
    {
      "epoch": 0.19283387622149836,
      "grad_norm": 4.339162197971099,
      "learning_rate": 9.364127973480385e-06,
      "loss": 5.0006,
      "step": 222
    },
    {
      "epoch": 0.1937024972855592,
      "grad_norm": 9.706062286895799,
      "learning_rate": 9.35764972875105e-06,
      "loss": 4.8678,
      "step": 223
    },
    {
      "epoch": 0.19457111834962,
      "grad_norm": 6.364386062061041,
      "learning_rate": 9.351141186226387e-06,
      "loss": 4.8405,
      "step": 224
    },
    {
      "epoch": 0.19543973941368079,
      "grad_norm": 8.97734562182805,
      "learning_rate": 9.344602397024172e-06,
      "loss": 4.9155,
      "step": 225
    },
    {
      "epoch": 0.19630836047774158,
      "grad_norm": 7.362245536098451,
      "learning_rate": 9.338033412499743e-06,
      "loss": 4.7595,
      "step": 226
    },
    {
      "epoch": 0.19717698154180238,
      "grad_norm": 7.54540953421405,
      "learning_rate": 9.331434284245585e-06,
      "loss": 4.8956,
      "step": 227
    },
    {
      "epoch": 0.19804560260586318,
      "grad_norm": 7.423100629581046,
      "learning_rate": 9.324805064090939e-06,
      "loss": 4.9622,
      "step": 228
    },
    {
      "epoch": 0.198914223669924,
      "grad_norm": 6.020905403406819,
      "learning_rate": 9.318145804101377e-06,
      "loss": 4.959,
      "step": 229
    },
    {
      "epoch": 0.1997828447339848,
      "grad_norm": 4.670252852600942,
      "learning_rate": 9.31145655657841e-06,
      "loss": 5.0506,
      "step": 230
    },
    {
      "epoch": 0.2006514657980456,
      "grad_norm": 7.795934999989379,
      "learning_rate": 9.30473737405906e-06,
      "loss": 4.9765,
      "step": 231
    },
    {
      "epoch": 0.2015200868621064,
      "grad_norm": 6.01240795364037,
      "learning_rate": 9.29798830931547e-06,
      "loss": 4.9211,
      "step": 232
    },
    {
      "epoch": 0.2023887079261672,
      "grad_norm": 5.698601126510405,
      "learning_rate": 9.291209415354466e-06,
      "loss": 4.7963,
      "step": 233
    },
    {
      "epoch": 0.20325732899022803,
      "grad_norm": 12.382329963742128,
      "learning_rate": 9.284400745417154e-06,
      "loss": 4.8329,
      "step": 234
    },
    {
      "epoch": 0.20412595005428882,
      "grad_norm": 6.0887020552234645,
      "learning_rate": 9.277562352978504e-06,
      "loss": 4.9987,
      "step": 235
    },
    {
      "epoch": 0.20499457111834962,
      "grad_norm": 9.258532830886514,
      "learning_rate": 9.270694291746918e-06,
      "loss": 4.7606,
      "step": 236
    },
    {
      "epoch": 0.20586319218241042,
      "grad_norm": 5.467188874710136,
      "learning_rate": 9.26379661566382e-06,
      "loss": 4.8058,
      "step": 237
    },
    {
      "epoch": 0.20673181324647122,
      "grad_norm": 8.326138039007624,
      "learning_rate": 9.256869378903226e-06,
      "loss": 4.8709,
      "step": 238
    },
    {
      "epoch": 0.20760043431053202,
      "grad_norm": 6.953137100468736,
      "learning_rate": 9.249912635871317e-06,
      "loss": 4.877,
      "step": 239
    },
    {
      "epoch": 0.20846905537459284,
      "grad_norm": 7.746260501101959,
      "learning_rate": 9.242926441206024e-06,
      "loss": 4.866,
      "step": 240
    },
    {
      "epoch": 0.20933767643865364,
      "grad_norm": 5.662860561699009,
      "learning_rate": 9.235910849776578e-06,
      "loss": 4.9476,
      "step": 241
    },
    {
      "epoch": 0.21020629750271444,
      "grad_norm": 9.471074354024374,
      "learning_rate": 9.2288659166831e-06,
      "loss": 4.8759,
      "step": 242
    },
    {
      "epoch": 0.21107491856677524,
      "grad_norm": 5.294803045493394,
      "learning_rate": 9.221791697256152e-06,
      "loss": 4.8172,
      "step": 243
    },
    {
      "epoch": 0.21194353963083604,
      "grad_norm": 9.913380058373116,
      "learning_rate": 9.214688247056316e-06,
      "loss": 4.9476,
      "step": 244
    },
    {
      "epoch": 0.21281216069489686,
      "grad_norm": 6.941540338932768,
      "learning_rate": 9.207555621873748e-06,
      "loss": 4.8194,
      "step": 245
    },
    {
      "epoch": 0.21368078175895766,
      "grad_norm": 9.460762282346524,
      "learning_rate": 9.20039387772774e-06,
      "loss": 4.9644,
      "step": 246
    },
    {
      "epoch": 0.21454940282301846,
      "grad_norm": 7.830249212526504,
      "learning_rate": 9.19320307086629e-06,
      "loss": 4.9704,
      "step": 247
    },
    {
      "epoch": 0.21541802388707926,
      "grad_norm": 6.925252918501548,
      "learning_rate": 9.185983257765648e-06,
      "loss": 4.9168,
      "step": 248
    },
    {
      "epoch": 0.21628664495114006,
      "grad_norm": 7.233949141897864,
      "learning_rate": 9.178734495129876e-06,
      "loss": 4.8646,
      "step": 249
    },
    {
      "epoch": 0.21715526601520088,
      "grad_norm": 5.897422894008084,
      "learning_rate": 9.171456839890408e-06,
      "loss": 5.0017,
      "step": 250
    },
    {
      "epoch": 0.21802388707926168,
      "grad_norm": 8.435251394805453,
      "learning_rate": 9.1641503492056e-06,
      "loss": 4.8114,
      "step": 251
    },
    {
      "epoch": 0.21889250814332248,
      "grad_norm": 5.760910243649614,
      "learning_rate": 9.156815080460277e-06,
      "loss": 4.8976,
      "step": 252
    },
    {
      "epoch": 0.21976112920738328,
      "grad_norm": 7.520437903960479,
      "learning_rate": 9.149451091265286e-06,
      "loss": 4.7124,
      "step": 253
    },
    {
      "epoch": 0.22062975027144408,
      "grad_norm": 7.013652737318022,
      "learning_rate": 9.142058439457044e-06,
      "loss": 4.9533,
      "step": 254
    },
    {
      "epoch": 0.22149837133550487,
      "grad_norm": 7.159149521810479,
      "learning_rate": 9.134637183097083e-06,
      "loss": 4.8566,
      "step": 255
    },
    {
      "epoch": 0.2223669923995657,
      "grad_norm": 6.499343085246989,
      "learning_rate": 9.127187380471595e-06,
      "loss": 4.6676,
      "step": 256
    },
    {
      "epoch": 0.2232356134636265,
      "grad_norm": 5.21337701054524,
      "learning_rate": 9.11970909009097e-06,
      "loss": 4.922,
      "step": 257
    },
    {
      "epoch": 0.2241042345276873,
      "grad_norm": 6.349567353113073,
      "learning_rate": 9.112202370689337e-06,
      "loss": 4.8532,
      "step": 258
    },
    {
      "epoch": 0.2249728555917481,
      "grad_norm": 6.818694420622126,
      "learning_rate": 9.104667281224114e-06,
      "loss": 4.9436,
      "step": 259
    },
    {
      "epoch": 0.2258414766558089,
      "grad_norm": 7.214238229211862,
      "learning_rate": 9.09710388087553e-06,
      "loss": 5.0028,
      "step": 260
    },
    {
      "epoch": 0.22671009771986972,
      "grad_norm": 5.209070263470396,
      "learning_rate": 9.089512229046167e-06,
      "loss": 4.5818,
      "step": 261
    },
    {
      "epoch": 0.22757871878393052,
      "grad_norm": 10.360050755845188,
      "learning_rate": 9.08189238536049e-06,
      "loss": 4.8978,
      "step": 262
    },
    {
      "epoch": 0.22844733984799132,
      "grad_norm": 6.643043793388568,
      "learning_rate": 9.07424440966439e-06,
      "loss": 4.7172,
      "step": 263
    },
    {
      "epoch": 0.2293159609120521,
      "grad_norm": 7.816417260568394,
      "learning_rate": 9.066568362024697e-06,
      "loss": 4.7677,
      "step": 264
    },
    {
      "epoch": 0.2301845819761129,
      "grad_norm": 7.24101938038275,
      "learning_rate": 9.058864302728722e-06,
      "loss": 4.9232,
      "step": 265
    },
    {
      "epoch": 0.23105320304017374,
      "grad_norm": 5.882755828325563,
      "learning_rate": 9.051132292283772e-06,
      "loss": 4.8006,
      "step": 266
    },
    {
      "epoch": 0.23192182410423454,
      "grad_norm": 7.033058546114696,
      "learning_rate": 9.043372391416687e-06,
      "loss": 4.8018,
      "step": 267
    },
    {
      "epoch": 0.23279044516829533,
      "grad_norm": 6.638958562180367,
      "learning_rate": 9.035584661073357e-06,
      "loss": 4.8614,
      "step": 268
    },
    {
      "epoch": 0.23365906623235613,
      "grad_norm": 5.164804077792831,
      "learning_rate": 9.02776916241824e-06,
      "loss": 4.6965,
      "step": 269
    },
    {
      "epoch": 0.23452768729641693,
      "grad_norm": 7.8053882165369055,
      "learning_rate": 9.019925956833884e-06,
      "loss": 4.8867,
      "step": 270
    },
    {
      "epoch": 0.23539630836047773,
      "grad_norm": 5.302096025761552,
      "learning_rate": 9.012055105920452e-06,
      "loss": 4.895,
      "step": 271
    },
    {
      "epoch": 0.23626492942453856,
      "grad_norm": 5.851233842251707,
      "learning_rate": 9.004156671495224e-06,
      "loss": 4.9754,
      "step": 272
    },
    {
      "epoch": 0.23713355048859935,
      "grad_norm": 6.290732292956187,
      "learning_rate": 8.996230715592129e-06,
      "loss": 4.8171,
      "step": 273
    },
    {
      "epoch": 0.23800217155266015,
      "grad_norm": 6.760963585753358,
      "learning_rate": 8.988277300461238e-06,
      "loss": 4.6128,
      "step": 274
    },
    {
      "epoch": 0.23887079261672095,
      "grad_norm": 5.437936675855679,
      "learning_rate": 8.980296488568296e-06,
      "loss": 4.8612,
      "step": 275
    },
    {
      "epoch": 0.23973941368078175,
      "grad_norm": 8.546817288458861,
      "learning_rate": 8.972288342594211e-06,
      "loss": 4.7358,
      "step": 276
    },
    {
      "epoch": 0.24060803474484257,
      "grad_norm": 5.168018360375808,
      "learning_rate": 8.96425292543458e-06,
      "loss": 4.8366,
      "step": 277
    },
    {
      "epoch": 0.24147665580890337,
      "grad_norm": 8.339946348434717,
      "learning_rate": 8.95619030019918e-06,
      "loss": 4.9408,
      "step": 278
    },
    {
      "epoch": 0.24234527687296417,
      "grad_norm": 4.812823437728942,
      "learning_rate": 8.94810053021148e-06,
      "loss": 4.8325,
      "step": 279
    },
    {
      "epoch": 0.24321389793702497,
      "grad_norm": 9.00150446795358,
      "learning_rate": 8.939983679008147e-06,
      "loss": 4.766,
      "step": 280
    },
    {
      "epoch": 0.24408251900108577,
      "grad_norm": 6.85159808721181,
      "learning_rate": 8.931839810338541e-06,
      "loss": 4.9044,
      "step": 281
    },
    {
      "epoch": 0.24495114006514657,
      "grad_norm": 5.419252004137471,
      "learning_rate": 8.923668988164213e-06,
      "loss": 4.7154,
      "step": 282
    },
    {
      "epoch": 0.2458197611292074,
      "grad_norm": 8.042101516011865,
      "learning_rate": 8.915471276658405e-06,
      "loss": 4.8391,
      "step": 283
    },
    {
      "epoch": 0.2466883821932682,
      "grad_norm": 6.113748424857356,
      "learning_rate": 8.907246740205553e-06,
      "loss": 4.7499,
      "step": 284
    },
    {
      "epoch": 0.247557003257329,
      "grad_norm": 7.989470062325969,
      "learning_rate": 8.898995443400767e-06,
      "loss": 4.8469,
      "step": 285
    },
    {
      "epoch": 0.2484256243213898,
      "grad_norm": 4.646955530626333,
      "learning_rate": 8.890717451049335e-06,
      "loss": 4.7358,
      "step": 286
    },
    {
      "epoch": 0.24929424538545059,
      "grad_norm": 7.744655477388046,
      "learning_rate": 8.882412828166213e-06,
      "loss": 4.7807,
      "step": 287
    },
    {
      "epoch": 0.2501628664495114,
      "grad_norm": 5.999315404456719,
      "learning_rate": 8.874081639975508e-06,
      "loss": 4.7731,
      "step": 288
    },
    {
      "epoch": 0.2510314875135722,
      "grad_norm": 7.366537198808325,
      "learning_rate": 8.865723951909972e-06,
      "loss": 4.9502,
      "step": 289
    },
    {
      "epoch": 0.251900108577633,
      "grad_norm": 5.915226962483226,
      "learning_rate": 8.857339829610483e-06,
      "loss": 4.7959,
      "step": 290
    },
    {
      "epoch": 0.25276872964169383,
      "grad_norm": 9.035720639467963,
      "learning_rate": 8.848929338925536e-06,
      "loss": 4.7853,
      "step": 291
    },
    {
      "epoch": 0.2536373507057546,
      "grad_norm": 5.777665004211188,
      "learning_rate": 8.84049254591072e-06,
      "loss": 4.8299,
      "step": 292
    },
    {
      "epoch": 0.25450597176981543,
      "grad_norm": 9.211239169808724,
      "learning_rate": 8.8320295168282e-06,
      "loss": 4.6564,
      "step": 293
    },
    {
      "epoch": 0.2553745928338762,
      "grad_norm": 6.121012460613405,
      "learning_rate": 8.8235403181462e-06,
      "loss": 4.6625,
      "step": 294
    },
    {
      "epoch": 0.256243213897937,
      "grad_norm": 7.943211261154958,
      "learning_rate": 8.815025016538477e-06,
      "loss": 4.9183,
      "step": 295
    },
    {
      "epoch": 0.25711183496199785,
      "grad_norm": 7.520085515355943,
      "learning_rate": 8.806483678883803e-06,
      "loss": 4.7658,
      "step": 296
    },
    {
      "epoch": 0.2579804560260586,
      "grad_norm": 6.255511074855573,
      "learning_rate": 8.79791637226543e-06,
      "loss": 4.8817,
      "step": 297
    },
    {
      "epoch": 0.25884907709011945,
      "grad_norm": 8.574508707280637,
      "learning_rate": 8.789323163970573e-06,
      "loss": 4.8496,
      "step": 298
    },
    {
      "epoch": 0.2597176981541802,
      "grad_norm": 5.222979665146013,
      "learning_rate": 8.780704121489876e-06,
      "loss": 4.7925,
      "step": 299
    },
    {
      "epoch": 0.26058631921824105,
      "grad_norm": 9.382159121398075,
      "learning_rate": 8.772059312516883e-06,
      "loss": 4.88,
      "step": 300
    },
    {
      "epoch": 0.2614549402823019,
      "grad_norm": 6.8357894514157245,
      "learning_rate": 8.76338880494751e-06,
      "loss": 4.6685,
      "step": 301
    },
    {
      "epoch": 0.26232356134636264,
      "grad_norm": 6.517300496516373,
      "learning_rate": 8.754692666879504e-06,
      "loss": 4.8814,
      "step": 302
    },
    {
      "epoch": 0.26319218241042347,
      "grad_norm": 9.702116008549124,
      "learning_rate": 8.745970966611917e-06,
      "loss": 4.6584,
      "step": 303
    },
    {
      "epoch": 0.26406080347448424,
      "grad_norm": 6.260530698612518,
      "learning_rate": 8.737223772644562e-06,
      "loss": 4.6473,
      "step": 304
    },
    {
      "epoch": 0.26492942453854507,
      "grad_norm": 9.829705946999965,
      "learning_rate": 8.72845115367748e-06,
      "loss": 4.771,
      "step": 305
    },
    {
      "epoch": 0.26579804560260584,
      "grad_norm": 6.69025005399437,
      "learning_rate": 8.719653178610396e-06,
      "loss": 4.778,
      "step": 306
    },
    {
      "epoch": 0.26666666666666666,
      "grad_norm": 6.70318175356247,
      "learning_rate": 8.710829916542184e-06,
      "loss": 4.8116,
      "step": 307
    },
    {
      "epoch": 0.2675352877307275,
      "grad_norm": 7.751822349780793,
      "learning_rate": 8.701981436770322e-06,
      "loss": 4.6695,
      "step": 308
    },
    {
      "epoch": 0.26840390879478826,
      "grad_norm": 7.108916006280296,
      "learning_rate": 8.69310780879034e-06,
      "loss": 4.7317,
      "step": 309
    },
    {
      "epoch": 0.2692725298588491,
      "grad_norm": 4.328672737830481,
      "learning_rate": 8.684209102295292e-06,
      "loss": 4.6794,
      "step": 310
    },
    {
      "epoch": 0.27014115092290986,
      "grad_norm": 8.322594402892245,
      "learning_rate": 8.675285387175183e-06,
      "loss": 4.8747,
      "step": 311
    },
    {
      "epoch": 0.2710097719869707,
      "grad_norm": 4.508165025699175,
      "learning_rate": 8.666336733516447e-06,
      "loss": 4.9213,
      "step": 312
    },
    {
      "epoch": 0.2718783930510315,
      "grad_norm": 6.5835338898059055,
      "learning_rate": 8.657363211601375e-06,
      "loss": 4.7728,
      "step": 313
    },
    {
      "epoch": 0.2727470141150923,
      "grad_norm": 8.537697102858328,
      "learning_rate": 8.64836489190758e-06,
      "loss": 4.7095,
      "step": 314
    },
    {
      "epoch": 0.2736156351791531,
      "grad_norm": 8.365612338039389,
      "learning_rate": 8.639341845107432e-06,
      "loss": 4.8467,
      "step": 315
    },
    {
      "epoch": 0.2744842562432139,
      "grad_norm": 6.665043705485368,
      "learning_rate": 8.630294142067505e-06,
      "loss": 4.8167,
      "step": 316
    },
    {
      "epoch": 0.2753528773072747,
      "grad_norm": 7.82967483334624,
      "learning_rate": 8.621221853848022e-06,
      "loss": 4.7616,
      "step": 317
    },
    {
      "epoch": 0.2762214983713355,
      "grad_norm": 4.138871108394252,
      "learning_rate": 8.6121250517023e-06,
      "loss": 4.7277,
      "step": 318
    },
    {
      "epoch": 0.2770901194353963,
      "grad_norm": 5.604431351464519,
      "learning_rate": 8.603003807076184e-06,
      "loss": 4.6664,
      "step": 319
    },
    {
      "epoch": 0.2779587404994571,
      "grad_norm": 8.601258868804189,
      "learning_rate": 8.593858191607492e-06,
      "loss": 4.8013,
      "step": 320
    },
    {
      "epoch": 0.2788273615635179,
      "grad_norm": 4.906802688431626,
      "learning_rate": 8.584688277125446e-06,
      "loss": 4.6898,
      "step": 321
    },
    {
      "epoch": 0.2796959826275787,
      "grad_norm": 10.116257183106686,
      "learning_rate": 8.575494135650115e-06,
      "loss": 4.8905,
      "step": 322
    },
    {
      "epoch": 0.28056460369163955,
      "grad_norm": 5.885384217150105,
      "learning_rate": 8.566275839391842e-06,
      "loss": 4.8431,
      "step": 323
    },
    {
      "epoch": 0.2814332247557003,
      "grad_norm": 7.268094086044695,
      "learning_rate": 8.557033460750685e-06,
      "loss": 4.8863,
      "step": 324
    },
    {
      "epoch": 0.28230184581976114,
      "grad_norm": 7.115813734582664,
      "learning_rate": 8.547767072315835e-06,
      "loss": 4.8921,
      "step": 325
    },
    {
      "epoch": 0.2831704668838219,
      "grad_norm": 5.851149266761853,
      "learning_rate": 8.538476746865066e-06,
      "loss": 4.6311,
      "step": 326
    },
    {
      "epoch": 0.28403908794788274,
      "grad_norm": 11.032345343462614,
      "learning_rate": 8.529162557364148e-06,
      "loss": 4.751,
      "step": 327
    },
    {
      "epoch": 0.28490770901194357,
      "grad_norm": 5.0859521313383045,
      "learning_rate": 8.519824576966274e-06,
      "loss": 4.7293,
      "step": 328
    },
    {
      "epoch": 0.28577633007600434,
      "grad_norm": 13.803163681395665,
      "learning_rate": 8.510462879011492e-06,
      "loss": 4.8309,
      "step": 329
    },
    {
      "epoch": 0.28664495114006516,
      "grad_norm": 10.999396408035773,
      "learning_rate": 8.50107753702613e-06,
      "loss": 4.7266,
      "step": 330
    },
    {
      "epoch": 0.28751357220412593,
      "grad_norm": 9.25286217238097,
      "learning_rate": 8.49166862472221e-06,
      "loss": 4.7683,
      "step": 331
    },
    {
      "epoch": 0.28838219326818676,
      "grad_norm": 6.4323672461710135,
      "learning_rate": 8.482236215996881e-06,
      "loss": 4.7786,
      "step": 332
    },
    {
      "epoch": 0.28925081433224753,
      "grad_norm": 11.742176348664781,
      "learning_rate": 8.47278038493182e-06,
      "loss": 4.7423,
      "step": 333
    },
    {
      "epoch": 0.29011943539630836,
      "grad_norm": 7.418561647305186,
      "learning_rate": 8.463301205792675e-06,
      "loss": 4.7125,
      "step": 334
    },
    {
      "epoch": 0.2909880564603692,
      "grad_norm": 13.421309741389427,
      "learning_rate": 8.45379875302846e-06,
      "loss": 4.7378,
      "step": 335
    },
    {
      "epoch": 0.29185667752442995,
      "grad_norm": 12.62311847789775,
      "learning_rate": 8.444273101270982e-06,
      "loss": 4.6425,
      "step": 336
    },
    {
      "epoch": 0.2927252985884908,
      "grad_norm": 7.036866908269808,
      "learning_rate": 8.434724325334252e-06,
      "loss": 4.7556,
      "step": 337
    },
    {
      "epoch": 0.29359391965255155,
      "grad_norm": 8.910330385460947,
      "learning_rate": 8.425152500213898e-06,
      "loss": 4.6621,
      "step": 338
    },
    {
      "epoch": 0.2944625407166124,
      "grad_norm": 6.680041055622366,
      "learning_rate": 8.415557701086572e-06,
      "loss": 4.58,
      "step": 339
    },
    {
      "epoch": 0.2953311617806732,
      "grad_norm": 5.917224901051049,
      "learning_rate": 8.405940003309366e-06,
      "loss": 4.764,
      "step": 340
    },
    {
      "epoch": 0.29619978284473397,
      "grad_norm": 6.025660992004857,
      "learning_rate": 8.396299482419213e-06,
      "loss": 4.9539,
      "step": 341
    },
    {
      "epoch": 0.2970684039087948,
      "grad_norm": 6.8290925321533855,
      "learning_rate": 8.386636214132303e-06,
      "loss": 4.7968,
      "step": 342
    },
    {
      "epoch": 0.29793702497285557,
      "grad_norm": 6.766388648711616,
      "learning_rate": 8.376950274343476e-06,
      "loss": 4.7316,
      "step": 343
    },
    {
      "epoch": 0.2988056460369164,
      "grad_norm": 5.199438881706867,
      "learning_rate": 8.367241739125645e-06,
      "loss": 4.6717,
      "step": 344
    },
    {
      "epoch": 0.2996742671009772,
      "grad_norm": 5.537976824790018,
      "learning_rate": 8.35751068472917e-06,
      "loss": 4.5794,
      "step": 345
    },
    {
      "epoch": 0.300542888165038,
      "grad_norm": 7.434218743498804,
      "learning_rate": 8.347757187581288e-06,
      "loss": 4.5797,
      "step": 346
    },
    {
      "epoch": 0.3014115092290988,
      "grad_norm": 3.7410734562690102,
      "learning_rate": 8.337981324285495e-06,
      "loss": 4.6053,
      "step": 347
    },
    {
      "epoch": 0.3022801302931596,
      "grad_norm": 7.494539433625744,
      "learning_rate": 8.328183171620953e-06,
      "loss": 4.6358,
      "step": 348
    },
    {
      "epoch": 0.3031487513572204,
      "grad_norm": 8.577268240575261,
      "learning_rate": 8.318362806541878e-06,
      "loss": 4.6568,
      "step": 349
    },
    {
      "epoch": 0.30401737242128124,
      "grad_norm": 5.870073757957237,
      "learning_rate": 8.308520306176948e-06,
      "loss": 4.8527,
      "step": 350
    },
    {
      "epoch": 0.304885993485342,
      "grad_norm": 10.403543599580171,
      "learning_rate": 8.298655747828685e-06,
      "loss": 4.6002,
      "step": 351
    },
    {
      "epoch": 0.30575461454940284,
      "grad_norm": 5.381680003218319,
      "learning_rate": 8.288769208972858e-06,
      "loss": 4.7809,
      "step": 352
    },
    {
      "epoch": 0.3066232356134636,
      "grad_norm": 9.877174040443599,
      "learning_rate": 8.278860767257865e-06,
      "loss": 4.7145,
      "step": 353
    },
    {
      "epoch": 0.30749185667752443,
      "grad_norm": 6.336637930519997,
      "learning_rate": 8.26893050050413e-06,
      "loss": 4.8645,
      "step": 354
    },
    {
      "epoch": 0.30836047774158526,
      "grad_norm": 9.02647409091144,
      "learning_rate": 8.258978486703493e-06,
      "loss": 4.6869,
      "step": 355
    },
    {
      "epoch": 0.30922909880564603,
      "grad_norm": 7.279729496607072,
      "learning_rate": 8.24900480401859e-06,
      "loss": 4.883,
      "step": 356
    },
    {
      "epoch": 0.31009771986970686,
      "grad_norm": 6.615700438254423,
      "learning_rate": 8.239009530782244e-06,
      "loss": 4.774,
      "step": 357
    },
    {
      "epoch": 0.3109663409337676,
      "grad_norm": 8.777698668573509,
      "learning_rate": 8.228992745496851e-06,
      "loss": 4.9262,
      "step": 358
    },
    {
      "epoch": 0.31183496199782845,
      "grad_norm": 5.9973840840887505,
      "learning_rate": 8.21895452683376e-06,
      "loss": 4.7127,
      "step": 359
    },
    {
      "epoch": 0.3127035830618892,
      "grad_norm": 10.806900681675172,
      "learning_rate": 8.20889495363266e-06,
      "loss": 4.7254,
      "step": 360
    },
    {
      "epoch": 0.31357220412595005,
      "grad_norm": 8.826153430021765,
      "learning_rate": 8.198814104900951e-06,
      "loss": 4.5981,
      "step": 361
    },
    {
      "epoch": 0.3144408251900109,
      "grad_norm": 9.306758043660382,
      "learning_rate": 8.188712059813135e-06,
      "loss": 4.6332,
      "step": 362
    },
    {
      "epoch": 0.31530944625407165,
      "grad_norm": 5.858792200617635,
      "learning_rate": 8.178588897710189e-06,
      "loss": 4.58,
      "step": 363
    },
    {
      "epoch": 0.31617806731813247,
      "grad_norm": 6.68100229149933,
      "learning_rate": 8.16844469809894e-06,
      "loss": 4.7159,
      "step": 364
    },
    {
      "epoch": 0.31704668838219324,
      "grad_norm": 5.735283629069225,
      "learning_rate": 8.158279540651446e-06,
      "loss": 4.8563,
      "step": 365
    },
    {
      "epoch": 0.31791530944625407,
      "grad_norm": 6.584386859485198,
      "learning_rate": 8.14809350520436e-06,
      "loss": 4.751,
      "step": 366
    },
    {
      "epoch": 0.3187839305103149,
      "grad_norm": 6.582333473666524,
      "learning_rate": 8.137886671758317e-06,
      "loss": 4.7886,
      "step": 367
    },
    {
      "epoch": 0.31965255157437567,
      "grad_norm": 6.4883303364815275,
      "learning_rate": 8.127659120477296e-06,
      "loss": 4.6657,
      "step": 368
    },
    {
      "epoch": 0.3205211726384365,
      "grad_norm": 5.918831827629274,
      "learning_rate": 8.117410931687992e-06,
      "loss": 4.8691,
      "step": 369
    },
    {
      "epoch": 0.32138979370249726,
      "grad_norm": 5.283964365819055,
      "learning_rate": 8.107142185879185e-06,
      "loss": 4.8074,
      "step": 370
    },
    {
      "epoch": 0.3222584147665581,
      "grad_norm": 3.8890066265932184,
      "learning_rate": 8.096852963701113e-06,
      "loss": 4.5405,
      "step": 371
    },
    {
      "epoch": 0.3231270358306189,
|
"grad_norm": 6.990269124593145, |
|
"learning_rate": 8.086543345964833e-06, |
|
"loss": 4.6027, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.3239956568946797, |
|
"grad_norm": 8.43721076775689, |
|
"learning_rate": 8.07621341364158e-06, |
|
"loss": 4.647, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.3248642779587405, |
|
"grad_norm": 5.58990373211778, |
|
"learning_rate": 8.065863247862153e-06, |
|
"loss": 4.6015, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.3257328990228013, |
|
"grad_norm": 6.97196709970691, |
|
"learning_rate": 8.05549292991625e-06, |
|
"loss": 4.6307, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3266015200868621, |
|
"grad_norm": 6.1054642976234215, |
|
"learning_rate": 8.045102541251855e-06, |
|
"loss": 4.5934, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.32747014115092293, |
|
"grad_norm": 5.762512151008954, |
|
"learning_rate": 8.034692163474576e-06, |
|
"loss": 4.5073, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3283387622149837, |
|
"grad_norm": 7.3544384303060575, |
|
"learning_rate": 8.02426187834702e-06, |
|
"loss": 4.7555, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.32920738327904453, |
|
"grad_norm": 4.541115018669931, |
|
"learning_rate": 8.013811767788144e-06, |
|
"loss": 4.5251, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3300760043431053, |
|
"grad_norm": 7.061819858205978, |
|
"learning_rate": 8.003341913872616e-06, |
|
"loss": 4.6566, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3309446254071661, |
|
"grad_norm": 5.949506061458709, |
|
"learning_rate": 7.992852398830164e-06, |
|
"loss": 4.6047, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.33181324647122695, |
|
"grad_norm": 8.186210627662106, |
|
"learning_rate": 7.982343305044932e-06, |
|
"loss": 4.5416, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3326818675352877, |
|
"grad_norm": 5.5358466371451645, |
|
"learning_rate": 7.971814715054837e-06, |
|
"loss": 4.6173, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.33355048859934855, |
|
"grad_norm": 7.682761597446245, |
|
"learning_rate": 7.961266711550922e-06, |
|
"loss": 4.5791, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.3344191096634093, |
|
"grad_norm": 5.430465140999319, |
|
"learning_rate": 7.950699377376696e-06, |
|
"loss": 4.4514, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.33528773072747015, |
|
"grad_norm": 7.022253684816494, |
|
"learning_rate": 7.940112795527493e-06, |
|
"loss": 4.7673, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.33615635179153097, |
|
"grad_norm": 6.175962941485549, |
|
"learning_rate": 7.929507049149817e-06, |
|
"loss": 4.5196, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.33702497285559174, |
|
"grad_norm": 7.343612570596667, |
|
"learning_rate": 7.918882221540692e-06, |
|
"loss": 4.6791, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.33789359391965257, |
|
"grad_norm": 5.998391268508065, |
|
"learning_rate": 7.908238396147002e-06, |
|
"loss": 4.8548, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.33876221498371334, |
|
"grad_norm": 7.019440378415661, |
|
"learning_rate": 7.897575656564836e-06, |
|
"loss": 4.6303, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.33963083604777417, |
|
"grad_norm": 6.839076558244222, |
|
"learning_rate": 7.886894086538841e-06, |
|
"loss": 4.6871, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.34049945711183494, |
|
"grad_norm": 5.40567329714043, |
|
"learning_rate": 7.876193769961555e-06, |
|
"loss": 4.6996, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.34136807817589576, |
|
"grad_norm": 7.504268933018013, |
|
"learning_rate": 7.865474790872749e-06, |
|
"loss": 4.6356, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3422366992399566, |
|
"grad_norm": 4.823115754529949, |
|
"learning_rate": 7.854737233458764e-06, |
|
"loss": 4.5891, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.34310532030401736, |
|
"grad_norm": 7.282830148401436, |
|
"learning_rate": 7.843981182051866e-06, |
|
"loss": 4.5814, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3439739413680782, |
|
"grad_norm": 7.366691458186102, |
|
"learning_rate": 7.83320672112956e-06, |
|
"loss": 4.6604, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.34484256243213895, |
|
"grad_norm": 4.92486295778556, |
|
"learning_rate": 7.822413935313947e-06, |
|
"loss": 4.6913, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3457111834961998, |
|
"grad_norm": 8.210710382886061, |
|
"learning_rate": 7.811602909371044e-06, |
|
"loss": 4.5104, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3465798045602606, |
|
"grad_norm": 5.177887473136741, |
|
"learning_rate": 7.800773728210133e-06, |
|
"loss": 4.7158, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3474484256243214, |
|
"grad_norm": 6.985161948689458, |
|
"learning_rate": 7.789926476883079e-06, |
|
"loss": 4.5611, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3483170466883822, |
|
"grad_norm": 6.148668513601841, |
|
"learning_rate": 7.779061240583669e-06, |
|
"loss": 4.7723, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.349185667752443, |
|
"grad_norm": 5.763986287210963, |
|
"learning_rate": 7.768178104646953e-06, |
|
"loss": 4.6559, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3500542888165038, |
|
"grad_norm": 5.7385002364185915, |
|
"learning_rate": 7.757277154548552e-06, |
|
"loss": 4.6866, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.3509229098805646, |
|
"grad_norm": 5.490491868927748, |
|
"learning_rate": 7.746358475904006e-06, |
|
"loss": 4.4739, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3517915309446254, |
|
"grad_norm": 7.746007698882787, |
|
"learning_rate": 7.735422154468087e-06, |
|
"loss": 4.6623, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3526601520086862, |
|
"grad_norm": 5.140117986756119, |
|
"learning_rate": 7.724468276134143e-06, |
|
"loss": 4.5921, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.353528773072747, |
|
"grad_norm": 5.822784492855372, |
|
"learning_rate": 7.713496926933405e-06, |
|
"loss": 4.5497, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.3543973941368078, |
|
"grad_norm": 4.504836312173092, |
|
"learning_rate": 7.70250819303432e-06, |
|
"loss": 4.6658, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.35526601520086865, |
|
"grad_norm": 10.535092606649714, |
|
"learning_rate": 7.691502160741879e-06, |
|
"loss": 4.5037, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.3561346362649294, |
|
"grad_norm": 6.1030400833165555, |
|
"learning_rate": 7.680478916496927e-06, |
|
"loss": 4.7045, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.35700325732899024, |
|
"grad_norm": 7.459890595821586, |
|
"learning_rate": 7.669438546875495e-06, |
|
"loss": 4.7035, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.357871878393051, |
|
"grad_norm": 8.136016337416066, |
|
"learning_rate": 7.658381138588111e-06, |
|
"loss": 4.5822, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.35874049945711184, |
|
"grad_norm": 7.256847852406638, |
|
"learning_rate": 7.647306778479135e-06, |
|
"loss": 4.5594, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.35960912052117266, |
|
"grad_norm": 7.032905775099178, |
|
"learning_rate": 7.636215553526054e-06, |
|
"loss": 4.6578, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.36047774158523344, |
|
"grad_norm": 6.984223271346296, |
|
"learning_rate": 7.625107550838813e-06, |
|
"loss": 4.5949, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.36134636264929426, |
|
"grad_norm": 8.223177636534274, |
|
"learning_rate": 7.613982857659134e-06, |
|
"loss": 4.5468, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.36221498371335503, |
|
"grad_norm": 5.980657620361127, |
|
"learning_rate": 7.602841561359822e-06, |
|
"loss": 4.495, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.36308360477741586, |
|
"grad_norm": 6.056077053853974, |
|
"learning_rate": 7.591683749444077e-06, |
|
"loss": 4.7391, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.36395222584147663, |
|
"grad_norm": 7.884243252755605, |
|
"learning_rate": 7.5805095095448245e-06, |
|
"loss": 4.5782, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.36482084690553745, |
|
"grad_norm": 8.363070322855709, |
|
"learning_rate": 7.569318929424002e-06, |
|
"loss": 4.5914, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.3656894679695983, |
|
"grad_norm": 6.163289948356193, |
|
"learning_rate": 7.558112096971889e-06, |
|
"loss": 4.6895, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.36655808903365905, |
|
"grad_norm": 6.6531465448851135, |
|
"learning_rate": 7.5468891002064045e-06, |
|
"loss": 4.5072, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.3674267100977199, |
|
"grad_norm": 6.718432675278386, |
|
"learning_rate": 7.535650027272432e-06, |
|
"loss": 4.5402, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.36829533116178065, |
|
"grad_norm": 6.071906565126357, |
|
"learning_rate": 7.5243949664411035e-06, |
|
"loss": 4.6823, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.3691639522258415, |
|
"grad_norm": 7.151230840698435, |
|
"learning_rate": 7.5131240061091285e-06, |
|
"loss": 4.5005, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3700325732899023, |
|
"grad_norm": 5.338683591643838, |
|
"learning_rate": 7.501837234798084e-06, |
|
"loss": 4.5487, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.37090119435396307, |
|
"grad_norm": 6.10815615028733, |
|
"learning_rate": 7.490534741153733e-06, |
|
"loss": 4.6367, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3717698154180239, |
|
"grad_norm": 6.115503279804057, |
|
"learning_rate": 7.47921661394531e-06, |
|
"loss": 4.5094, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.37263843648208467, |
|
"grad_norm": 5.269106785049928, |
|
"learning_rate": 7.46788294206485e-06, |
|
"loss": 4.5126, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3735070575461455, |
|
"grad_norm": 7.458849192557889, |
|
"learning_rate": 7.4565338145264595e-06, |
|
"loss": 4.7316, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3743756786102063, |
|
"grad_norm": 4.724897099377871, |
|
"learning_rate": 7.445169320465645e-06, |
|
"loss": 4.7243, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3752442996742671, |
|
"grad_norm": 7.229325419079616, |
|
"learning_rate": 7.433789549138592e-06, |
|
"loss": 4.4991, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3761129207383279, |
|
"grad_norm": 6.540185537119028, |
|
"learning_rate": 7.42239458992148e-06, |
|
"loss": 4.4751, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3769815418023887, |
|
"grad_norm": 7.538652093884085, |
|
"learning_rate": 7.410984532309768e-06, |
|
"loss": 4.5819, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3778501628664495, |
|
"grad_norm": 10.09025026929928, |
|
"learning_rate": 7.399559465917499e-06, |
|
"loss": 4.5811, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.37871878393051034, |
|
"grad_norm": 5.99150737185514, |
|
"learning_rate": 7.3881194804765975e-06, |
|
"loss": 4.6508, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3795874049945711, |
|
"grad_norm": 10.294753301023693, |
|
"learning_rate": 7.376664665836156e-06, |
|
"loss": 4.6694, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.38045602605863194, |
|
"grad_norm": 10.09163312709563, |
|
"learning_rate": 7.3651951119617415e-06, |
|
"loss": 4.515, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3813246471226927, |
|
"grad_norm": 8.660200561326562, |
|
"learning_rate": 7.353710908934672e-06, |
|
"loss": 4.7109, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.38219326818675353, |
|
"grad_norm": 5.980585172331498, |
|
"learning_rate": 7.342212146951329e-06, |
|
"loss": 4.62, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.38306188925081436, |
|
"grad_norm": 7.2092542635747705, |
|
"learning_rate": 7.3306989163224365e-06, |
|
"loss": 4.6266, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.38393051031487513, |
|
"grad_norm": 8.245957827229953, |
|
"learning_rate": 7.319171307472355e-06, |
|
"loss": 4.5792, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.38479913137893595, |
|
"grad_norm": 4.686151630126687, |
|
"learning_rate": 7.307629410938364e-06, |
|
"loss": 4.6665, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.3856677524429967, |
|
"grad_norm": 7.408155624716384, |
|
"learning_rate": 7.296073317369967e-06, |
|
"loss": 4.3742, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.38653637350705755, |
|
"grad_norm": 7.497505710273664, |
|
"learning_rate": 7.284503117528167e-06, |
|
"loss": 4.5901, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3874049945711184, |
|
"grad_norm": 4.224052938744046, |
|
"learning_rate": 7.272918902284758e-06, |
|
"loss": 4.625, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.38827361563517915, |
|
"grad_norm": 9.130148186812361, |
|
"learning_rate": 7.261320762621605e-06, |
|
"loss": 4.5953, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.38914223669924, |
|
"grad_norm": 5.04037207826654, |
|
"learning_rate": 7.249708789629944e-06, |
|
"loss": 4.3398, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.39001085776330074, |
|
"grad_norm": 10.934078227299317, |
|
"learning_rate": 7.2380830745096474e-06, |
|
"loss": 4.8164, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.39087947882736157, |
|
"grad_norm": 5.288203351054606, |
|
"learning_rate": 7.226443708568525e-06, |
|
"loss": 4.6176, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.39174809989142234, |
|
"grad_norm": 13.494419416996744, |
|
"learning_rate": 7.214790783221596e-06, |
|
"loss": 4.4502, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.39261672095548317, |
|
"grad_norm": 5.947583732503299, |
|
"learning_rate": 7.2031243899903755e-06, |
|
"loss": 4.6519, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.393485342019544, |
|
"grad_norm": 13.629266769082431, |
|
"learning_rate": 7.191444620502154e-06, |
|
"loss": 4.6178, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.39435396308360476, |
|
"grad_norm": 8.435112519451463, |
|
"learning_rate": 7.17975156648928e-06, |
|
"loss": 4.3477, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3952225841476656, |
|
"grad_norm": 13.097395961934241, |
|
"learning_rate": 7.168045319788436e-06, |
|
"loss": 4.5158, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.39609120521172636, |
|
"grad_norm": 9.326564191975844, |
|
"learning_rate": 7.1563259723399204e-06, |
|
"loss": 4.6681, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3969598262757872, |
|
"grad_norm": 12.31102530036286, |
|
"learning_rate": 7.144593616186925e-06, |
|
"loss": 4.4425, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.397828447339848, |
|
"grad_norm": 9.61855296186821, |
|
"learning_rate": 7.13284834347481e-06, |
|
"loss": 4.755, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.3986970684039088, |
|
"grad_norm": 11.132702885829065, |
|
"learning_rate": 7.121090246450381e-06, |
|
"loss": 4.6984, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3995656894679696, |
|
"grad_norm": 9.438660704133007, |
|
"learning_rate": 7.1093194174611665e-06, |
|
"loss": 4.5854, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.4004343105320304, |
|
"grad_norm": 10.460987068330683, |
|
"learning_rate": 7.0975359489546914e-06, |
|
"loss": 4.6308, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.4013029315960912, |
|
"grad_norm": 8.135876347998378, |
|
"learning_rate": 7.0857399334777525e-06, |
|
"loss": 4.5529, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.40217155266015203, |
|
"grad_norm": 12.65605978727452, |
|
"learning_rate": 7.073931463675685e-06, |
|
"loss": 4.5816, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.4030401737242128, |
|
"grad_norm": 10.229434009840299, |
|
"learning_rate": 7.062110632291641e-06, |
|
"loss": 4.5845, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.40390879478827363, |
|
"grad_norm": 9.117061503179459, |
|
"learning_rate": 7.0502775321658655e-06, |
|
"loss": 4.5246, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4047774158523344, |
|
"grad_norm": 8.569940050056474, |
|
"learning_rate": 7.038432256234956e-06, |
|
"loss": 4.5735, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.4056460369163952, |
|
"grad_norm": 7.752296760070429, |
|
"learning_rate": 7.026574897531137e-06, |
|
"loss": 4.3389, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.40651465798045605, |
|
"grad_norm": 6.650253151223029, |
|
"learning_rate": 7.014705549181537e-06, |
|
"loss": 4.559, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.4073832790445168, |
|
"grad_norm": 6.044551087774109, |
|
"learning_rate": 7.0028243044074425e-06, |
|
"loss": 4.565, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.40825190010857765, |
|
"grad_norm": 7.013354857314451, |
|
"learning_rate": 6.990931256523583e-06, |
|
"loss": 4.4699, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4091205211726384, |
|
"grad_norm": 7.694869076287624, |
|
"learning_rate": 6.97902649893738e-06, |
|
"loss": 4.5624, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.40998914223669924, |
|
"grad_norm": 4.8526449692385905, |
|
"learning_rate": 6.96711012514823e-06, |
|
"loss": 4.5316, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.41085776330076007, |
|
"grad_norm": 6.982180222415533, |
|
"learning_rate": 6.955182228746757e-06, |
|
"loss": 4.4836, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.41172638436482084, |
|
"grad_norm": 5.126018865451303, |
|
"learning_rate": 6.943242903414087e-06, |
|
"loss": 4.6034, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.41259500542888167, |
|
"grad_norm": 5.649991787266421, |
|
"learning_rate": 6.9312922429211065e-06, |
|
"loss": 4.576, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.41346362649294244, |
|
"grad_norm": 6.484179499960972, |
|
"learning_rate": 6.9193303411277265e-06, |
|
"loss": 4.4123, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.41433224755700326, |
|
"grad_norm": 6.697110498760649, |
|
"learning_rate": 6.907357291982148e-06, |
|
"loss": 4.5219, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.41520086862106403, |
|
"grad_norm": 5.161776605561941, |
|
"learning_rate": 6.895373189520124e-06, |
|
"loss": 4.6172, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.41606948968512486, |
|
"grad_norm": 6.744775916139352, |
|
"learning_rate": 6.883378127864218e-06, |
|
"loss": 4.4805, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.4169381107491857, |
|
"grad_norm": 5.164686600631932, |
|
"learning_rate": 6.871372201223068e-06, |
|
"loss": 4.5713, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.41780673181324646, |
|
"grad_norm": 6.684936475739929, |
|
"learning_rate": 6.859355503890643e-06, |
|
"loss": 4.5636, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.4186753528773073, |
|
"grad_norm": 4.963603500277838, |
|
"learning_rate": 6.847328130245506e-06, |
|
"loss": 4.4979, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.41954397394136805, |
|
"grad_norm": 6.455524904363139, |
|
"learning_rate": 6.83529017475007e-06, |
|
"loss": 4.5704, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.4204125950054289, |
|
"grad_norm": 5.327238544989892, |
|
"learning_rate": 6.8232417319498585e-06, |
|
"loss": 4.5729, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.4212812160694897, |
|
"grad_norm": 7.5408057979505525, |
|
"learning_rate": 6.811182896472764e-06, |
|
"loss": 4.5542, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4221498371335505, |
|
"grad_norm": 5.148215274061414, |
|
"learning_rate": 6.799113763028296e-06, |
|
"loss": 4.5727, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.4230184581976113, |
|
"grad_norm": 5.126262165864149, |
|
"learning_rate": 6.78703442640685e-06, |
|
"loss": 4.392, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.4238870792616721, |
|
"grad_norm": 6.5704973294653986, |
|
"learning_rate": 6.774944981478953e-06, |
|
"loss": 4.7496, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.4247557003257329, |
|
"grad_norm": 7.6270736279053715, |
|
"learning_rate": 6.762845523194527e-06, |
|
"loss": 4.4043, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.4256243213897937, |
|
"grad_norm": 4.7832965680424975, |
|
"learning_rate": 6.750736146582129e-06, |
|
"loss": 4.4942, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.4264929424538545, |
|
"grad_norm": 8.637265182760746, |
|
"learning_rate": 6.738616946748229e-06, |
|
"loss": 4.6536, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.4273615635179153, |
|
"grad_norm": 5.5509816327572, |
|
"learning_rate": 6.726488018876431e-06, |
|
"loss": 4.5646, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.4282301845819761, |
|
"grad_norm": 7.976660262443447, |
|
"learning_rate": 6.7143494582267565e-06, |
|
"loss": 4.3377, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.4290988056460369, |
|
"grad_norm": 5.8635253190574925, |
|
"learning_rate": 6.702201360134874e-06, |
|
"loss": 4.6456, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.42996742671009774, |
|
"grad_norm": 6.191768041893744, |
|
"learning_rate": 6.690043820011362e-06, |
|
"loss": 4.4138, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4308360477741585, |
|
"grad_norm": 6.21848167204258, |
|
"learning_rate": 6.677876933340952e-06, |
|
"loss": 4.5893, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.43170466883821934, |
|
"grad_norm": 6.974213096926323, |
|
"learning_rate": 6.665700795681795e-06, |
|
"loss": 4.488, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.4325732899022801, |
|
"grad_norm": 4.90870178244335, |
|
"learning_rate": 6.65351550266468e-06, |
|
"loss": 4.4874, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.43344191096634094, |
|
"grad_norm": 8.263746621881054, |
|
"learning_rate": 6.64132114999232e-06, |
|
"loss": 4.3805, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.43431053203040176, |
|
"grad_norm": 7.323957132091585, |
|
"learning_rate": 6.6291178334385695e-06, |
|
"loss": 4.4165, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.43517915309446253, |
|
"grad_norm": 6.480701522877974, |
|
"learning_rate": 6.616905648847693e-06, |
|
"loss": 4.3516, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.43604777415852336, |
|
"grad_norm": 7.079757216369442, |
|
"learning_rate": 6.604684692133597e-06, |
|
"loss": 4.6046, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.43691639522258413, |
|
"grad_norm": 7.375345908223872, |
|
"learning_rate": 6.5924550592790894e-06, |
|
"loss": 4.5464, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.43778501628664496, |
|
"grad_norm": 7.222616053445717, |
|
"learning_rate": 6.580216846335118e-06, |
|
"loss": 4.5919, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4386536373507057, |
|
"grad_norm": 4.689789965685166, |
|
"learning_rate": 6.567970149420018e-06, |
|
"loss": 4.3752, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.43952225841476655, |
|
"grad_norm": 5.714230313496368, |
|
"learning_rate": 6.555715064718756e-06, |
|
"loss": 4.5501, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4403908794788274, |
|
"grad_norm": 7.038923018037996, |
|
"learning_rate": 6.543451688482182e-06, |
|
"loss": 4.4103, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.44125950054288815, |
|
"grad_norm": 7.943178319434065, |
|
"learning_rate": 6.531180117026258e-06, |
|
"loss": 4.5398, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.442128121606949, |
|
"grad_norm": 5.455050769628667, |
|
"learning_rate": 6.518900446731319e-06, |
|
"loss": 4.5569, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.44299674267100975, |
|
"grad_norm": 9.260245349881815, |
|
"learning_rate": 6.506612774041302e-06, |
|
"loss": 4.6123, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.4438653637350706, |
|
"grad_norm": 8.258092780806443, |
|
"learning_rate": 6.494317195462999e-06, |
|
"loss": 4.4937, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.4447339847991314, |
|
"grad_norm": 7.575744472833341, |
|
"learning_rate": 6.482013807565292e-06, |
|
"loss": 4.479, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.44560260586319217, |
|
"grad_norm": 6.848928494852384, |
|
"learning_rate": 6.469702706978397e-06, |
|
"loss": 4.6304, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.446471226927253, |
|
"grad_norm": 6.429893016072357, |
|
"learning_rate": 6.457383990393105e-06, |
|
"loss": 4.4752, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.44733984799131377, |
|
"grad_norm": 6.645971348847954, |
|
"learning_rate": 6.445057754560025e-06, |
|
"loss": 4.4823, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.4482084690553746, |
|
"grad_norm": 7.649197834317788, |
|
"learning_rate": 6.432724096288818e-06, |
|
"loss": 4.4119, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.4490770901194354, |
|
"grad_norm": 4.915321016306434, |
|
"learning_rate": 6.420383112447446e-06, |
|
"loss": 4.5927, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4499457111834962, |
|
"grad_norm": 11.680853631510265, |
|
"learning_rate": 6.408034899961398e-06, |
|
"loss": 4.6962, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.450814332247557, |
|
"grad_norm": 8.30571918662033, |
|
"learning_rate": 6.395679555812942e-06, |
|
"loss": 4.4227, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4516829533116178, |
|
"grad_norm": 9.811824442503674, |
|
"learning_rate": 6.383317177040357e-06, |
|
"loss": 4.5434, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.4525515743756786, |
|
"grad_norm": 8.160240990996138, |
|
"learning_rate": 6.370947860737173e-06, |
|
"loss": 4.5247, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.45342019543973944, |
|
"grad_norm": 9.499135895845495, |
|
"learning_rate": 6.358571704051401e-06, |
|
"loss": 4.5605, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.4542888165038002, |
|
"grad_norm": 8.5708728035736, |
|
"learning_rate": 6.346188804184782e-06, |
|
"loss": 4.4378, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.45515743756786103, |
|
"grad_norm": 8.020265826405886, |
|
"learning_rate": 6.333799258392015e-06, |
|
"loss": 4.3991, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.4560260586319218, |
|
"grad_norm": 8.224933821515378, |
|
"learning_rate": 6.3214031639799975e-06, |
|
"loss": 4.5, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.45689467969598263, |
|
"grad_norm": 7.476113586399829, |
|
"learning_rate": 6.309000618307058e-06, |
|
"loss": 4.5767, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.45776330076004346, |
|
"grad_norm": 7.324699664493708, |
|
"learning_rate": 6.296591718782193e-06, |
|
"loss": 4.6139, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.4586319218241042, |
|
"grad_norm": 8.277908852422534, |
|
"learning_rate": 6.284176562864303e-06, |
|
"loss": 4.4586, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.45950054288816505, |
|
"grad_norm": 6.4211734472674005, |
|
"learning_rate": 6.271755248061425e-06, |
|
"loss": 4.4396, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.4603691639522258, |
|
"grad_norm": 8.333565942506247, |
|
"learning_rate": 6.259327871929968e-06, |
|
"loss": 4.4319, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.46123778501628665, |
|
"grad_norm": 5.765983896353204, |
|
"learning_rate": 6.246894532073945e-06, |
|
"loss": 4.5144, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.4621064060803475, |
|
"grad_norm": 8.432302157463148, |
|
"learning_rate": 6.234455326144208e-06, |
|
"loss": 4.4091, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.46297502714440825, |
|
"grad_norm": 6.536784604924449, |
|
"learning_rate": 6.222010351837684e-06, |
|
"loss": 4.438, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.4638436482084691, |
|
"grad_norm": 7.775568222041834, |
|
"learning_rate": 6.209559706896603e-06, |
|
"loss": 4.4694, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.46471226927252984, |
|
"grad_norm": 7.053360240600179, |
|
"learning_rate": 6.197103489107726e-06, |
|
"loss": 4.5039, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.46558089033659067, |
|
"grad_norm": 6.449780887258137, |
|
"learning_rate": 6.184641796301596e-06, |
|
"loss": 4.4826, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.46644951140065144, |
|
"grad_norm": 6.784307145245734, |
|
"learning_rate": 6.172174726351743e-06, |
|
"loss": 4.6193, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.46731813246471227, |
|
"grad_norm": 6.2467877899636575, |
|
"learning_rate": 6.159702377173935e-06, |
|
"loss": 4.4363, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4681867535287731, |
|
"grad_norm": 7.5393857303754155, |
|
"learning_rate": 6.147224846725402e-06, |
|
"loss": 4.4544, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.46905537459283386, |
|
"grad_norm": 6.6525514703420585, |
|
"learning_rate": 6.134742233004073e-06, |
|
"loss": 4.5408, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4699239956568947, |
|
"grad_norm": 7.973280585624998, |
|
"learning_rate": 6.122254634047787e-06, |
|
"loss": 4.5086, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.47079261672095546, |
|
"grad_norm": 6.6800439107137635, |
|
"learning_rate": 6.109762147933553e-06, |
|
"loss": 4.3572, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.4716612377850163, |
|
"grad_norm": 6.076538888634451, |
|
"learning_rate": 6.097264872776749e-06, |
|
"loss": 4.5751, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4725298588490771, |
|
"grad_norm": 8.835225489595787, |
|
"learning_rate": 6.084762906730379e-06, |
|
"loss": 4.5004, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4733984799131379, |
|
"grad_norm": 4.829662247046767, |
|
"learning_rate": 6.0722563479842764e-06, |
|
"loss": 4.3311, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.4742671009771987, |
|
"grad_norm": 6.986298812339827, |
|
"learning_rate": 6.059745294764359e-06, |
|
"loss": 4.365, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4751357220412595, |
|
"grad_norm": 7.574390765696185, |
|
"learning_rate": 6.04722984533183e-06, |
|
"loss": 4.4365, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4760043431053203, |
|
"grad_norm": 5.7402675424404395, |
|
"learning_rate": 6.034710097982432e-06, |
|
"loss": 4.2018, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.47687296416938113, |
|
"grad_norm": 7.703977261086262, |
|
"learning_rate": 6.022186151045652e-06, |
|
"loss": 4.4048, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4777415852334419, |
|
"grad_norm": 6.289071645181934, |
|
"learning_rate": 6.009658102883974e-06, |
|
"loss": 4.5359, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4786102062975027, |
|
"grad_norm": 6.972078426295843, |
|
"learning_rate": 5.997126051892082e-06, |
|
"loss": 4.4349, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4794788273615635, |
|
"grad_norm": 7.66830398869564, |
|
"learning_rate": 5.984590096496099e-06, |
|
"loss": 4.5563, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4803474484256243, |
|
"grad_norm": 6.0227616340295365, |
|
"learning_rate": 5.972050335152819e-06, |
|
"loss": 4.2982, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.48121606948968515, |
|
"grad_norm": 8.678277229194649, |
|
"learning_rate": 5.959506866348924e-06, |
|
"loss": 4.4519, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4820846905537459, |
|
"grad_norm": 4.787842386763273, |
|
"learning_rate": 5.94695978860021e-06, |
|
"loss": 4.438, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.48295331161780675, |
|
"grad_norm": 10.597087001950817, |
|
"learning_rate": 5.934409200450828e-06, |
|
"loss": 4.3361, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4838219326818675, |
|
"grad_norm": 6.769261685937347, |
|
"learning_rate": 5.9218552004724895e-06, |
|
"loss": 4.2451, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.48469055374592834, |
|
"grad_norm": 8.774187421231344, |
|
"learning_rate": 5.909297887263708e-06, |
|
"loss": 4.4513, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.48555917480998917, |
|
"grad_norm": 6.831684880334275, |
|
"learning_rate": 5.896737359449015e-06, |
|
"loss": 4.5108, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.48642779587404994, |
|
"grad_norm": 7.414601371689213, |
|
"learning_rate": 5.884173715678193e-06, |
|
"loss": 4.4822, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.48729641693811077, |
|
"grad_norm": 6.332769305151126, |
|
"learning_rate": 5.871607054625497e-06, |
|
"loss": 4.3, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.48816503800217154, |
|
"grad_norm": 7.518053075962911, |
|
"learning_rate": 5.859037474988875e-06, |
|
"loss": 4.2958, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.48903365906623236, |
|
"grad_norm": 7.791263799758281, |
|
"learning_rate": 5.846465075489202e-06, |
|
"loss": 4.6028, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.48990228013029313, |
|
"grad_norm": 7.99797189120995, |
|
"learning_rate": 5.8338899548695004e-06, |
|
"loss": 4.362, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.49077090119435396, |
|
"grad_norm": 7.553380214852644, |
|
"learning_rate": 5.821312211894159e-06, |
|
"loss": 4.5026, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4916395222584148, |
|
"grad_norm": 5.392709057685767, |
|
"learning_rate": 5.808731945348168e-06, |
|
"loss": 4.3386, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.49250814332247556, |
|
"grad_norm": 7.863510875655094, |
|
"learning_rate": 5.7961492540363365e-06, |
|
"loss": 4.564, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4933767643865364, |
|
"grad_norm": 7.618164313464631, |
|
"learning_rate": 5.783564236782514e-06, |
|
"loss": 4.4442, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.49424538545059715, |
|
"grad_norm": 8.728366265826342, |
|
"learning_rate": 5.770976992428821e-06, |
|
"loss": 4.4746, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.495114006514658, |
|
"grad_norm": 7.8924314705792025, |
|
"learning_rate": 5.758387619834872e-06, |
|
"loss": 4.4972, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4959826275787188, |
|
"grad_norm": 6.344676137880645, |
|
"learning_rate": 5.74579621787699e-06, |
|
"loss": 4.4232, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4968512486427796, |
|
"grad_norm": 5.552402668916067, |
|
"learning_rate": 5.73320288544744e-06, |
|
"loss": 4.46, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4977198697068404, |
|
"grad_norm": 7.611624637980265, |
|
"learning_rate": 5.720607721453651e-06, |
|
"loss": 4.5376, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.49858849077090117, |
|
"grad_norm": 7.077190569309273, |
|
"learning_rate": 5.708010824817432e-06, |
|
"loss": 4.3802, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.499457111834962, |
|
"grad_norm": 6.943034504994782, |
|
"learning_rate": 5.695412294474208e-06, |
|
"loss": 4.4052, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5003257328990228, |
|
"grad_norm": 7.847100098403116, |
|
"learning_rate": 5.682812229372225e-06, |
|
"loss": 4.3222, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.5011943539630836, |
|
"grad_norm": 4.926170240297532, |
|
"learning_rate": 5.67021072847179e-06, |
|
"loss": 4.3407, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.5020629750271444, |
|
"grad_norm": 4.81827513225177, |
|
"learning_rate": 5.657607890744485e-06, |
|
"loss": 4.5482, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.5029315960912052, |
|
"grad_norm": 9.69284798196995, |
|
"learning_rate": 5.64500381517239e-06, |
|
"loss": 4.5489, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.503800217155266, |
|
"grad_norm": 6.4397567914279525, |
|
"learning_rate": 5.632398600747307e-06, |
|
"loss": 4.5095, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5046688382193268, |
|
"grad_norm": 7.564842572267876, |
|
"learning_rate": 5.619792346469988e-06, |
|
"loss": 4.3857, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.5055374592833877, |
|
"grad_norm": 5.717273086517985, |
|
"learning_rate": 5.607185151349342e-06, |
|
"loss": 4.601, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.5064060803474484, |
|
"grad_norm": 5.765808842291691, |
|
"learning_rate": 5.594577114401677e-06, |
|
"loss": 4.4892, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.5072747014115092, |
|
"grad_norm": 8.975257143861999, |
|
"learning_rate": 5.581968334649906e-06, |
|
"loss": 4.3199, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.50814332247557, |
|
"grad_norm": 5.004707622689579, |
|
"learning_rate": 5.56935891112278e-06, |
|
"loss": 4.5351, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5090119435396309, |
|
"grad_norm": 12.075805744578952, |
|
"learning_rate": 5.5567489428541035e-06, |
|
"loss": 4.5282, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.5098805646036917, |
|
"grad_norm": 7.78389463909419, |
|
"learning_rate": 5.54413852888196e-06, |
|
"loss": 4.3581, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.5107491856677524, |
|
"grad_norm": 7.658427280297901, |
|
"learning_rate": 5.531527768247935e-06, |
|
"loss": 4.2974, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.5116178067318132, |
|
"grad_norm": 7.231975397294993, |
|
"learning_rate": 5.518916759996337e-06, |
|
"loss": 4.3415, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.512486427795874, |
|
"grad_norm": 6.751836330853958, |
|
"learning_rate": 5.506305603173414e-06, |
|
"loss": 4.3004, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5133550488599349, |
|
"grad_norm": 8.355115868210659, |
|
"learning_rate": 5.493694396826589e-06, |
|
"loss": 4.4837, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.5142236699239957, |
|
"grad_norm": 6.193051769061237, |
|
"learning_rate": 5.481083240003665e-06, |
|
"loss": 4.4574, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.5150922909880564, |
|
"grad_norm": 8.721016177095263, |
|
"learning_rate": 5.468472231752065e-06, |
|
"loss": 4.5605, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.5159609120521172, |
|
"grad_norm": 8.690619765588671, |
|
"learning_rate": 5.455861471118041e-06, |
|
"loss": 4.4022, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.5168295331161781, |
|
"grad_norm": 6.149157342377077, |
|
"learning_rate": 5.443251057145899e-06, |
|
"loss": 4.3339, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5176981541802389, |
|
"grad_norm": 9.505811782297348, |
|
"learning_rate": 5.430641088877221e-06, |
|
"loss": 4.2946, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.5185667752442997, |
|
"grad_norm": 7.387109532510519, |
|
"learning_rate": 5.418031665350096e-06, |
|
"loss": 4.4647, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.5194353963083604, |
|
"grad_norm": 8.365180622825434, |
|
"learning_rate": 5.405422885598324e-06, |
|
"loss": 4.376, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.5203040173724213, |
|
"grad_norm": 7.489996252960465, |
|
"learning_rate": 5.3928148486506584e-06, |
|
"loss": 4.452, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.5211726384364821, |
|
"grad_norm": 7.038197518326514, |
|
"learning_rate": 5.380207653530014e-06, |
|
"loss": 4.433, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5220412595005429, |
|
"grad_norm": 6.5440132590329725, |
|
"learning_rate": 5.367601399252694e-06, |
|
"loss": 4.339, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.5229098805646037, |
|
"grad_norm": 7.590698363002852, |
|
"learning_rate": 5.354996184827612e-06, |
|
"loss": 4.5249, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.5237785016286645, |
|
"grad_norm": 5.330185549680669, |
|
"learning_rate": 5.3423921092555184e-06, |
|
"loss": 4.2862, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.5246471226927253, |
|
"grad_norm": 11.75232584431798, |
|
"learning_rate": 5.329789271528212e-06, |
|
"loss": 4.5025, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.5255157437567861, |
|
"grad_norm": 7.036744842599782, |
|
"learning_rate": 5.3171877706277785e-06, |
|
"loss": 4.5778, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5263843648208469, |
|
"grad_norm": 14.767373768502868, |
|
"learning_rate": 5.304587705525795e-06, |
|
"loss": 4.6429, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.5272529858849077, |
|
"grad_norm": 13.155922558039405, |
|
"learning_rate": 5.291989175182569e-06, |
|
"loss": 4.308, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.5281216069489685, |
|
"grad_norm": 10.112650168109823, |
|
"learning_rate": 5.2793922785463515e-06, |
|
"loss": 4.484, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.5289902280130293, |
|
"grad_norm": 11.115807165127777, |
|
"learning_rate": 5.266797114552562e-06, |
|
"loss": 4.2531, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.5298588490770901, |
|
"grad_norm": 8.789007652727864, |
|
"learning_rate": 5.254203782123013e-06, |
|
"loss": 4.4873, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.530727470141151, |
|
"grad_norm": 8.584208445591985, |
|
"learning_rate": 5.241612380165131e-06, |
|
"loss": 4.474, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.5315960912052117, |
|
"grad_norm": 8.322437053520234, |
|
"learning_rate": 5.229023007571179e-06, |
|
"loss": 4.3145, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.5324647122692725, |
|
"grad_norm": 7.166099901687578, |
|
"learning_rate": 5.216435763217487e-06, |
|
"loss": 4.3139, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.5333333333333333, |
|
"grad_norm": 10.11706910820565, |
|
"learning_rate": 5.203850745963666e-06, |
|
"loss": 4.4996, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.5342019543973942, |
|
"grad_norm": 7.536732454472386, |
|
"learning_rate": 5.191268054651833e-06, |
|
"loss": 4.4032, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.535070575461455, |
|
"grad_norm": 9.963574957880162, |
|
"learning_rate": 5.178687788105842e-06, |
|
"loss": 4.5332, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.5359391965255157, |
|
"grad_norm": 7.28030489177068, |
|
"learning_rate": 5.166110045130503e-06, |
|
"loss": 4.4073, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.5368078175895765, |
|
"grad_norm": 9.51169465858539, |
|
"learning_rate": 5.153534924510799e-06, |
|
"loss": 4.3795, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.5376764386536373, |
|
"grad_norm": 6.905433659455053, |
|
"learning_rate": 5.1409625250111265e-06, |
|
"loss": 4.4542, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.5385450597176982, |
|
"grad_norm": 9.924412663417344, |
|
"learning_rate": 5.1283929453745055e-06, |
|
"loss": 4.2425, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.539413680781759, |
|
"grad_norm": 7.187624451476708, |
|
"learning_rate": 5.1158262843218076e-06, |
|
"loss": 4.509, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.5402823018458197, |
|
"grad_norm": 7.551854215214278, |
|
"learning_rate": 5.103262640550986e-06, |
|
"loss": 4.4906, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.5411509229098805, |
|
"grad_norm": 7.066809081049507, |
|
"learning_rate": 5.090702112736295e-06, |
|
"loss": 4.5479, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.5420195439739414, |
|
"grad_norm": 6.869001279033655, |
|
"learning_rate": 5.078144799527513e-06, |
|
"loss": 4.335, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.5428881650380022, |
|
"grad_norm": 6.184389891480793, |
|
"learning_rate": 5.0655907995491726e-06, |
|
"loss": 4.4417, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.543756786102063, |
|
"grad_norm": 5.725304760538053, |
|
"learning_rate": 5.053040211399792e-06, |
|
"loss": 4.4285, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.5446254071661237, |
|
"grad_norm": 6.858465748404722, |
|
"learning_rate": 5.0404931336510785e-06, |
|
"loss": 4.5386, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5454940282301846, |
|
"grad_norm": 5.26607902680547, |
|
"learning_rate": 5.027949664847182e-06, |
|
"loss": 4.3599, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5463626492942454, |
|
"grad_norm": 7.76854318501132, |
|
"learning_rate": 5.015409903503903e-06, |
|
"loss": 4.4496, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5472312703583062, |
|
"grad_norm": 6.476156741121861, |
|
"learning_rate": 5.00287394810792e-06, |
|
"loss": 4.5308, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.548099891422367, |
|
"grad_norm": 5.759923281782965, |
|
"learning_rate": 4.9903418971160276e-06, |
|
"loss": 4.3798, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.5489685124864278, |
|
"grad_norm": 7.291747973328974, |
|
"learning_rate": 4.977813848954349e-06, |
|
"loss": 4.4205, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5498371335504886, |
|
"grad_norm": 6.271882053028793, |
|
"learning_rate": 4.9652899020175706e-06, |
|
"loss": 4.4581, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5507057546145494, |
|
"grad_norm": 7.284670931214164, |
|
"learning_rate": 4.952770154668173e-06, |
|
"loss": 4.5707, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5515743756786102, |
|
"grad_norm": 4.328191558276064, |
|
"learning_rate": 4.940254705235643e-06, |
|
"loss": 4.5398, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.552442996742671, |
|
"grad_norm": 5.190845151993087, |
|
"learning_rate": 4.927743652015723e-06, |
|
"loss": 4.4066, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.5533116178067318, |
|
"grad_norm": 5.037059057279867, |
|
"learning_rate": 4.915237093269624e-06, |
|
"loss": 4.4995, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5541802388707926, |
|
"grad_norm": 5.593175005468861, |
|
"learning_rate": 4.902735127223251e-06, |
|
"loss": 4.4209, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5550488599348534, |
|
"grad_norm": 7.109606325525756, |
|
"learning_rate": 4.890237852066449e-06, |
|
"loss": 4.4592, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5559174809989142, |
|
"grad_norm": 7.103955309366861, |
|
"learning_rate": 4.877745365952214e-06, |
|
"loss": 4.3799, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5567861020629751, |
|
"grad_norm": 6.9509815611789785, |
|
"learning_rate": 4.865257766995929e-06, |
|
"loss": 4.4195, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5576547231270358, |
|
"grad_norm": 5.206700465162741, |
|
"learning_rate": 4.852775153274597e-06, |
|
"loss": 4.4101, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5585233441910966, |
|
"grad_norm": 9.493231366877055, |
|
"learning_rate": 4.8402976228260665e-06, |
|
"loss": 4.4044, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5593919652551574, |
|
"grad_norm": 7.354474990771557, |
|
"learning_rate": 4.827825273648259e-06, |
|
"loss": 4.4041, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5602605863192183, |
|
"grad_norm": 7.477143176038921, |
|
"learning_rate": 4.8153582036984055e-06, |
|
"loss": 4.2882, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5611292073832791, |
|
"grad_norm": 6.891158599557778, |
|
"learning_rate": 4.802896510892274e-06, |
|
"loss": 4.4556, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.5619978284473398, |
|
"grad_norm": 6.181299599357454, |
|
"learning_rate": 4.790440293103399e-06, |
|
"loss": 4.5159, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5628664495114006, |
|
"grad_norm": 6.219602356707123, |
|
"learning_rate": 4.7779896481623165e-06, |
|
"loss": 4.305, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5637350705754615, |
|
"grad_norm": 7.689806502498994, |
|
"learning_rate": 4.765544673855793e-06, |
|
"loss": 4.588, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5646036916395223, |
|
"grad_norm": 5.222351629918664, |
|
"learning_rate": 4.753105467926058e-06, |
|
"loss": 4.5114, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5654723127035831, |
|
"grad_norm": 6.0422096833801255, |
|
"learning_rate": 4.740672128070033e-06, |
|
"loss": 4.4576, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.5663409337676438, |
|
"grad_norm": 6.35521507705001, |
|
"learning_rate": 4.728244751938576e-06, |
|
"loss": 4.2865, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5672095548317047, |
|
"grad_norm": 5.486777370518167, |
|
"learning_rate": 4.715823437135698e-06, |
|
"loss": 4.3724, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5680781758957655, |
|
"grad_norm": 6.005852070933836, |
|
"learning_rate": 4.703408281217808e-06, |
|
"loss": 4.3402, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5689467969598263, |
|
"grad_norm": 5.5827526578094595, |
|
"learning_rate": 4.690999381692943e-06, |
|
"loss": 4.38, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5698154180238871, |
|
"grad_norm": 4.834434200631698, |
|
"learning_rate": 4.678596836020003e-06, |
|
"loss": 4.3628, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.5706840390879478, |
|
"grad_norm": 5.54557315267325, |
|
"learning_rate": 4.666200741607986e-06, |
|
"loss": 4.423, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5715526601520087, |
|
"grad_norm": 5.839997452850654, |
|
"learning_rate": 4.6538111958152195e-06, |
|
"loss": 4.4357, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5724212812160695, |
|
"grad_norm": 5.117404141092577, |
|
"learning_rate": 4.6414282959486015e-06, |
|
"loss": 4.2396, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5732899022801303, |
|
"grad_norm": 4.960346982680198, |
|
"learning_rate": 4.62905213926283e-06, |
|
"loss": 4.299, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5741585233441912, |
|
"grad_norm": 5.666130352112717, |
|
"learning_rate": 4.616682822959644e-06, |
|
"loss": 4.3401, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.5750271444082519, |
|
"grad_norm": 8.838421793842883, |
|
"learning_rate": 4.604320444187058e-06, |
|
"loss": 4.5139, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5758957654723127, |
|
"grad_norm": 6.818030373290299, |
|
"learning_rate": 4.591965100038604e-06, |
|
"loss": 4.3741, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5767643865363735, |
|
"grad_norm": 5.238826628516235, |
|
"learning_rate": 4.579616887552556e-06, |
|
"loss": 4.3466, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5776330076004343, |
|
"grad_norm": 6.774064034828669, |
|
"learning_rate": 4.567275903711182e-06, |
|
"loss": 4.5341, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5785016286644951, |
|
"grad_norm": 10.5839617130325, |
|
"learning_rate": 4.554942245439977e-06, |
|
"loss": 4.55, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.5793702497285559, |
|
"grad_norm": 4.4556947282323875, |
|
"learning_rate": 4.542616009606896e-06, |
|
"loss": 4.4169, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5802388707926167, |
|
"grad_norm": 7.295340591592647, |
|
"learning_rate": 4.5302972930216035e-06, |
|
"loss": 4.1999, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5811074918566775, |
|
"grad_norm": 6.167555266777589, |
|
"learning_rate": 4.5179861924347105e-06, |
|
"loss": 4.4135, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5819761129207384, |
|
"grad_norm": 4.262024109429587, |
|
"learning_rate": 4.505682804537002e-06, |
|
"loss": 4.3303, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5828447339847991, |
|
"grad_norm": 5.436615646031177, |
|
"learning_rate": 4.493387225958698e-06, |
|
"loss": 4.2423, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5837133550488599, |
|
"grad_norm": 7.121365514253581, |
|
"learning_rate": 4.481099553268683e-06, |
|
"loss": 4.3157, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5845819761129207, |
|
"grad_norm": 4.638764375663413, |
|
"learning_rate": 4.468819882973743e-06, |
|
"loss": 4.591, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5854505971769816, |
|
"grad_norm": 5.718150177859018, |
|
"learning_rate": 4.456548311517818e-06, |
|
"loss": 4.4703, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5863192182410424, |
|
"grad_norm": 6.5924354876426445, |
|
"learning_rate": 4.444284935281245e-06, |
|
"loss": 4.4582, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5871878393051031, |
|
"grad_norm": 5.619242120861036, |
|
"learning_rate": 4.432029850579983e-06, |
|
"loss": 4.2845, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5880564603691639, |
|
"grad_norm": 8.851755293802896, |
|
"learning_rate": 4.419783153664885e-06, |
|
"loss": 4.4555, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5889250814332248, |
|
"grad_norm": 4.981757116839891, |
|
"learning_rate": 4.407544940720912e-06, |
|
"loss": 4.4267, |
|
"step": 678 |
|
}, |
|
{ |
      "epoch": 0.5897937024972856,
      "grad_norm": 8.597721909600045,
      "learning_rate": 4.395315307866404e-06,
      "loss": 4.3179,
      "step": 679
    },
    {
      "epoch": 0.5906623235613464,
      "grad_norm": 6.495128819820371,
      "learning_rate": 4.383094351152309e-06,
      "loss": 4.4839,
      "step": 680
    },
    {
      "epoch": 0.5915309446254071,
      "grad_norm": 7.608989760198819,
      "learning_rate": 4.370882166561432e-06,
      "loss": 4.4416,
      "step": 681
    },
    {
      "epoch": 0.5923995656894679,
      "grad_norm": 5.854433397474583,
      "learning_rate": 4.358678850007681e-06,
      "loss": 4.3324,
      "step": 682
    },
    {
      "epoch": 0.5932681867535288,
      "grad_norm": 7.407730937666916,
      "learning_rate": 4.3464844973353215e-06,
      "loss": 4.3833,
      "step": 683
    },
    {
      "epoch": 0.5941368078175896,
      "grad_norm": 4.968500771960475,
      "learning_rate": 4.334299204318208e-06,
      "loss": 4.2341,
      "step": 684
    },
    {
      "epoch": 0.5950054288816504,
      "grad_norm": 7.558518400906316,
      "learning_rate": 4.322123066659048e-06,
      "loss": 4.4444,
      "step": 685
    },
    {
      "epoch": 0.5958740499457111,
      "grad_norm": 4.962150548530693,
      "learning_rate": 4.309956179988641e-06,
      "loss": 4.3576,
      "step": 686
    },
    {
      "epoch": 0.596742671009772,
      "grad_norm": 6.938240211755133,
      "learning_rate": 4.2977986398651285e-06,
      "loss": 4.3146,
      "step": 687
    },
    {
      "epoch": 0.5976112920738328,
      "grad_norm": 7.587763936061048,
      "learning_rate": 4.285650541773243e-06,
      "loss": 4.4289,
      "step": 688
    },
    {
      "epoch": 0.5984799131378936,
      "grad_norm": 5.969538038161447,
      "learning_rate": 4.273511981123569e-06,
      "loss": 4.3759,
      "step": 689
    },
    {
      "epoch": 0.5993485342019544,
      "grad_norm": 7.899549221564559,
      "learning_rate": 4.261383053251773e-06,
      "loss": 4.3071,
      "step": 690
    },
    {
      "epoch": 0.6002171552660152,
      "grad_norm": 5.2445353058223505,
      "learning_rate": 4.2492638534178695e-06,
      "loss": 4.281,
      "step": 691
    },
    {
      "epoch": 0.601085776330076,
      "grad_norm": 7.6245739463456985,
      "learning_rate": 4.237154476805475e-06,
      "loss": 4.2956,
      "step": 692
    },
    {
      "epoch": 0.6019543973941368,
      "grad_norm": 5.470116425098481,
      "learning_rate": 4.225055018521048e-06,
      "loss": 4.2829,
      "step": 693
    },
    {
      "epoch": 0.6028230184581976,
      "grad_norm": 8.639572149060786,
      "learning_rate": 4.2129655735931514e-06,
      "loss": 4.4738,
      "step": 694
    },
    {
      "epoch": 0.6036916395222585,
      "grad_norm": 6.152417088405707,
      "learning_rate": 4.200886236971707e-06,
      "loss": 4.4568,
      "step": 695
    },
    {
      "epoch": 0.6045602605863192,
      "grad_norm": 9.3108808124201,
      "learning_rate": 4.188817103527238e-06,
      "loss": 4.6181,
      "step": 696
    },
    {
      "epoch": 0.60542888165038,
      "grad_norm": 6.812741933127771,
      "learning_rate": 4.176758268050141e-06,
      "loss": 4.3743,
      "step": 697
    },
    {
      "epoch": 0.6062975027144408,
      "grad_norm": 7.946624143488305,
      "learning_rate": 4.164709825249931e-06,
      "loss": 4.3947,
      "step": 698
    },
    {
      "epoch": 0.6071661237785017,
      "grad_norm": 7.52895947435545,
      "learning_rate": 4.152671869754496e-06,
      "loss": 4.4524,
      "step": 699
    },
    {
      "epoch": 0.6080347448425625,
      "grad_norm": 6.78338934618417,
      "learning_rate": 4.140644496109358e-06,
      "loss": 4.4503,
      "step": 700
    },
    {
      "epoch": 0.6089033659066232,
      "grad_norm": 7.448080226128123,
      "learning_rate": 4.128627798776933e-06,
      "loss": 4.353,
      "step": 701
    },
    {
      "epoch": 0.609771986970684,
      "grad_norm": 5.9613691312207475,
      "learning_rate": 4.116621872135782e-06,
      "loss": 4.1222,
      "step": 702
    },
    {
      "epoch": 0.6106406080347448,
      "grad_norm": 7.5790067996481625,
      "learning_rate": 4.104626810479878e-06,
      "loss": 4.3692,
      "step": 703
    },
    {
      "epoch": 0.6115092290988057,
      "grad_norm": 5.453670887522505,
      "learning_rate": 4.092642708017853e-06,
      "loss": 4.491,
      "step": 704
    },
    {
      "epoch": 0.6123778501628665,
      "grad_norm": 9.305782789439998,
      "learning_rate": 4.080669658872275e-06,
      "loss": 4.5958,
      "step": 705
    },
    {
      "epoch": 0.6132464712269272,
      "grad_norm": 5.699148615360036,
      "learning_rate": 4.068707757078895e-06,
      "loss": 4.2548,
      "step": 706
    },
    {
      "epoch": 0.614115092290988,
      "grad_norm": 7.911694729153124,
      "learning_rate": 4.056757096585914e-06,
      "loss": 4.4262,
      "step": 707
    },
    {
      "epoch": 0.6149837133550489,
      "grad_norm": 6.872921512476096,
      "learning_rate": 4.044817771253243e-06,
      "loss": 4.3627,
      "step": 708
    },
    {
      "epoch": 0.6158523344191097,
      "grad_norm": 7.476546715366341,
      "learning_rate": 4.0328898748517715e-06,
      "loss": 4.3003,
      "step": 709
    },
    {
      "epoch": 0.6167209554831705,
      "grad_norm": 7.118852675240264,
      "learning_rate": 4.020973501062621e-06,
      "loss": 4.2213,
      "step": 710
    },
    {
      "epoch": 0.6175895765472312,
      "grad_norm": 6.777324416098178,
      "learning_rate": 4.009068743476418e-06,
      "loss": 4.4026,
      "step": 711
    },
    {
      "epoch": 0.6184581976112921,
      "grad_norm": 6.267261071143136,
      "learning_rate": 3.997175695592558e-06,
      "loss": 4.3549,
      "step": 712
    },
    {
      "epoch": 0.6193268186753529,
      "grad_norm": 5.469188635248625,
      "learning_rate": 3.985294450818465e-06,
      "loss": 4.5069,
      "step": 713
    },
    {
      "epoch": 0.6201954397394137,
      "grad_norm": 5.5464442937088645,
      "learning_rate": 3.973425102468864e-06,
      "loss": 4.3273,
      "step": 714
    },
    {
      "epoch": 0.6210640608034745,
      "grad_norm": 6.3365698216315485,
      "learning_rate": 3.961567743765047e-06,
      "loss": 4.2548,
      "step": 715
    },
    {
      "epoch": 0.6219326818675353,
      "grad_norm": 6.762739461810207,
      "learning_rate": 3.949722467834136e-06,
      "loss": 4.2804,
      "step": 716
    },
    {
      "epoch": 0.6228013029315961,
      "grad_norm": 6.867786850485367,
      "learning_rate": 3.9378893677083585e-06,
      "loss": 4.4824,
      "step": 717
    },
    {
      "epoch": 0.6236699239956569,
      "grad_norm": 4.426890287359104,
      "learning_rate": 3.926068536324318e-06,
      "loss": 4.2629,
      "step": 718
    },
    {
      "epoch": 0.6245385450597177,
      "grad_norm": 5.764947147703928,
      "learning_rate": 3.914260066522249e-06,
      "loss": 4.4748,
      "step": 719
    },
    {
      "epoch": 0.6254071661237784,
      "grad_norm": 6.449173453157844,
      "learning_rate": 3.902464051045308e-06,
      "loss": 4.2288,
      "step": 720
    },
    {
      "epoch": 0.6262757871878393,
      "grad_norm": 7.156025732803798,
      "learning_rate": 3.890680582538835e-06,
      "loss": 4.471,
      "step": 721
    },
    {
      "epoch": 0.6271444082519001,
      "grad_norm": 6.044472987310762,
      "learning_rate": 3.878909753549621e-06,
      "loss": 4.3782,
      "step": 722
    },
    {
      "epoch": 0.6280130293159609,
      "grad_norm": 6.041979384384623,
      "learning_rate": 3.867151656525191e-06,
      "loss": 4.3686,
      "step": 723
    },
    {
      "epoch": 0.6288816503800218,
      "grad_norm": 4.965615098879875,
      "learning_rate": 3.8554063838130774e-06,
      "loss": 4.3014,
      "step": 724
    },
    {
      "epoch": 0.6297502714440825,
      "grad_norm": 6.077620537282477,
      "learning_rate": 3.84367402766008e-06,
      "loss": 4.5319,
      "step": 725
    },
    {
      "epoch": 0.6306188925081433,
      "grad_norm": 7.348008590308795,
      "learning_rate": 3.831954680211567e-06,
      "loss": 4.4329,
      "step": 726
    },
    {
      "epoch": 0.6314875135722041,
      "grad_norm": 4.148156488850514,
      "learning_rate": 3.820248433510721e-06,
      "loss": 4.3825,
      "step": 727
    },
    {
      "epoch": 0.6323561346362649,
      "grad_norm": 5.868839788368692,
      "learning_rate": 3.8085553794978464e-06,
      "loss": 4.4061,
      "step": 728
    },
    {
      "epoch": 0.6332247557003258,
      "grad_norm": 5.4956452972488465,
      "learning_rate": 3.7968756100096264e-06,
      "loss": 4.4363,
      "step": 729
    },
    {
      "epoch": 0.6340933767643865,
      "grad_norm": 6.885854053993367,
      "learning_rate": 3.7852092167784057e-06,
      "loss": 4.4167,
      "step": 730
    },
    {
      "epoch": 0.6349619978284473,
      "grad_norm": 3.9881851549507723,
      "learning_rate": 3.7735562914314753e-06,
      "loss": 4.2648,
      "step": 731
    },
    {
      "epoch": 0.6358306188925081,
      "grad_norm": 3.4678484500146927,
      "learning_rate": 3.761916925490355e-06,
      "loss": 4.3543,
      "step": 732
    },
    {
      "epoch": 0.636699239956569,
      "grad_norm": 5.980995299387367,
      "learning_rate": 3.7502912103700573e-06,
      "loss": 4.346,
      "step": 733
    },
    {
      "epoch": 0.6375678610206298,
      "grad_norm": 6.151689157843326,
      "learning_rate": 3.738679237378395e-06,
      "loss": 4.4269,
      "step": 734
    },
    {
      "epoch": 0.6384364820846905,
      "grad_norm": 5.540758455448802,
      "learning_rate": 3.7270810977152437e-06,
      "loss": 4.5648,
      "step": 735
    },
    {
      "epoch": 0.6393051031487513,
      "grad_norm": 4.261398640280459,
      "learning_rate": 3.7154968824718335e-06,
      "loss": 4.1601,
      "step": 736
    },
    {
      "epoch": 0.6401737242128122,
      "grad_norm": 5.048852018013335,
      "learning_rate": 3.703926682630034e-06,
      "loss": 4.1836,
      "step": 737
    },
    {
      "epoch": 0.641042345276873,
      "grad_norm": 6.223488653795751,
      "learning_rate": 3.6923705890616385e-06,
      "loss": 4.2932,
      "step": 738
    },
    {
      "epoch": 0.6419109663409338,
      "grad_norm": 4.00296364061148,
      "learning_rate": 3.6808286925276476e-06,
      "loss": 4.5271,
      "step": 739
    },
    {
      "epoch": 0.6427795874049945,
      "grad_norm": 7.6337502276759,
      "learning_rate": 3.669301083677563e-06,
      "loss": 4.3391,
      "step": 740
    },
    {
      "epoch": 0.6436482084690553,
      "grad_norm": 3.648965509349653,
      "learning_rate": 3.657787853048671e-06,
      "loss": 4.3708,
      "step": 741
    },
    {
      "epoch": 0.6445168295331162,
      "grad_norm": 6.430179601615156,
      "learning_rate": 3.6462890910653287e-06,
      "loss": 4.2363,
      "step": 742
    },
    {
      "epoch": 0.645385450597177,
      "grad_norm": 3.8810246258525414,
      "learning_rate": 3.6348048880382603e-06,
      "loss": 4.4739,
      "step": 743
    },
    {
      "epoch": 0.6462540716612378,
      "grad_norm": 6.8458334527081615,
      "learning_rate": 3.6233353341638434e-06,
      "loss": 4.348,
      "step": 744
    },
    {
      "epoch": 0.6471226927252985,
      "grad_norm": 3.560922995570656,
      "learning_rate": 3.611880519523403e-06,
      "loss": 4.3628,
      "step": 745
    },
    {
      "epoch": 0.6479913137893594,
      "grad_norm": 5.703893391333462,
      "learning_rate": 3.600440534082501e-06,
      "loss": 4.2112,
      "step": 746
    },
    {
      "epoch": 0.6488599348534202,
      "grad_norm": 5.21895941854659,
      "learning_rate": 3.5890154676902346e-06,
      "loss": 4.3305,
      "step": 747
    },
    {
      "epoch": 0.649728555917481,
      "grad_norm": 4.447049515180316,
      "learning_rate": 3.5776054100785223e-06,
      "loss": 4.3878,
      "step": 748
    },
    {
      "epoch": 0.6505971769815418,
      "grad_norm": 4.949990794413519,
      "learning_rate": 3.56621045086141e-06,
      "loss": 4.2244,
      "step": 749
    },
    {
      "epoch": 0.6514657980456026,
      "grad_norm": 5.258986448566508,
      "learning_rate": 3.554830679534357e-06,
      "loss": 4.4186,
      "step": 750
    },
    {
      "epoch": 0.6523344191096634,
      "grad_norm": 5.064550853101523,
      "learning_rate": 3.5434661854735406e-06,
      "loss": 4.3671,
      "step": 751
    },
    {
      "epoch": 0.6532030401737242,
      "grad_norm": 5.6966971937871635,
      "learning_rate": 3.532117057935151e-06,
      "loss": 4.1835,
      "step": 752
    },
    {
      "epoch": 0.654071661237785,
      "grad_norm": 5.913092142311183,
      "learning_rate": 3.520783386054689e-06,
      "loss": 4.414,
      "step": 753
    },
    {
      "epoch": 0.6549402823018459,
      "grad_norm": 4.6289425679321985,
      "learning_rate": 3.5094652588462685e-06,
      "loss": 4.3397,
      "step": 754
    },
    {
      "epoch": 0.6558089033659066,
      "grad_norm": 4.196557296390773,
      "learning_rate": 3.498162765201918e-06,
      "loss": 4.4643,
      "step": 755
    },
    {
      "epoch": 0.6566775244299674,
      "grad_norm": 4.276537515669203,
      "learning_rate": 3.486875993890874e-06,
      "loss": 4.316,
      "step": 756
    },
    {
      "epoch": 0.6575461454940282,
      "grad_norm": 3.638394175512923,
      "learning_rate": 3.475605033558896e-06,
      "loss": 4.2535,
      "step": 757
    },
    {
      "epoch": 0.6584147665580891,
      "grad_norm": 3.7547136720071044,
      "learning_rate": 3.4643499727275704e-06,
      "loss": 4.4355,
      "step": 758
    },
    {
      "epoch": 0.6592833876221499,
      "grad_norm": 4.529779878719585,
      "learning_rate": 3.4531108997935956e-06,
      "loss": 4.3798,
      "step": 759
    },
    {
      "epoch": 0.6601520086862106,
      "grad_norm": 4.5346637615839205,
      "learning_rate": 3.4418879030281133e-06,
      "loss": 4.4147,
      "step": 760
    },
    {
      "epoch": 0.6610206297502714,
      "grad_norm": 5.652353695284788,
      "learning_rate": 3.430681070575999e-06,
      "loss": 4.3372,
      "step": 761
    },
    {
      "epoch": 0.6618892508143323,
      "grad_norm": 7.170934682558012,
      "learning_rate": 3.419490490455176e-06,
      "loss": 4.3881,
      "step": 762
    },
    {
      "epoch": 0.6627578718783931,
      "grad_norm": 5.047705198747088,
      "learning_rate": 3.408316250555922e-06,
      "loss": 4.3122,
      "step": 763
    },
    {
      "epoch": 0.6636264929424539,
      "grad_norm": 7.07536095984844,
      "learning_rate": 3.3971584386401816e-06,
      "loss": 4.3548,
      "step": 764
    },
    {
      "epoch": 0.6644951140065146,
      "grad_norm": 3.3336263913275572,
      "learning_rate": 3.386017142340867e-06,
      "loss": 4.3743,
      "step": 765
    },
    {
      "epoch": 0.6653637350705754,
      "grad_norm": 7.065332392195253,
      "learning_rate": 3.374892449161187e-06,
      "loss": 4.4953,
      "step": 766
    },
    {
      "epoch": 0.6662323561346363,
      "grad_norm": 3.4501029280252937,
      "learning_rate": 3.3637844464739492e-06,
      "loss": 4.3351,
      "step": 767
    },
    {
      "epoch": 0.6671009771986971,
      "grad_norm": 6.687004143766434,
      "learning_rate": 3.352693221520867e-06,
      "loss": 4.4546,
      "step": 768
    },
    {
      "epoch": 0.6679695982627579,
      "grad_norm": 4.50307043857389,
      "learning_rate": 3.341618861411887e-06,
      "loss": 4.3398,
      "step": 769
    },
    {
      "epoch": 0.6688382193268186,
      "grad_norm": 7.8703135527519,
      "learning_rate": 3.3305614531245077e-06,
      "loss": 4.2638,
      "step": 770
    },
    {
      "epoch": 0.6697068403908795,
      "grad_norm": 3.922590217758762,
      "learning_rate": 3.319521083503075e-06,
      "loss": 4.2913,
      "step": 771
    },
    {
      "epoch": 0.6705754614549403,
      "grad_norm": 8.346093676191689,
      "learning_rate": 3.3084978392581223e-06,
      "loss": 4.4285,
      "step": 772
    },
    {
      "epoch": 0.6714440825190011,
      "grad_norm": 6.650194245577298,
      "learning_rate": 3.2974918069656797e-06,
      "loss": 4.2699,
      "step": 773
    },
    {
      "epoch": 0.6723127035830619,
      "grad_norm": 6.560011908235111,
      "learning_rate": 3.286503073066596e-06,
      "loss": 4.4655,
      "step": 774
    },
    {
      "epoch": 0.6731813246471227,
      "grad_norm": 7.055749636651488,
      "learning_rate": 3.2755317238658585e-06,
      "loss": 4.4159,
      "step": 775
    },
    {
      "epoch": 0.6740499457111835,
      "grad_norm": 5.354746730373342,
      "learning_rate": 3.2645778455319143e-06,
      "loss": 4.3549,
      "step": 776
    },
    {
      "epoch": 0.6749185667752443,
      "grad_norm": 5.146550880864029,
      "learning_rate": 3.2536415240959954e-06,
      "loss": 4.4694,
      "step": 777
    },
    {
      "epoch": 0.6757871878393051,
      "grad_norm": 5.831672070421321,
      "learning_rate": 3.2427228454514496e-06,
      "loss": 4.1214,
      "step": 778
    },
    {
      "epoch": 0.6766558089033659,
      "grad_norm": 4.521596862758205,
      "learning_rate": 3.2318218953530485e-06,
      "loss": 4.2677,
      "step": 779
    },
    {
      "epoch": 0.6775244299674267,
      "grad_norm": 6.314265698622917,
      "learning_rate": 3.2209387594163316e-06,
      "loss": 4.412,
      "step": 780
    },
    {
      "epoch": 0.6783930510314875,
      "grad_norm": 4.866756540705389,
      "learning_rate": 3.2100735231169238e-06,
      "loss": 4.4095,
      "step": 781
    },
    {
      "epoch": 0.6792616720955483,
      "grad_norm": 6.493844920886384,
      "learning_rate": 3.1992262717898687e-06,
      "loss": 4.4263,
      "step": 782
    },
    {
      "epoch": 0.6801302931596092,
      "grad_norm": 7.051427175831958,
      "learning_rate": 3.1883970906289568e-06,
      "loss": 4.3525,
      "step": 783
    },
    {
      "epoch": 0.6809989142236699,
      "grad_norm": 6.86360370814285,
      "learning_rate": 3.1775860646860566e-06,
      "loss": 4.2127,
      "step": 784
    },
    {
      "epoch": 0.6818675352877307,
      "grad_norm": 5.647923819665503,
      "learning_rate": 3.1667932788704414e-06,
      "loss": 4.252,
      "step": 785
    },
    {
      "epoch": 0.6827361563517915,
      "grad_norm": 4.838224572668243,
      "learning_rate": 3.1560188179481356e-06,
      "loss": 4.3531,
      "step": 786
    },
    {
      "epoch": 0.6836047774158523,
      "grad_norm": 6.70434806859771,
      "learning_rate": 3.1452627665412384e-06,
      "loss": 4.2778,
      "step": 787
    },
    {
      "epoch": 0.6844733984799132,
      "grad_norm": 3.8436097880252174,
      "learning_rate": 3.134525209127255e-06,
      "loss": 4.2968,
      "step": 788
    },
    {
      "epoch": 0.6853420195439739,
      "grad_norm": 6.343428669639347,
      "learning_rate": 3.1238062300384464e-06,
      "loss": 4.2696,
      "step": 789
    },
    {
      "epoch": 0.6862106406080347,
      "grad_norm": 5.042121553913117,
      "learning_rate": 3.1131059134611595e-06,
      "loss": 4.3773,
      "step": 790
    },
    {
      "epoch": 0.6870792616720955,
      "grad_norm": 5.935535940636119,
      "learning_rate": 3.1024243434351653e-06,
      "loss": 4.3472,
      "step": 791
    },
    {
      "epoch": 0.6879478827361564,
      "grad_norm": 4.211630727724998,
      "learning_rate": 3.0917616038530006e-06,
      "loss": 4.3975,
      "step": 792
    },
    {
      "epoch": 0.6888165038002172,
      "grad_norm": 5.39953776616512,
      "learning_rate": 3.0811177784593086e-06,
      "loss": 4.2212,
      "step": 793
    },
    {
      "epoch": 0.6896851248642779,
      "grad_norm": 6.2670789214250355,
      "learning_rate": 3.0704929508501836e-06,
      "loss": 4.2709,
      "step": 794
    },
    {
      "epoch": 0.6905537459283387,
      "grad_norm": 5.3978340661584285,
      "learning_rate": 3.059887204472508e-06,
      "loss": 4.3752,
      "step": 795
    },
    {
      "epoch": 0.6914223669923996,
      "grad_norm": 4.222514400176097,
      "learning_rate": 3.0493006226233067e-06,
      "loss": 4.283,
      "step": 796
    },
    {
      "epoch": 0.6922909880564604,
      "grad_norm": 6.363789170503971,
      "learning_rate": 3.03873328844908e-06,
      "loss": 4.2572,
      "step": 797
    },
    {
      "epoch": 0.6931596091205212,
      "grad_norm": 4.996564390552643,
      "learning_rate": 3.028185284945164e-06,
      "loss": 4.2321,
      "step": 798
    },
    {
      "epoch": 0.6940282301845819,
      "grad_norm": 3.7088045379057424,
      "learning_rate": 3.01765669495507e-06,
      "loss": 4.5402,
      "step": 799
    },
    {
      "epoch": 0.6948968512486428,
      "grad_norm": 4.7675482950474475,
      "learning_rate": 3.0071476011698387e-06,
      "loss": 4.247,
      "step": 800
    },
    {
      "epoch": 0.6957654723127036,
      "grad_norm": 5.639346110912645,
      "learning_rate": 2.9966580861273847e-06,
      "loss": 4.4099,
      "step": 801
    },
    {
      "epoch": 0.6966340933767644,
      "grad_norm": 4.770187045539365,
      "learning_rate": 2.9861882322118565e-06,
      "loss": 4.1644,
      "step": 802
    },
    {
      "epoch": 0.6975027144408252,
      "grad_norm": 3.6627706854689106,
      "learning_rate": 2.9757381216529814e-06,
      "loss": 4.3601,
      "step": 803
    },
    {
      "epoch": 0.698371335504886,
      "grad_norm": 3.9453583597610256,
      "learning_rate": 2.9653078365254267e-06,
      "loss": 4.3775,
      "step": 804
    },
    {
      "epoch": 0.6992399565689468,
      "grad_norm": 4.121043954930214,
      "learning_rate": 2.954897458748147e-06,
      "loss": 4.2391,
      "step": 805
    },
    {
      "epoch": 0.7001085776330076,
      "grad_norm": 3.5024329761434974,
      "learning_rate": 2.9445070700837486e-06,
      "loss": 4.4102,
      "step": 806
    },
    {
      "epoch": 0.7009771986970684,
      "grad_norm": 5.1118475735166085,
      "learning_rate": 2.934136752137849e-06,
      "loss": 4.3506,
      "step": 807
    },
    {
      "epoch": 0.7018458197611293,
      "grad_norm": 4.68701671319949,
      "learning_rate": 2.9237865863584204e-06,
      "loss": 4.273,
      "step": 808
    },
    {
      "epoch": 0.70271444082519,
      "grad_norm": 4.207677749842511,
      "learning_rate": 2.9134566540351695e-06,
      "loss": 4.4052,
      "step": 809
    },
    {
      "epoch": 0.7035830618892508,
      "grad_norm": 5.588319045300857,
      "learning_rate": 2.903147036298888e-06,
      "loss": 4.3446,
      "step": 810
    },
    {
      "epoch": 0.7044516829533116,
      "grad_norm": 4.694144532657754,
      "learning_rate": 2.892857814120815e-06,
      "loss": 4.3963,
      "step": 811
    },
    {
      "epoch": 0.7053203040173724,
      "grad_norm": 5.261041999651582,
      "learning_rate": 2.8825890683120087e-06,
      "loss": 4.1755,
      "step": 812
    },
    {
      "epoch": 0.7061889250814333,
      "grad_norm": 3.537979646262889,
      "learning_rate": 2.8723408795227063e-06,
      "loss": 4.3071,
      "step": 813
    },
    {
      "epoch": 0.707057546145494,
      "grad_norm": 3.8989008087932753,
      "learning_rate": 2.8621133282416836e-06,
      "loss": 4.262,
      "step": 814
    },
    {
      "epoch": 0.7079261672095548,
      "grad_norm": 3.9902100422123175,
      "learning_rate": 2.8519064947956403e-06,
      "loss": 4.4904,
      "step": 815
    },
    {
      "epoch": 0.7087947882736156,
      "grad_norm": 5.206036718500678,
      "learning_rate": 2.8417204593485566e-06,
      "loss": 4.3229,
      "step": 816
    },
    {
      "epoch": 0.7096634093376765,
      "grad_norm": 5.794935718355797,
      "learning_rate": 2.831555301901061e-06,
      "loss": 4.3783,
      "step": 817
    },
    {
      "epoch": 0.7105320304017373,
      "grad_norm": 4.0421897856037114,
      "learning_rate": 2.82141110228981e-06,
      "loss": 4.3945,
      "step": 818
    },
    {
      "epoch": 0.711400651465798,
      "grad_norm": 4.6514330236834,
      "learning_rate": 2.811287940186866e-06,
      "loss": 4.3617,
      "step": 819
    },
    {
      "epoch": 0.7122692725298588,
      "grad_norm": 3.6423689253885194,
      "learning_rate": 2.80118589509905e-06,
      "loss": 4.2173,
      "step": 820
    },
    {
      "epoch": 0.7131378935939197,
      "grad_norm": 3.6396624178466896,
      "learning_rate": 2.791105046367341e-06,
      "loss": 4.3947,
      "step": 821
    },
    {
      "epoch": 0.7140065146579805,
      "grad_norm": 3.8347475901940054,
      "learning_rate": 2.781045473166239e-06,
      "loss": 4.1539,
      "step": 822
    },
    {
      "epoch": 0.7148751357220413,
      "grad_norm": 3.9705888799663027,
      "learning_rate": 2.771007254503149e-06,
      "loss": 4.4397,
      "step": 823
    },
    {
      "epoch": 0.715743756786102,
      "grad_norm": 3.6196867847154164,
      "learning_rate": 2.7609904692177573e-06,
      "loss": 4.0627,
      "step": 824
    },
    {
      "epoch": 0.7166123778501629,
      "grad_norm": 3.1130920798740362,
      "learning_rate": 2.750995195981412e-06,
      "loss": 4.2884,
      "step": 825
    },
    {
      "epoch": 0.7174809989142237,
      "grad_norm": 3.288307548785416,
      "learning_rate": 2.7410215132965074e-06,
      "loss": 4.3433,
      "step": 826
    },
    {
      "epoch": 0.7183496199782845,
      "grad_norm": 3.5010329845706596,
      "learning_rate": 2.7310694994958713e-06,
      "loss": 4.2592,
      "step": 827
    },
    {
      "epoch": 0.7192182410423453,
      "grad_norm": 3.0547168595737504,
      "learning_rate": 2.721139232742137e-06,
      "loss": 4.3653,
      "step": 828
    },
    {
      "epoch": 0.720086862106406,
      "grad_norm": 3.1039092159267074,
      "learning_rate": 2.711230791027144e-06,
      "loss": 4.319,
      "step": 829
    },
    {
      "epoch": 0.7209554831704669,
      "grad_norm": 3.000221643527686,
      "learning_rate": 2.7013442521713157e-06,
      "loss": 4.3168,
      "step": 830
    },
    {
      "epoch": 0.7218241042345277,
      "grad_norm": 3.56612108454193,
      "learning_rate": 2.691479693823053e-06,
      "loss": 4.2912,
      "step": 831
    },
    {
      "epoch": 0.7226927252985885,
      "grad_norm": 4.045486521817849,
      "learning_rate": 2.6816371934581224e-06,
      "loss": 4.4497,
      "step": 832
    },
    {
      "epoch": 0.7235613463626493,
      "grad_norm": 5.06756019279877,
      "learning_rate": 2.6718168283790502e-06,
      "loss": 4.2895,
      "step": 833
    },
    {
      "epoch": 0.7244299674267101,
      "grad_norm": 5.209971490598599,
      "learning_rate": 2.6620186757145055e-06,
      "loss": 4.3888,
      "step": 834
    },
    {
      "epoch": 0.7252985884907709,
      "grad_norm": 5.759379090394831,
      "learning_rate": 2.652242812418712e-06,
      "loss": 4.3862,
      "step": 835
    },
    {
      "epoch": 0.7261672095548317,
      "grad_norm": 4.223016994293722,
      "learning_rate": 2.642489315270832e-06,
      "loss": 4.3214,
      "step": 836
    },
    {
      "epoch": 0.7270358306188925,
      "grad_norm": 4.5544158674627955,
      "learning_rate": 2.632758260874358e-06,
      "loss": 4.2358,
      "step": 837
    },
    {
      "epoch": 0.7279044516829533,
      "grad_norm": 4.589085818499639,
      "learning_rate": 2.6230497256565234e-06,
      "loss": 4.1757,
      "step": 838
    },
    {
      "epoch": 0.7287730727470141,
      "grad_norm": 4.716082178521095,
      "learning_rate": 2.613363785867699e-06,
      "loss": 4.2912,
      "step": 839
    },
    {
      "epoch": 0.7296416938110749,
      "grad_norm": 4.0122133897679975,
      "learning_rate": 2.6037005175807883e-06,
      "loss": 4.2088,
      "step": 840
    },
    {
      "epoch": 0.7305103148751357,
      "grad_norm": 4.519123777136473,
      "learning_rate": 2.594059996690636e-06,
      "loss": 4.3983,
      "step": 841
    },
    {
      "epoch": 0.7313789359391966,
      "grad_norm": 4.366992105875965,
      "learning_rate": 2.5844422989134294e-06,
      "loss": 4.3502,
      "step": 842
    },
    {
      "epoch": 0.7322475570032573,
      "grad_norm": 4.902796136843425,
      "learning_rate": 2.574847499786103e-06,
      "loss": 4.3827,
      "step": 843
    },
    {
      "epoch": 0.7331161780673181,
      "grad_norm": 3.7309923778017335,
      "learning_rate": 2.5652756746657474e-06,
      "loss": 4.1343,
      "step": 844
    },
    {
      "epoch": 0.7339847991313789,
      "grad_norm": 4.633964698796071,
      "learning_rate": 2.5557268987290196e-06,
      "loss": 4.2807,
      "step": 845
    },
    {
      "epoch": 0.7348534201954398,
      "grad_norm": 3.158778304410855,
      "learning_rate": 2.546201246971542e-06,
      "loss": 4.1983,
      "step": 846
    },
    {
      "epoch": 0.7357220412595006,
      "grad_norm": 4.280110117256575,
      "learning_rate": 2.536698794207327e-06,
      "loss": 4.1502,
      "step": 847
    },
    {
      "epoch": 0.7365906623235613,
      "grad_norm": 4.019320202479935,
      "learning_rate": 2.527219615068181e-06,
      "loss": 4.269,
      "step": 848
    },
    {
      "epoch": 0.7374592833876221,
      "grad_norm": 4.684956718222349,
      "learning_rate": 2.517763784003121e-06,
      "loss": 4.2774,
      "step": 849
    },
    {
      "epoch": 0.738327904451683,
      "grad_norm": 4.72042770271221,
      "learning_rate": 2.5083313752777893e-06,
      "loss": 4.3012,
      "step": 850
    },
    {
      "epoch": 0.7391965255157438,
      "grad_norm": 3.9457131710176028,
      "learning_rate": 2.4989224629738705e-06,
      "loss": 4.3457,
      "step": 851
    },
    {
      "epoch": 0.7400651465798046,
      "grad_norm": 3.86871572131023,
      "learning_rate": 2.4895371209885082e-06,
      "loss": 4.2702,
      "step": 852
    },
    {
      "epoch": 0.7409337676438653,
      "grad_norm": 4.162164409779829,
      "learning_rate": 2.4801754230337287e-06,
      "loss": 4.2957,
      "step": 853
    },
    {
      "epoch": 0.7418023887079261,
      "grad_norm": 5.002185565803835,
      "learning_rate": 2.4708374426358543e-06,
      "loss": 4.2632,
      "step": 854
    },
    {
      "epoch": 0.742671009771987,
      "grad_norm": 5.4293250327800004,
      "learning_rate": 2.4615232531349332e-06,
      "loss": 4.2441,
      "step": 855
    },
    {
      "epoch": 0.7435396308360478,
      "grad_norm": 4.995810113032375,
      "learning_rate": 2.452232927684166e-06,
      "loss": 4.2673,
      "step": 856
    },
    {
      "epoch": 0.7444082519001086,
      "grad_norm": 3.2917198811992803,
      "learning_rate": 2.442966539249318e-06,
      "loss": 4.1662,
      "step": 857
    },
    {
      "epoch": 0.7452768729641693,
      "grad_norm": 4.814530155931897,
      "learning_rate": 2.4337241606081587e-06,
      "loss": 4.2989,
      "step": 858
    },
    {
      "epoch": 0.7461454940282302,
      "grad_norm": 5.299603931940639,
      "learning_rate": 2.424505864349886e-06,
      "loss": 4.3474,
      "step": 859
    },
    {
      "epoch": 0.747014115092291,
      "grad_norm": 5.282979213255703,
      "learning_rate": 2.4153117228745543e-06,
      "loss": 4.2909,
      "step": 860
    },
    {
      "epoch": 0.7478827361563518,
      "grad_norm": 4.536156542709376,
      "learning_rate": 2.4061418083925085e-06,
      "loss": 4.3692,
      "step": 861
    },
    {
      "epoch": 0.7487513572204126,
      "grad_norm": 5.026058475783841,
      "learning_rate": 2.396996192923818e-06,
      "loss": 4.3141,
      "step": 862
    },
    {
      "epoch": 0.7496199782844734,
      "grad_norm": 5.465713560303808,
      "learning_rate": 2.387874948297701e-06,
      "loss": 4.3735,
      "step": 863
    },
    {
      "epoch": 0.7504885993485342,
      "grad_norm": 3.9399735021333,
      "learning_rate": 2.3787781461519786e-06,
      "loss": 4.2175,
      "step": 864
    },
    {
      "epoch": 0.751357220412595,
      "grad_norm": 5.576777900877066,
      "learning_rate": 2.3697058579324976e-06,
      "loss": 4.1978,
      "step": 865
    },
    {
      "epoch": 0.7522258414766558,
      "grad_norm": 5.197291281490897,
      "learning_rate": 2.3606581548925696e-06,
      "loss": 4.3048,
      "step": 866
    },
    {
      "epoch": 0.7530944625407167,
      "grad_norm": 3.767610507301271,
      "learning_rate": 2.3516351080924206e-06,
      "loss": 4.257,
      "step": 867
    },
    {
      "epoch": 0.7539630836047774,
      "grad_norm": 5.305437866922308,
      "learning_rate": 2.3426367883986254e-06,
      "loss": 4.3791,
      "step": 868
    },
    {
      "epoch": 0.7548317046688382,
      "grad_norm": 4.698829389303069,
      "learning_rate": 2.333663266483555e-06,
      "loss": 4.4556,
      "step": 869
    },
    {
      "epoch": 0.755700325732899,
      "grad_norm": 4.351560277566826,
      "learning_rate": 2.3247146128248183e-06,
      "loss": 4.3409,
      "step": 870
    },
    {
      "epoch": 0.7565689467969599,
      "grad_norm": 3.103240864167305,
      "learning_rate": 2.3157908977047096e-06,
      "loss": 4.1706,
      "step": 871
    },
    {
      "epoch": 0.7574375678610207,
      "grad_norm": 5.956940596200794,
      "learning_rate": 2.3068921912096585e-06,
      "loss": 4.3036,
      "step": 872
    },
    {
      "epoch": 0.7583061889250814,
      "grad_norm": 4.030266382079339,
      "learning_rate": 2.2980185632296797e-06,
      "loss": 4.3916,
      "step": 873
    },
    {
      "epoch": 0.7591748099891422,
      "grad_norm": 4.907585920293589,
      "learning_rate": 2.2891700834578175e-06,
      "loss": 4.1033,
      "step": 874
    },
    {
      "epoch": 0.760043431053203,
      "grad_norm": 3.297402834871266,
      "learning_rate": 2.2803468213896063e-06,
      "loss": 4.3624,
      "step": 875
    },
    {
      "epoch": 0.7609120521172639,
      "grad_norm": 4.662439533859811,
      "learning_rate": 2.2715488463225228e-06,
      "loss": 4.3034,
      "step": 876
    },
    {
      "epoch": 0.7617806731813247,
      "grad_norm": 5.165185490895554,
      "learning_rate": 2.262776227355439e-06,
      "loss": 4.2157,
      "step": 877
    },
    {
      "epoch": 0.7626492942453854,
      "grad_norm": 7.506559487228573,
      "learning_rate": 2.254029033388084e-06,
      "loss": 4.4301,
      "step": 878
    },
    {
      "epoch": 0.7635179153094462,
      "grad_norm": 3.9252824887426554,
      "learning_rate": 2.2453073331204957e-06,
      "loss": 4.2351,
      "step": 879
    },
    {
      "epoch": 0.7643865363735071,
      "grad_norm": 5.290959184706457,
      "learning_rate": 2.2366111950524906e-06,
      "loss": 4.2538,
      "step": 880
    },
    {
      "epoch": 0.7652551574375679,
      "grad_norm": 4.7171299694367175,
      "learning_rate": 2.2279406874831164e-06,
      "loss": 4.4357,
      "step": 881
    },
    {
      "epoch": 0.7661237785016287,
      "grad_norm": 29.979748449798375,
      "learning_rate": 2.2192958785101258e-06,
      "loss": 4.3621,
      "step": 882
    },
    {
      "epoch": 0.7669923995656894,
      "grad_norm": 7.860730063348475,
      "learning_rate": 2.210676836029429e-06,
      "loss": 4.3751,
      "step": 883
    },
    {
      "epoch": 0.7678610206297503,
      "grad_norm": 4.334356696505269,
      "learning_rate": 2.20208362773457e-06,
      "loss": 4.2966,
      "step": 884
    },
    {
      "epoch": 0.7687296416938111,
      "grad_norm": 7.332346221259919,
      "learning_rate": 2.193516321116198e-06,
      "loss": 4.2862,
      "step": 885
    },
    {
      "epoch": 0.7695982627578719,
      "grad_norm": 5.9624071677426125,
      "learning_rate": 2.1849749834615235e-06,
      "loss": 4.3632,
      "step": 886
    },
    {
      "epoch": 0.7704668838219327,
      "grad_norm": 4.954161674367398,
      "learning_rate": 2.176459681853801e-06,
      "loss": 4.2059,
      "step": 887
    },
    {
      "epoch": 0.7713355048859935,
      "grad_norm": 6.378179042127337,
      "learning_rate": 2.167970483171801e-06,
      "loss": 4.166,
      "step": 888
    },
    {
      "epoch": 0.7722041259500543,
      "grad_norm": 4.9618200288070575,
      "learning_rate": 2.1595074540892815e-06,
      "loss": 4.3356,
      "step": 889
    },
    {
      "epoch": 0.7730727470141151,
      "grad_norm": 6.856421677907798,
      "learning_rate": 2.1510706610744654e-06,
      "loss": 4.3511,
      "step": 890
    },
    {
      "epoch": 0.7739413680781759,
      "grad_norm": 4.410108580533019,
      "learning_rate": 2.1426601703895195e-06,
      "loss": 4.2937,
      "step": 891
    },
    {
      "epoch": 0.7748099891422368,
      "grad_norm": 7.583217716536857,
      "learning_rate": 2.13427604809003e-06,
      "loss": 4.3183,
      "step": 892
    },
    {
      "epoch": 0.7756786102062975,
      "grad_norm": 5.9361144172296685,
      "learning_rate": 2.125918360024493e-06,
      "loss": 4.374,
      "step": 893
    },
    {
      "epoch": 0.7765472312703583,
      "grad_norm": 6.875470826753328,
      "learning_rate": 2.117587171833789e-06,
      "loss": 4.4312,
      "step": 894
    },
    {
      "epoch": 0.7774158523344191,
      "grad_norm": 5.096832067096056,
      "learning_rate": 2.109282548950667e-06,
      "loss": 4.278,
      "step": 895
    },
    {
      "epoch": 0.77828447339848,
      "grad_norm": 5.729522521702186,
      "learning_rate": 2.1010045565992363e-06,
      "loss": 4.2937,
      "step": 896
    },
    {
      "epoch": 0.7791530944625407,
      "grad_norm": 6.742811437853955,
      "learning_rate": 2.0927532597944496e-06,
      "loss": 4.2172,
      "step": 897
    },
    {
      "epoch": 0.7800217155266015,
      "grad_norm": 5.6301478715572975,
      "learning_rate": 2.0845287233415963e-06,
      "loss": 4.176,
      "step": 898
    },
    {
      "epoch": 0.7808903365906623,
      "grad_norm": 5.235841496697818,
      "learning_rate": 2.0763310118357893e-06,
      "loss": 4.3747,
      "step": 899
    },
    {
      "epoch": 0.7817589576547231,
      "grad_norm": 5.049217344169785,
      "learning_rate": 2.06816018966146e-06,
      "loss": 4.2626,
      "step": 900
    },
    {
      "epoch": 0.782627578718784,
      "grad_norm": 4.5457661427037195,
      "learning_rate": 2.060016320991853e-06,
      "loss": 4.2773,
      "step": 901
    },
    {
      "epoch": 0.7834961997828447,
      "grad_norm": 4.117834715315518,
      "learning_rate": 2.051899469788522e-06,
      "loss": 4.3088,
      "step": 902
    },
    {
      "epoch": 0.7843648208469055,
      "grad_norm": 5.135053442236565,
      "learning_rate": 2.043809699800824e-06,
      "loss": 4.2918,
      "step": 903
    },
    {
      "epoch": 0.7852334419109663,
      "grad_norm": 7.82388906554306,
      "learning_rate": 2.0357470745654213e-06,
      "loss": 4.3529,
      "step": 904
    },
    {
      "epoch": 0.7861020629750272,
      "grad_norm": 4.837908722893104,
      "learning_rate": 2.0277116574057905e-06,
      "loss": 4.1562,
      "step": 905
    },
    {
      "epoch": 0.786970684039088,
      "grad_norm": 3.68904796267135,
      "learning_rate": 2.0197035114317056e-06,
      "loss": 4.2128,
      "step": 906
    },
    {
      "epoch": 0.7878393051031487,
      "grad_norm": 4.534016411952663,
      "learning_rate": 2.0117226995387625e-06,
      "loss": 4.3172,
      "step": 907
    },
    {
      "epoch": 0.7887079261672095,
      "grad_norm": 3.526628215688476,
      "learning_rate": 2.0037692844078728e-06,
      "loss": 4.2893,
      "step": 908
    },
    {
      "epoch": 0.7895765472312704,
      "grad_norm": 3.8435396264813457,
      "learning_rate": 1.9958433285047766e-06,
      "loss": 4.2629,
      "step": 909
    },
    {
      "epoch": 0.7904451682953312,
      "grad_norm": 12.416597855589442,
      "learning_rate": 1.9879448940795496e-06,
      "loss": 4.1928,
      "step": 910
    },
    {
      "epoch": 0.791313789359392,
      "grad_norm": 3.4260543752536354,
      "learning_rate": 1.980074043166118e-06,
      "loss": 4.3879,
      "step": 911
    },
    {
      "epoch": 0.7921824104234527,
      "grad_norm": 4.024334100744361,
      "learning_rate": 1.9722308375817616e-06,
      "loss": 4.3358,
      "step": 912
    },
    {
      "epoch": 0.7930510314875135,
      "grad_norm": 3.8937964884137704,
      "learning_rate": 1.9644153389266428e-06,
      "loss": 4.2585,
      "step": 913
    },
    {
      "epoch": 0.7939196525515744,
      "grad_norm": 4.112423538802947,
      "learning_rate": 1.9566276085833137e-06,
      "loss": 4.3666,
      "step": 914
    },
    {
      "epoch": 0.7947882736156352,
      "grad_norm": 3.361914528770081,
      "learning_rate": 1.9488677077162294e-06,
      "loss": 4.3395,
      "step": 915
    },
    {
      "epoch": 0.795656894679696,
      "grad_norm": 3.201261785374445,
      "learning_rate": 1.9411356972712802e-06,
      "loss": 4.1024,
      "step": 916
    },
    {
      "epoch": 0.7965255157437567,
      "grad_norm": 3.1715636963506193,
      "learning_rate": 1.9334316379753037e-06,
      "loss": 4.3268,
      "step": 917
    },
    {
      "epoch": 0.7973941368078176,
      "grad_norm": 4.702760089867412,
      "learning_rate": 1.92575559033561e-06,
      "loss": 4.2872,
      "step": 918
    },
    {
      "epoch": 0.7982627578718784,
      "grad_norm": 4.301084643411801,
      "learning_rate": 1.91810761463951e-06,
      "loss": 4.2491,
      "step": 919
    },
    {
      "epoch": 0.7991313789359392,
      "grad_norm": 4.368620526838832,
      "learning_rate": 1.9104877709538346e-06,
      "loss": 4.1825,
      "step": 920
    },
    {
      "epoch": 0.8,
      "grad_norm": 4.541552734260731,
      "learning_rate": 1.902896119124471e-06,
      "loss": 4.2936,
      "step": 921
    },
    {
      "epoch": 0.8008686210640608,
      "grad_norm": 3.753504642364837,
      "learning_rate": 1.8953327187758872e-06,
      "loss": 4.2807,
      "step": 922
    },
    {
      "epoch": 0.8017372421281216,
      "grad_norm": 4.1076335957004035,
      "learning_rate": 1.8877976293106645e-06,
      "loss": 4.3054,
      "step": 923
    },
    {
      "epoch": 0.8026058631921824,
      "grad_norm": 5.022940647754388,
      "learning_rate": 1.8802909099090328e-06,
      "loss": 4.2605,
      "step": 924
    },
    {
      "epoch": 0.8034744842562432,
      "grad_norm": 4.601415915720413,
      "learning_rate": 1.8728126195284063e-06,
      "loss": 4.2974,
      "step": 925
    },
    {
      "epoch": 0.8043431053203041,
      "grad_norm": 3.5937684601582505,
      "learning_rate": 1.8653628169029172e-06,
      "loss": 4.4372,
      "step": 926
    },
    {
      "epoch": 0.8052117263843648,
      "grad_norm": 4.087141421454951,
      "learning_rate": 1.8579415605429566e-06,
      "loss": 4.3578,
      "step": 927
    },
    {
      "epoch": 0.8060803474484256,
      "grad_norm": 3.768303031259501,
      "learning_rate": 1.850548908734715e-06,
      "loss": 4.3954,
      "step": 928
    },
    {
      "epoch": 0.8069489685124864,
      "grad_norm": 4.631014557901584,
      "learning_rate": 1.843184919539724e-06,
      "loss": 4.3692,
      "step": 929
    },
    {
      "epoch": 0.8078175895765473,
      "grad_norm": 5.588991832935504,
      "learning_rate": 1.8358496507944004e-06,
      "loss": 4.3679,
      "step": 930
    },
    {
      "epoch": 0.8086862106406081,
      "grad_norm": 4.760700300967851,
      "learning_rate": 1.8285431601095932e-06,
      "loss": 4.2824,
      "step": 931
    },
    {
      "epoch": 0.8095548317046688,
      "grad_norm": 7.110512990765382,
      "learning_rate": 1.8212655048701263e-06,
      "loss": 4.276,
      "step": 932
    },
    {
      "epoch": 0.8104234527687296,
      "grad_norm": 4.84933914886446,
      "learning_rate": 1.8140167422343536e-06,
      "loss": 4.3264,
      "step": 933
    },
    {
      "epoch": 0.8112920738327905,
      "grad_norm": 6.250495292898315,
      "learning_rate": 1.8067969291337111e-06,
      "loss": 4.2783,
      "step": 934
    },
    {
      "epoch": 0.8121606948968513,
      "grad_norm": 5.081084041830173,
      "learning_rate": 1.7996061222722602e-06,
      "loss": 4.1333,
      "step": 935
    },
    {
      "epoch": 0.8130293159609121,
      "grad_norm": 4.764801399944141,
      "learning_rate": 1.7924443781262537e-06,
      "loss": 4.258,
      "step": 936
    },
    {
      "epoch": 0.8138979370249728,
      "grad_norm": 12.564644718015563,
      "learning_rate": 1.7853117529436853e-06,
      "loss": 4.4282,
      "step": 937
    },
    {
      "epoch": 0.8147665580890336,
      "grad_norm": 9.219100670998142,
      "learning_rate": 1.7782083027438493e-06,
      "loss": 4.4036,
      "step": 938
    },
    {
      "epoch": 0.8156351791530945,
      "grad_norm": 3.513961026121951,
      "learning_rate": 1.7711340833169027e-06,
      "loss": 4.2192,
      "step": 939
    },
    {
      "epoch": 0.8165038002171553,
      "grad_norm": 7.5648391009618985,
      "learning_rate": 1.7640891502234242e-06,
      "loss": 4.3509,
      "step": 940
    },
    {
      "epoch": 0.8173724212812161,
      "grad_norm": 7.457786533569347,
      "learning_rate": 1.7570735587939774e-06,
      "loss": 4.4761,
      "step": 941
    },
    {
      "epoch": 0.8182410423452768,
      "grad_norm": 6.24994158926114,
      "learning_rate": 1.7500873641286822e-06,
      "loss": 4.3524,
      "step": 942
    },
    {
      "epoch": 0.8191096634093377,
      "grad_norm": 4.706302961561861,
      "learning_rate": 1.7431306210967757e-06,
      "loss": 4.4049,
      "step": 943
    },
    {
      "epoch": 0.8199782844733985,
      "grad_norm": 5.2319379724074375,
      "learning_rate": 1.7362033843361808e-06,
      "loss": 4.3184,
      "step": 944
    },
    {
      "epoch": 0.8208469055374593,
      "grad_norm": 3.9931677964283336,
      "learning_rate": 1.7293057082530823e-06,
      "loss": 4.3872,
      "step": 945
    },
    {
      "epoch": 0.8217155266015201,
      "grad_norm": 5.122675213766239,
      "learning_rate": 1.7224376470214965e-06,
      "loss": 4.2414,
      "step": 946
    },
    {
      "epoch": 0.8225841476655809,
      "grad_norm": 4.8271711783672595,
      "learning_rate": 1.7155992545828459e-06,
      "loss": 4.3155,
      "step": 947
    },
    {
      "epoch": 0.8234527687296417,
      "grad_norm": 5.08549920131417,
      "learning_rate": 1.708790584645536e-06,
      "loss": 4.3494,
      "step": 948
    },
    {
      "epoch": 0.8243213897937025,
      "grad_norm": 3.2828478853516265,
      "learning_rate": 1.7020116906845314e-06,
      "loss": 4.1676,
      "step": 949
    },
    {
      "epoch": 0.8251900108577633,
      "grad_norm": 4.217032686568317,
      "learning_rate": 1.6952626259409403e-06,
      "loss": 4.2612,
      "step": 950
    },
    {
      "epoch": 0.826058631921824,
      "grad_norm": 3.3093948746453967,
      "learning_rate": 1.6885434434215928e-06,
      "loss": 4.2619,
      "step": 951
    },
    {
      "epoch": 0.8269272529858849,
      "grad_norm": 3.4071908680921816,
      "learning_rate": 1.681854195898624e-06,
      "loss": 4.3616,
      "step": 952
    },
    {
      "epoch": 0.8277958740499457,
      "grad_norm": 3.950423518377903,
      "learning_rate": 1.6751949359090608e-06,
      "loss": 4.4618,
      "step": 953
    },
    {
      "epoch": 0.8286644951140065,
      "grad_norm": 3.113460449412214,
      "learning_rate": 1.6685657157544152e-06,
      "loss": 4.2883,
      "step": 954
    },
    {
      "epoch": 0.8295331161780674,
      "grad_norm": 3.978159856987336,
      "learning_rate": 1.6619665875002589e-06,
      "loss": 4.2919,
      "step": 955
    },
    {
      "epoch": 0.8304017372421281,
      "grad_norm": 3.627190156440042,
      "learning_rate": 1.655397602975829e-06,
      "loss": 4.4444,
      "step": 956
    },
    {
      "epoch": 0.8312703583061889,
      "grad_norm": 3.501429507504989,
      "learning_rate": 1.6488588137736142e-06,
      "loss": 4.3162,
      "step": 957
    },
    {
      "epoch": 0.8321389793702497,
      "grad_norm": 3.912898082889782,
      "learning_rate": 1.6423502712489498e-06,
      "loss": 4.1927,
      "step": 958
    },
    {
      "epoch": 0.8330076004343105,
      "grad_norm": 2.9544066919092002,
      "learning_rate": 1.6358720265196162e-06,
      "loss": 4.2873,
      "step": 959
    },
    {
      "epoch": 0.8338762214983714,
      "grad_norm": 3.7766990336064423,
      "learning_rate": 1.629424130465436e-06,
      "loss": 4.2311,
      "step": 960
    },
    {
      "epoch": 0.8347448425624321,
      "grad_norm": 3.187557348988122,
      "learning_rate": 1.6230066337278721e-06,
      "loss": 4.3319,
      "step": 961
    },
    {
      "epoch": 0.8356134636264929,
      "grad_norm": 2.9035478405536184,
      "learning_rate": 1.6166195867096379e-06,
      "loss": 4.2798,
      "step": 962
    },
    {
      "epoch": 0.8364820846905537,
      "grad_norm": 3.581073588055323,
      "learning_rate": 1.6102630395742936e-06,
      "loss": 4.2378,
      "step": 963
    },
    {
      "epoch": 0.8373507057546146,
      "grad_norm": 2.9717432186991313,
      "learning_rate": 1.603937042245851e-06,
      "loss": 4.1643,
      "step": 964
    },
    {
      "epoch": 0.8382193268186754,
      "grad_norm": 3.4682548739965195,
      "learning_rate": 1.5976416444083919e-06,
      "loss": 4.2895,
      "step": 965
    },
    {
      "epoch": 0.8390879478827361,
      "grad_norm": 2.8257749424306917,
      "learning_rate": 1.5913768955056669e-06,
      "loss": 4.2387,
      "step": 966
    },
    {
      "epoch": 0.8399565689467969,
      "grad_norm": 3.4045296792179585,
      "learning_rate": 1.585142844740712e-06,
      "loss": 4.2493,
      "step": 967
    },
    {
      "epoch": 0.8408251900108578,
      "grad_norm": 3.632548936442941,
      "learning_rate": 1.5789395410754624e-06,
      "loss": 4.0125,
      "step": 968
    },
    {
      "epoch": 0.8416938110749186,
      "grad_norm": 3.458367314452796,
      "learning_rate": 1.5727670332303662e-06,
      "loss": 4.2027,
      "step": 969
    },
    {
      "epoch": 0.8425624321389794,
      "grad_norm": 3.5230657596459563,
      "learning_rate": 1.5666253696840039e-06,
      "loss": 4.2408,
      "step": 970
    },
    {
      "epoch": 0.8434310532030401,
      "grad_norm": 3.0474045364092475,
      "learning_rate": 1.5605145986727055e-06,
      "loss": 4.1706,
      "step": 971
    },
    {
      "epoch": 0.844299674267101,
      "grad_norm": 2.9068561874294243,
      "learning_rate": 1.5544347681901708e-06,
      "loss": 4.2883,
      "step": 972
    },
    {
      "epoch": 0.8451682953311618,
      "grad_norm": 3.2628088807621385,
      "learning_rate": 1.548385925987097e-06,
      "loss": 4.2064,
      "step": 973
    },
    {
      "epoch": 0.8460369163952226,
      "grad_norm": 2.877609574460853,
      "learning_rate": 1.5423681195707995e-06,
      "loss": 4.2439,
      "step": 974
    },
    {
      "epoch": 0.8469055374592834,
      "grad_norm": 3.6963360738924007,
      "learning_rate": 1.5363813962048404e-06,
      "loss": 4.4093,
      "step": 975
    },
    {
      "epoch": 0.8477741585233441,
      "grad_norm": 3.6396595948485686,
      "learning_rate": 1.530425802908657e-06,
      "loss": 4.2486,
      "step": 976
    },
    {
      "epoch": 0.848642779587405,
      "grad_norm": 3.0025316303653593,
      "learning_rate": 1.5245013864571915e-06,
      "loss": 4.2436,
      "step": 977
    },
    {
      "epoch": 0.8495114006514658,
      "grad_norm": 3.075143454138421,
      "learning_rate": 1.518608193380527e-06,
      "loss": 4.1952,
      "step": 978
    },
    {
      "epoch": 0.8503800217155266,
      "grad_norm": 3.837382455029135,
      "learning_rate": 1.5127462699635175e-06,
      "loss": 4.3256,
      "step": 979
    },
    {
      "epoch": 0.8512486427795874,
      "grad_norm": 3.912314406379558,
      "learning_rate": 1.5069156622454286e-06,
      "loss": 4.1805,
      "step": 980
    },
    {
      "epoch": 0.8521172638436482,
      "grad_norm": 3.556579266906855,
      "learning_rate": 1.5011164160195713e-06,
      "loss": 4.187,
      "step": 981
    },
    {
      "epoch": 0.852985884907709,
      "grad_norm": 4.767048077361486,
      "learning_rate": 1.495348576832945e-06,
      "loss": 4.3401,
      "step": 982
    },
    {
      "epoch": 0.8538545059717698,
      "grad_norm": 3.34570276142316,
      "learning_rate": 1.4896121899858855e-06,
      "loss": 4.3152,
      "step": 983
    },
    {
      "epoch": 0.8547231270358306,
      "grad_norm": 3.928790482520009,
      "learning_rate": 1.4839073005316954e-06,
      "loss": 4.4026,
      "step": 984
    },
    {
      "epoch": 0.8555917480998915,
      "grad_norm": 3.896817042687782,
      "learning_rate": 1.4782339532763035e-06,
      "loss": 4.3404,
      "step": 985
    },
    {
      "epoch": 0.8564603691639522,
      "grad_norm": 3.5320973494543972,
      "learning_rate": 1.4725921927779053e-06,
      "loss": 4.2046,
      "step": 986
    },
    {
      "epoch": 0.857328990228013,
      "grad_norm": 4.983206804496437,
      "learning_rate": 1.466982063346617e-06,
      "loss": 4.2724,
      "step": 987
    },
    {
      "epoch": 0.8581976112920738,
      "grad_norm": 2.6576025845538602,
      "learning_rate": 1.4614036090441242e-06,
      "loss": 4.2774,
      "step": 988
    },
    {
      "epoch": 0.8590662323561347,
      "grad_norm": 3.890820577234005,
      "learning_rate": 1.4558568736833403e-06,
      "loss": 4.3634,
      "step": 989
    },
    {
      "epoch": 0.8599348534201955,
      "grad_norm": 4.047293622617586,
      "learning_rate": 1.450341900828055e-06,
      "loss": 4.2452,
      "step": 990
    },
    {
      "epoch": 0.8608034744842562,
      "grad_norm": 3.082152289805242,
      "learning_rate": 1.4448587337926029e-06,
      "loss": 4.2638,
      "step": 991
    },
    {
      "epoch": 0.861672095548317,
      "grad_norm": 3.279358197940446,
      "learning_rate": 1.4394074156415127e-06,
      "loss": 4.4121,
      "step": 992
    },
    {
      "epoch": 0.8625407166123779,
      "grad_norm": 3.527792553396294,
      "learning_rate": 1.4339879891891745e-06,
      "loss": 4.1801,
      "step": 993
    },
    {
      "epoch": 0.8634093376764387,
      "grad_norm": 3.505923077019055,
      "learning_rate": 1.4286004969995026e-06,
      "loss": 4.2853,
      "step": 994
    },
    {
      "epoch": 0.8642779587404995,
      "grad_norm": 3.286205070282355,
      "learning_rate": 1.4232449813856024e-06,
      "loss": 4.375,
      "step": 995
    },
    {
      "epoch": 0.8651465798045602,
      "grad_norm": 3.931680228410995,
      "learning_rate": 1.4179214844094354e-06,
      "loss": 4.2502,
      "step": 996
    },
    {
      "epoch": 0.866015200868621,
      "grad_norm": 2.4039398272576897,
      "learning_rate": 1.4126300478814912e-06,
      "loss": 4.208,
      "step": 997
    },
    {
      "epoch": 0.8668838219326819,
      "grad_norm": 3.7568390710068518,
      "learning_rate": 1.4073707133604553e-06,
      "loss": 4.1628,
      "step": 998
    },
    {
      "epoch": 0.8677524429967427,
      "grad_norm": 3.3414877541755748,
      "learning_rate": 1.4021435221528907e-06,
      "loss": 4.1127,
      "step": 999
    },
    {
      "epoch": 0.8686210640608035,
      "grad_norm": 3.458776188975169,
      "learning_rate": 1.3969485153129052e-06,
      "loss": 4.1204,
      "step": 1000
    },
    {
      "epoch": 0.8694896851248642,
      "grad_norm": 4.919567300721427,
      "learning_rate": 1.3917857336418311e-06,
      "loss": 4.3408,
      "step": 1001
    },
    {
      "epoch": 0.8703583061889251,
      "grad_norm": 3.065606325345494,
      "learning_rate": 1.3866552176879073e-06,
      "loss": 3.9601,
      "step": 1002
    },
    {
      "epoch": 0.8712269272529859,
      "grad_norm": 3.761007586350327,
      "learning_rate": 1.3815570077459616e-06,
      "loss": 4.2438,
      "step": 1003
    },
    {
      "epoch": 0.8720955483170467,
      "grad_norm": 2.885986718619506,
      "learning_rate": 1.3764911438570873e-06,
      "loss": 4.26,
      "step": 1004
    },
    {
      "epoch": 0.8729641693811075,
      "grad_norm": 3.826731301340684,
      "learning_rate": 1.3714576658083356e-06,
      "loss": 4.3967,
      "step": 1005
    },
    {
      "epoch": 0.8738327904451683,
      "grad_norm": 2.7458285420431805,
      "learning_rate": 1.366456613132402e-06,
      "loss": 4.3103,
      "step": 1006
    },
    {
      "epoch": 0.8747014115092291,
      "grad_norm": 3.0626194989917996,
      "learning_rate": 1.3614880251073126e-06,
      "loss": 4.3979,
      "step": 1007
    },
    {
      "epoch": 0.8755700325732899,
      "grad_norm": 2.904930109177129,
      "learning_rate": 1.356551940756119e-06,
      "loss": 4.1057,
      "step": 1008
    },
    {
      "epoch": 0.8764386536373507,
      "grad_norm": 2.8060273606690864,
      "learning_rate": 1.3516483988465911e-06,
      "loss": 4.2066,
      "step": 1009
    },
    {
      "epoch": 0.8773072747014115,
      "grad_norm": 2.9968484430903852,
      "learning_rate": 1.3467774378909088e-06,
      "loss": 4.2804,
      "step": 1010
    },
    {
      "epoch": 0.8781758957654723,
      "grad_norm": 2.4009421672150264,
      "learning_rate": 1.3419390961453673e-06,
      "loss": 4.2054,
      "step": 1011
    },
    {
      "epoch": 0.8790445168295331,
      "grad_norm": 2.9054455101472914,
      "learning_rate": 1.3371334116100692e-06,
      "loss": 4.2161,
      "step": 1012
    },
    {
      "epoch": 0.8799131378935939,
      "grad_norm": 2.7097147432315474,
      "learning_rate": 1.332360422028629e-06,
      "loss": 4.2635,
      "step": 1013
    },
    {
      "epoch": 0.8807817589576548,
      "grad_norm": 3.4351233135843895,
      "learning_rate": 1.3276201648878778e-06,
      "loss": 4.235,
      "step": 1014
    },
    {
      "epoch": 0.8816503800217155,
      "grad_norm": 3.021256304492239,
      "learning_rate": 1.3229126774175663e-06,
      "loss": 4.1398,
      "step": 1015
    },
    {
      "epoch": 0.8825190010857763,
      "grad_norm": 4.561864873305928,
      "learning_rate": 1.3182379965900755e-06,
      "loss": 4.2196,
      "step": 1016
    },
    {
      "epoch": 0.8833876221498371,
      "grad_norm": 2.25054567072425,
      "learning_rate": 1.3135961591201234e-06,
      "loss": 4.3461,
      "step": 1017
    },
    {
      "epoch": 0.884256243213898,
      "grad_norm": 4.349538372833775,
      "learning_rate": 1.3089872014644772e-06,
      "loss": 4.1902,
      "step": 1018
    },
    {
      "epoch": 0.8851248642779588,
      "grad_norm": 5.211240766793744,
      "learning_rate": 1.3044111598216697e-06,
      "loss": 4.127,
      "step": 1019
    },
    {
      "epoch": 0.8859934853420195,
      "grad_norm": 3.915181006258746,
      "learning_rate": 1.2998680701317116e-06,
      "loss": 4.3165,
      "step": 1020
    },
    {
      "epoch": 0.8868621064060803,
      "grad_norm": 2.634220197635655,
      "learning_rate": 1.2953579680758102e-06,
      "loss": 4.0896,
      "step": 1021
    },
    {
      "epoch": 0.8877307274701411,
      "grad_norm": 4.444464060721951,
      "learning_rate": 1.2908808890760898e-06,
      "loss": 4.3389,
      "step": 1022
    },
    {
      "epoch": 0.888599348534202,
      "grad_norm": 3.6060226328219223,
      "learning_rate": 1.2864368682953144e-06,
      "loss": 4.3551,
      "step": 1023
    },
    {
      "epoch": 0.8894679695982628,
      "grad_norm": 5.166782973498534,
      "learning_rate": 1.2820259406366086e-06,
      "loss": 4.343,
      "step": 1024
    },
    {
      "epoch": 0.8903365906623235,
      "grad_norm": 2.8773053428878397,
      "learning_rate": 1.2776481407431858e-06,
      "loss": 4.2031,
      "step": 1025
    },
    {
      "epoch": 0.8912052117263843,
      "grad_norm": 4.339800538926299,
      "learning_rate": 1.2733035029980764e-06,
      "loss": 4.348,
      "step": 1026
    },
    {
      "epoch": 0.8920738327904452,
      "grad_norm": 3.237109745257262,
      "learning_rate": 1.2689920615238564e-06,
      "loss": 4.4058,
      "step": 1027
    },
    {
      "epoch": 0.892942453854506,
      "grad_norm": 3.5663716818909514,
      "learning_rate": 1.2647138501823787e-06,
      "loss": 4.3298,
      "step": 1028
    },
    {
      "epoch": 0.8938110749185668,
      "grad_norm": 3.7872661083743613,
      "learning_rate": 1.2604689025745097e-06,
      "loss": 4.3178,
      "step": 1029
    },
    {
      "epoch": 0.8946796959826275,
      "grad_norm": 3.555878208510988,
      "learning_rate": 1.2562572520398636e-06,
      "loss": 4.429,
      "step": 1030
    },
    {
      "epoch": 0.8955483170466884,
      "grad_norm": 3.548292136180123,
      "learning_rate": 1.2520789316565407e-06,
      "loss": 4.2436,
      "step": 1031
    },
    {
      "epoch": 0.8964169381107492,
      "grad_norm": 2.8754548971872453,
      "learning_rate": 1.247933974240869e-06,
      "loss": 4.2454,
      "step": 1032
    },
    {
      "epoch": 0.89728555917481,
      "grad_norm": 3.1391372015122534,
      "learning_rate": 1.2438224123471442e-06,
      "loss": 4.2209,
      "step": 1033
    },
    {
      "epoch": 0.8981541802388708,
      "grad_norm": 4.08265544167836,
      "learning_rate": 1.2397442782673751e-06,
      "loss": 4.2561,
      "step": 1034
    },
    {
      "epoch": 0.8990228013029316,
      "grad_norm": 2.8484362391754097,
      "learning_rate": 1.2356996040310312e-06,
      "loss": 4.2269,
      "step": 1035
    },
    {
      "epoch": 0.8998914223669924,
      "grad_norm": 3.802284151154688,
      "learning_rate": 1.231688421404789e-06,
      "loss": 4.149,
      "step": 1036
    },
    {
      "epoch": 0.9007600434310532,
      "grad_norm": 2.8793289723328175,
      "learning_rate": 1.2277107618922843e-06,
      "loss": 4.2458,
      "step": 1037
    },
    {
      "epoch": 0.901628664495114,
      "grad_norm": 3.515343781948761,
      "learning_rate": 1.2237666567338632e-06,
      "loss": 4.0914,
      "step": 1038
    },
    {
      "epoch": 0.9024972855591749,
      "grad_norm": 3.5760188126248926,
      "learning_rate": 1.2198561369063366e-06,
      "loss": 4.1833,
      "step": 1039
    },
    {
      "epoch": 0.9033659066232356,
      "grad_norm": 2.8127761697413787,
      "learning_rate": 1.2159792331227404e-06,
      "loss": 4.1005,
      "step": 1040
    },
    {
      "epoch": 0.9042345276872964,
      "grad_norm": 3.4802083558560195,
      "learning_rate": 1.212135975832091e-06,
      "loss": 4.1616,
      "step": 1041
    },
    {
      "epoch": 0.9051031487513572,
      "grad_norm": 3.3333225446939205,
      "learning_rate": 1.2083263952191446e-06,
      "loss": 4.3845,
      "step": 1042
    },
    {
      "epoch": 0.905971769815418,
      "grad_norm": 2.88436141121272,
      "learning_rate": 1.2045505212041644e-06,
      "loss": 4.3108,
      "step": 1043
    },
    {
      "epoch": 0.9068403908794789,
      "grad_norm": 3.6801662883359194,
      "learning_rate": 1.200808383442684e-06,
      "loss": 4.2905,
      "step": 1044
    },
    {
      "epoch": 0.9077090119435396,
      "grad_norm": 3.019629347981631,
      "learning_rate": 1.1971000113252726e-06,
      "loss": 4.1987,
      "step": 1045
    },
    {
      "epoch": 0.9085776330076004,
      "grad_norm": 4.409221785598703,
      "learning_rate": 1.1934254339773074e-06,
      "loss": 4.3084,
      "step": 1046
    },
    {
      "epoch": 0.9094462540716612,
      "grad_norm": 2.709116535172474,
      "learning_rate": 1.1897846802587395e-06,
      "loss": 4.1717,
      "step": 1047
    },
    {
      "epoch": 0.9103148751357221,
      "grad_norm": 3.8179262996054235,
      "learning_rate": 1.1861777787638762e-06,
      "loss": 4.3074,
      "step": 1048
    },
    {
      "epoch": 0.9111834961997829,
      "grad_norm": 3.281229721724311,
      "learning_rate": 1.1826047578211473e-06,
      "loss": 4.5075,
      "step": 1049
    },
    {
      "epoch": 0.9120521172638436,
      "grad_norm": 3.0456938909447113,
      "learning_rate": 1.1790656454928866e-06,
      "loss": 4.4835,
      "step": 1050
    },
    {
      "epoch": 0.9129207383279044,
      "grad_norm": 3.2615534456726123,
      "learning_rate": 1.1755604695751134e-06,
      "loss": 4.3037,
      "step": 1051
    },
    {
      "epoch": 0.9137893593919653,
      "grad_norm": 2.9749791444621163,
      "learning_rate": 1.1720892575973095e-06,
      "loss": 4.3342,
      "step": 1052
    },
    {
      "epoch": 0.9146579804560261,
      "grad_norm": 3.3527992702942577,
      "learning_rate": 1.1686520368222066e-06,
      "loss": 4.2419,
      "step": 1053
    },
    {
      "epoch": 0.9155266015200869,
      "grad_norm": 2.4945121180562797,
      "learning_rate": 1.1652488342455726e-06,
      "loss": 4.1137,
      "step": 1054
    },
    {
      "epoch": 0.9163952225841476,
      "grad_norm": 3.2304134555988897,
      "learning_rate": 1.161879676595996e-06,
      "loss": 4.3529,
      "step": 1055
    },
    {
      "epoch": 0.9172638436482085,
      "grad_norm": 6.349732062570278,
      "learning_rate": 1.1585445903346784e-06,
      "loss": 4.1944,
      "step": 1056
    },
    {
      "epoch": 0.9181324647122693,
      "grad_norm": 2.9360851966959536,
      "learning_rate": 1.1552436016552273e-06,
      "loss": 4.1959,
      "step": 1057
    },
    {
      "epoch": 0.9190010857763301,
      "grad_norm": 3.205644542692188,
      "learning_rate": 1.1519767364834494e-06,
      "loss": 4.3528,
      "step": 1058
    },
    {
      "epoch": 0.9198697068403909,
      "grad_norm": 3.2880341446977126,
      "learning_rate": 1.1487440204771454e-06,
      "loss": 4.3303,
      "step": 1059
    },
    {
      "epoch": 0.9207383279044516,
      "grad_norm": 3.6622193093061615,
      "learning_rate": 1.1455454790259118e-06,
      "loss": 4.2606,
      "step": 1060
    },
    {
      "epoch": 0.9216069489685125,
      "grad_norm": 3.0436045507003873,
      "learning_rate": 1.14238113725094e-06,
      "loss": 4.3319,
      "step": 1061
    },
    {
      "epoch": 0.9224755700325733,
      "grad_norm": 2.946350336011715,
      "learning_rate": 1.1392510200048167e-06,
      "loss": 4.3378,
      "step": 1062
    },
    {
      "epoch": 0.9233441910966341,
      "grad_norm": 2.864144280137868,
      "learning_rate": 1.1361551518713331e-06,
      "loss": 4.3061,
      "step": 1063
    },
    {
      "epoch": 0.924212812160695,
      "grad_norm": 2.742188676823669,
      "learning_rate": 1.133093557165288e-06,
      "loss": 4.4552,
      "step": 1064
    },
    {
      "epoch": 0.9250814332247557,
      "grad_norm": 2.961292413358245,
      "learning_rate": 1.1300662599322992e-06,
      "loss": 4.2678,
      "step": 1065
    },
    {
      "epoch": 0.9259500542888165,
      "grad_norm": 3.237248386245042,
      "learning_rate": 1.1270732839486137e-06,
      "loss": 4.3326,
      "step": 1066
    },
    {
      "epoch": 0.9268186753528773,
      "grad_norm": 3.4990748997732877,
      "learning_rate": 1.1241146527209192e-06,
      "loss": 4.3578,
      "step": 1067
    },
    {
      "epoch": 0.9276872964169381,
      "grad_norm": 2.895322090554506,
      "learning_rate": 1.1211903894861655e-06,
      "loss": 4.2311,
      "step": 1068
    },
    {
      "epoch": 0.9285559174809989,
      "grad_norm": 3.687438443063813,
      "learning_rate": 1.1183005172113743e-06,
      "loss": 4.2228,
      "step": 1069
    },
    {
      "epoch": 0.9294245385450597,
      "grad_norm": 2.6961355880795748,
      "learning_rate": 1.1154450585934625e-06,
      "loss": 4.0498,
      "step": 1070
    },
    {
      "epoch": 0.9302931596091205,
      "grad_norm": 3.688749799792538,
      "learning_rate": 1.1126240360590658e-06,
      "loss": 4.1798,
      "step": 1071
    },
    {
      "epoch": 0.9311617806731813,
      "grad_norm": 2.8110003232896625,
|
"learning_rate": 1.1098374717643587e-06, |
|
"loss": 4.3087, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.9320304017372422, |
|
"grad_norm": 2.5887554177535943, |
|
"learning_rate": 1.1070853875948837e-06, |
|
"loss": 4.2262, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.9328990228013029, |
|
"grad_norm": 2.847071399167459, |
|
"learning_rate": 1.1043678051653768e-06, |
|
"loss": 4.2708, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.9337676438653637, |
|
"grad_norm": 4.142401135834692, |
|
"learning_rate": 1.1016847458195999e-06, |
|
"loss": 4.1147, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.9346362649294245, |
|
"grad_norm": 2.9501471418720557, |
|
"learning_rate": 1.0990362306301725e-06, |
|
"loss": 4.2384, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.9355048859934854, |
|
"grad_norm": 2.501434571608401, |
|
"learning_rate": 1.0964222803984048e-06, |
|
"loss": 4.3386, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.9363735070575462, |
|
"grad_norm": 3.0424764452901956, |
|
"learning_rate": 1.0938429156541364e-06, |
|
"loss": 4.3864, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.9372421281216069, |
|
"grad_norm": 3.904570325899996, |
|
"learning_rate": 1.0912981566555736e-06, |
|
"loss": 4.1481, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.9381107491856677, |
|
"grad_norm": 3.3812480967472127, |
|
"learning_rate": 1.0887880233891307e-06, |
|
"loss": 4.4023, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.9389793702497286, |
|
"grad_norm": 3.5320678055096018, |
|
"learning_rate": 1.0863125355692749e-06, |
|
"loss": 4.1932, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.9398479913137894, |
|
"grad_norm": 3.2937159160580904, |
|
"learning_rate": 1.0838717126383676e-06, |
|
"loss": 4.1515, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.9407166123778502, |
|
"grad_norm": 3.9965990047935973, |
|
"learning_rate": 1.081465573766515e-06, |
|
"loss": 4.2552, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.9415852334419109, |
|
"grad_norm": 2.6702475745108014, |
|
"learning_rate": 1.079094137851415e-06, |
|
"loss": 4.2498, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.9424538545059717, |
|
"grad_norm": 4.745534492210504, |
|
"learning_rate": 1.0767574235182125e-06, |
|
"loss": 4.2509, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.9433224755700326, |
|
"grad_norm": 2.632042835693663, |
|
"learning_rate": 1.0744554491193483e-06, |
|
"loss": 4.2512, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.9441910966340934, |
|
"grad_norm": 4.902218185640278, |
|
"learning_rate": 1.0721882327344199e-06, |
|
"loss": 4.2946, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.9450597176981542, |
|
"grad_norm": 3.611290466221663, |
|
"learning_rate": 1.0699557921700337e-06, |
|
"loss": 4.3242, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.9459283387622149, |
|
"grad_norm": 4.159732582837829, |
|
"learning_rate": 1.0677581449596724e-06, |
|
"loss": 4.3335, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.9467969598262758, |
|
"grad_norm": 3.3609545882421, |
|
"learning_rate": 1.0655953083635507e-06, |
|
"loss": 4.3966, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.9476655808903366, |
|
"grad_norm": 3.43860026090556, |
|
"learning_rate": 1.0634672993684828e-06, |
|
"loss": 4.2277, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.9485342019543974, |
|
"grad_norm": 3.0705569428179884, |
|
"learning_rate": 1.0613741346877496e-06, |
|
"loss": 4.2567, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.9494028230184582, |
|
"grad_norm": 3.3793501615647195, |
|
"learning_rate": 1.0593158307609649e-06, |
|
"loss": 4.237, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.950271444082519, |
|
"grad_norm": 2.929105704044937, |
|
"learning_rate": 1.0572924037539494e-06, |
|
"loss": 4.2117, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.9511400651465798, |
|
"grad_norm": 2.71792555170425, |
|
"learning_rate": 1.0553038695586018e-06, |
|
"loss": 4.1614, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.9520086862106406, |
|
"grad_norm": 3.78382633594629, |
|
"learning_rate": 1.0533502437927722e-06, |
|
"loss": 4.3529, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.9528773072747014, |
|
"grad_norm": 2.607501362679155, |
|
"learning_rate": 1.0514315418001456e-06, |
|
"loss": 4.3052, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.9537459283387623, |
|
"grad_norm": 3.663360468883577, |
|
"learning_rate": 1.0495477786501138e-06, |
|
"loss": 4.1874, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.954614549402823, |
|
"grad_norm": 2.630120856617326, |
|
"learning_rate": 1.0476989691376622e-06, |
|
"loss": 4.242, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.9554831704668838, |
|
"grad_norm": 3.823941956325378, |
|
"learning_rate": 1.045885127783252e-06, |
|
"loss": 4.2043, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.9563517915309446, |
|
"grad_norm": 2.312883220474864, |
|
"learning_rate": 1.0441062688327051e-06, |
|
"loss": 4.1473, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.9572204125950055, |
|
"grad_norm": 4.094343889130672, |
|
"learning_rate": 1.0423624062570952e-06, |
|
"loss": 4.222, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.9580890336590663, |
|
"grad_norm": 2.209599527243729, |
|
"learning_rate": 1.0406535537526343e-06, |
|
"loss": 4.1423, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.958957654723127, |
|
"grad_norm": 3.382906333029422, |
|
"learning_rate": 1.0389797247405677e-06, |
|
"loss": 4.2021, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.9598262757871878, |
|
"grad_norm": 2.1618780172948004, |
|
"learning_rate": 1.0373409323670688e-06, |
|
"loss": 4.2185, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.9606948968512486, |
|
"grad_norm": 2.734044937681216, |
|
"learning_rate": 1.0357371895031331e-06, |
|
"loss": 4.1892, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.9615635179153095, |
|
"grad_norm": 3.0779816158117237, |
|
"learning_rate": 1.0341685087444804e-06, |
|
"loss": 4.1711, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.9624321389793703, |
|
"grad_norm": 3.4747018762690973, |
|
"learning_rate": 1.0326349024114533e-06, |
|
"loss": 4.1973, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.963300760043431, |
|
"grad_norm": 36.83010937108777, |
|
"learning_rate": 1.0311363825489225e-06, |
|
"loss": 4.3603, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.9641693811074918, |
|
"grad_norm": 4.281579688730939, |
|
"learning_rate": 1.0296729609261914e-06, |
|
"loss": 4.2369, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.9650380021715527, |
|
"grad_norm": 3.6658084904084647, |
|
"learning_rate": 1.0282446490369017e-06, |
|
"loss": 4.328, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.9659066232356135, |
|
"grad_norm": 3.1538713927243083, |
|
"learning_rate": 1.0268514580989476e-06, |
|
"loss": 4.224, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.9667752442996743, |
|
"grad_norm": 2.9277894965124895, |
|
"learning_rate": 1.025493399054383e-06, |
|
"loss": 4.1622, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.967643865363735, |
|
"grad_norm": 2.792456827835999, |
|
"learning_rate": 1.0241704825693384e-06, |
|
"loss": 4.1866, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.9685124864277959, |
|
"grad_norm": 3.0405461234372178, |
|
"learning_rate": 1.0228827190339365e-06, |
|
"loss": 4.2563, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.9693811074918567, |
|
"grad_norm": 2.456925962421864, |
|
"learning_rate": 1.0216301185622093e-06, |
|
"loss": 4.2406, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.9702497285559175, |
|
"grad_norm": 2.556980498653978, |
|
"learning_rate": 1.0204126909920216e-06, |
|
"loss": 4.2689, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.9711183496199783, |
|
"grad_norm": 2.494568998681425, |
|
"learning_rate": 1.0192304458849905e-06, |
|
"loss": 4.1788, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.971986970684039, |
|
"grad_norm": 2.414367486391019, |
|
"learning_rate": 1.0180833925264123e-06, |
|
"loss": 4.3123, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.9728555917480999, |
|
"grad_norm": 2.5637239329618797, |
|
"learning_rate": 1.0169715399251886e-06, |
|
"loss": 4.1474, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.9737242128121607, |
|
"grad_norm": 2.1036887441385, |
|
"learning_rate": 1.0158948968137562e-06, |
|
"loss": 4.1269, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.9745928338762215, |
|
"grad_norm": 2.922835583562659, |
|
"learning_rate": 1.0148534716480189e-06, |
|
"loss": 4.3625, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.9754614549402822, |
|
"grad_norm": 9.43946034741677, |
|
"learning_rate": 1.013847272607279e-06, |
|
"loss": 4.1683, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.9763300760043431, |
|
"grad_norm": 4.165841700664373, |
|
"learning_rate": 1.0128763075941765e-06, |
|
"loss": 4.2542, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.9771986970684039, |
|
"grad_norm": 2.839928915059915, |
|
"learning_rate": 1.0119405842346225e-06, |
|
"loss": 4.3801, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9780673181324647, |
|
"grad_norm": 2.8834509246127547, |
|
"learning_rate": 1.0110401098777443e-06, |
|
"loss": 4.1791, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9789359391965256, |
|
"grad_norm": 3.323021602743116, |
|
"learning_rate": 1.010174891595824e-06, |
|
"loss": 4.2308, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9798045602605863, |
|
"grad_norm": 2.8595797575000574, |
|
"learning_rate": 1.0093449361842436e-06, |
|
"loss": 4.163, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9806731813246471, |
|
"grad_norm": 2.6315144637856416, |
|
"learning_rate": 1.0085502501614336e-06, |
|
"loss": 4.2709, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9815418023887079, |
|
"grad_norm": 3.743912990675417, |
|
"learning_rate": 1.00779083976882e-06, |
|
"loss": 4.378, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.9824104234527687, |
|
"grad_norm": 2.5760124356799627, |
|
"learning_rate": 1.007066710970775e-06, |
|
"loss": 4.3711, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9832790445168296, |
|
"grad_norm": 3.24899714712198, |
|
"learning_rate": 1.0063778694545713e-06, |
|
"loss": 4.413, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9841476655808903, |
|
"grad_norm": 2.651957802148301, |
|
"learning_rate": 1.0057243206303377e-06, |
|
"loss": 4.1993, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9850162866449511, |
|
"grad_norm": 2.2387986170054557, |
|
"learning_rate": 1.0051060696310137e-06, |
|
"loss": 4.0838, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9858849077090119, |
|
"grad_norm": 3.0411821712384324, |
|
"learning_rate": 1.0045231213123148e-06, |
|
"loss": 4.3682, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.9867535287730728, |
|
"grad_norm": 4.325765653367129, |
|
"learning_rate": 1.0039754802526882e-06, |
|
"loss": 4.2435, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9876221498371336, |
|
"grad_norm": 2.974916977057162, |
|
"learning_rate": 1.00346315075328e-06, |
|
"loss": 4.1753, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9884907709011943, |
|
"grad_norm": 2.953803498662762, |
|
"learning_rate": 1.002986136837902e-06, |
|
"loss": 4.1284, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9893593919652551, |
|
"grad_norm": 3.8781438570753104, |
|
"learning_rate": 1.0025444422529981e-06, |
|
"loss": 4.2912, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.990228013029316, |
|
"grad_norm": 2.751967293645226, |
|
"learning_rate": 1.0021380704676165e-06, |
|
"loss": 4.2889, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.9910966340933768, |
|
"grad_norm": 3.6019606680877354, |
|
"learning_rate": 1.001767024673382e-06, |
|
"loss": 4.3346, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9919652551574376, |
|
"grad_norm": 4.860956653468777, |
|
"learning_rate": 1.00143130778447e-06, |
|
"loss": 4.4012, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9928338762214983, |
|
"grad_norm": 2.978905973214082, |
|
"learning_rate": 1.0011309224375852e-06, |
|
"loss": 4.2442, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9937024972855591, |
|
"grad_norm": 3.8701647090673466, |
|
"learning_rate": 1.0008658709919392e-06, |
|
"loss": 4.1964, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.99457111834962, |
|
"grad_norm": 3.288978549917795, |
|
"learning_rate": 1.0006361555292336e-06, |
|
"loss": 4.3262, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9954397394136808, |
|
"grad_norm": 2.960844019977527, |
|
"learning_rate": 1.0004417778536422e-06, |
|
"loss": 4.3462, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9963083604777416, |
|
"grad_norm": 3.7853750361182774, |
|
"learning_rate": 1.0002827394917987e-06, |
|
"loss": 4.1935, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9971769815418023, |
|
"grad_norm": 3.018359866548487, |
|
"learning_rate": 1.000159041692782e-06, |
|
"loss": 4.2258, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9980456026058632, |
|
"grad_norm": 3.5491363826838827, |
|
"learning_rate": 1.0000706854281087e-06, |
|
"loss": 4.3101, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.998914223669924, |
|
"grad_norm": 2.819477984506924, |
|
"learning_rate": 1.000017671391725e-06, |
|
"loss": 4.2138, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9997828447339848, |
|
"grad_norm": 4.035987734141358, |
|
"learning_rate": 1e-06, |
|
"loss": 4.2367, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.9997828447339848, |
|
"step": 1151, |
|
"total_flos": 995560529133568.0, |
|
"train_loss": 4.727850101182608, |
|
"train_runtime": 34373.7603, |
|
"train_samples_per_second": 2.143, |
|
"train_steps_per_second": 0.033 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1151, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 995560529133568.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|