|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 9676,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0010334849111202976,
      "grad_norm": 3.4666478633880615,
      "learning_rate": 0.0006200909466721785,
      "loss": 7.9994,
      "step": 10
    },
    {
      "epoch": 0.002066969822240595,
      "grad_norm": 1.0015512704849243,
      "learning_rate": 0.001240181893344357,
      "loss": 7.0943,
      "step": 20
    },
    {
      "epoch": 0.0031004547333608927,
      "grad_norm": 0.8696095943450928,
      "learning_rate": 0.0018602728400165356,
      "loss": 6.3072,
      "step": 30
    },
    {
      "epoch": 0.00413393964448119,
      "grad_norm": 0.32349398732185364,
      "learning_rate": 0.002480363786688714,
      "loss": 5.9694,
      "step": 40
    },
    {
      "epoch": 0.005167424555601488,
      "grad_norm": 1.6740758419036865,
      "learning_rate": 0.0031004547333608927,
      "loss": 5.8536,
      "step": 50
    },
    {
      "epoch": 0.0062009094667217855,
      "grad_norm": 1.6459004878997803,
      "learning_rate": 0.003720545680033071,
      "loss": 5.7039,
      "step": 60
    },
    {
      "epoch": 0.007234394377842084,
      "grad_norm": 0.48763737082481384,
      "learning_rate": 0.0043406366267052495,
      "loss": 5.4789,
      "step": 70
    },
    {
      "epoch": 0.00826787928896238,
      "grad_norm": 0.4977681338787079,
      "learning_rate": 0.004960727573377428,
      "loss": 5.269,
      "step": 80
    },
    {
      "epoch": 0.009301364200082679,
      "grad_norm": 0.5395390391349792,
      "learning_rate": 0.005580818520049607,
      "loss": 5.0768,
      "step": 90
    },
    {
      "epoch": 0.010334849111202976,
      "grad_norm": 0.4573260247707367,
      "learning_rate": 0.006,
      "loss": 4.9493,
      "step": 100
    },
    {
      "epoch": 0.011368334022323274,
      "grad_norm": 0.5551472902297974,
      "learning_rate": 0.006,
      "loss": 4.7982,
      "step": 110
    },
    {
      "epoch": 0.012401818933443571,
      "grad_norm": 0.4980376958847046,
      "learning_rate": 0.006,
      "loss": 4.6285,
      "step": 120
    },
    {
      "epoch": 0.01343530384456387,
      "grad_norm": 0.4951136112213135,
      "learning_rate": 0.006,
      "loss": 4.5176,
      "step": 130
    },
    {
      "epoch": 0.014468788755684168,
      "grad_norm": 0.5360985398292542,
      "learning_rate": 0.006,
      "loss": 4.3572,
      "step": 140
    },
    {
      "epoch": 0.015502273666804465,
      "grad_norm": 0.4469200670719147,
      "learning_rate": 0.006,
      "loss": 4.214,
      "step": 150
    },
    {
      "epoch": 0.01653575857792476,
      "grad_norm": 0.6157914400100708,
      "learning_rate": 0.006,
      "loss": 4.0854,
      "step": 160
    },
    {
      "epoch": 0.01756924348904506,
      "grad_norm": 0.4493155777454376,
      "learning_rate": 0.006,
      "loss": 3.9583,
      "step": 170
    },
    {
      "epoch": 0.018602728400165358,
      "grad_norm": 0.6523879766464233,
      "learning_rate": 0.006,
      "loss": 3.8434,
      "step": 180
    },
    {
      "epoch": 0.019636213311285657,
      "grad_norm": 0.7103180885314941,
      "learning_rate": 0.006,
      "loss": 3.7414,
      "step": 190
    },
    {
      "epoch": 0.02066969822240595,
      "grad_norm": 0.44575124979019165,
      "learning_rate": 0.006,
      "loss": 3.6829,
      "step": 200
    },
    {
      "epoch": 0.02170318313352625,
      "grad_norm": 0.5272648334503174,
      "learning_rate": 0.006,
      "loss": 3.6056,
      "step": 210
    },
    {
      "epoch": 0.02273666804464655,
      "grad_norm": 0.4514292776584625,
      "learning_rate": 0.006,
      "loss": 3.5279,
      "step": 220
    },
    {
      "epoch": 0.023770152955766847,
      "grad_norm": 0.39017120003700256,
      "learning_rate": 0.006,
      "loss": 3.4585,
      "step": 230
    },
    {
      "epoch": 0.024803637866887142,
      "grad_norm": 0.4593961536884308,
      "learning_rate": 0.006,
      "loss": 3.4018,
      "step": 240
    },
    {
      "epoch": 0.02583712277800744,
      "grad_norm": 0.48860013484954834,
      "learning_rate": 0.006,
      "loss": 3.3523,
      "step": 250
    },
    {
      "epoch": 0.02687060768912774,
      "grad_norm": 0.36506471037864685,
      "learning_rate": 0.006,
      "loss": 3.3124,
      "step": 260
    },
    {
      "epoch": 0.027904092600248037,
      "grad_norm": 0.3506007194519043,
      "learning_rate": 0.006,
      "loss": 3.2678,
      "step": 270
    },
    {
      "epoch": 0.028937577511368336,
      "grad_norm": 0.3703238070011139,
      "learning_rate": 0.006,
      "loss": 3.2231,
      "step": 280
    },
    {
      "epoch": 0.02997106242248863,
      "grad_norm": 0.45603325963020325,
      "learning_rate": 0.006,
      "loss": 3.1847,
      "step": 290
    },
    {
      "epoch": 0.03100454733360893,
      "grad_norm": 0.37536856532096863,
      "learning_rate": 0.006,
      "loss": 3.1585,
      "step": 300
    },
    {
      "epoch": 0.032038032244729224,
      "grad_norm": 0.3810868561267853,
      "learning_rate": 0.006,
      "loss": 3.1063,
      "step": 310
    },
    {
      "epoch": 0.03307151715584952,
      "grad_norm": 0.4568467438220978,
      "learning_rate": 0.006,
      "loss": 3.0649,
      "step": 320
    },
    {
      "epoch": 0.03410500206696982,
      "grad_norm": 0.36088162660598755,
      "learning_rate": 0.006,
      "loss": 3.0495,
      "step": 330
    },
    {
      "epoch": 0.03513848697809012,
      "grad_norm": 0.3663162887096405,
      "learning_rate": 0.006,
      "loss": 3.0188,
      "step": 340
    },
    {
      "epoch": 0.03617197188921042,
      "grad_norm": 0.5975127220153809,
      "learning_rate": 0.006,
      "loss": 2.994,
      "step": 350
    },
    {
      "epoch": 0.037205456800330716,
      "grad_norm": 0.3454865515232086,
      "learning_rate": 0.006,
      "loss": 2.9814,
      "step": 360
    },
    {
      "epoch": 0.038238941711451015,
      "grad_norm": 0.4206918179988861,
      "learning_rate": 0.006,
      "loss": 2.9306,
      "step": 370
    },
    {
      "epoch": 0.03927242662257131,
      "grad_norm": 0.3579295575618744,
      "learning_rate": 0.006,
      "loss": 2.9165,
      "step": 380
    },
    {
      "epoch": 0.04030591153369161,
      "grad_norm": 0.3658403158187866,
      "learning_rate": 0.006,
      "loss": 2.9103,
      "step": 390
    },
    {
      "epoch": 0.0413393964448119,
      "grad_norm": 0.3459131717681885,
      "learning_rate": 0.006,
      "loss": 2.8628,
      "step": 400
    },
    {
      "epoch": 0.0423728813559322,
      "grad_norm": 0.4726301431655884,
      "learning_rate": 0.006,
      "loss": 2.8334,
      "step": 410
    },
    {
      "epoch": 0.0434063662670525,
      "grad_norm": 0.3717091381549835,
      "learning_rate": 0.006,
      "loss": 2.8188,
      "step": 420
    },
    {
      "epoch": 0.0444398511781728,
      "grad_norm": 0.3234303295612335,
      "learning_rate": 0.006,
      "loss": 2.8076,
      "step": 430
    },
    {
      "epoch": 0.0454733360892931,
      "grad_norm": 0.4717100262641907,
      "learning_rate": 0.006,
      "loss": 2.7871,
      "step": 440
    },
    {
      "epoch": 0.046506821000413395,
      "grad_norm": 0.35768577456474304,
      "learning_rate": 0.006,
      "loss": 2.7601,
      "step": 450
    },
    {
      "epoch": 0.047540305911533694,
      "grad_norm": 0.3499695062637329,
      "learning_rate": 0.006,
      "loss": 2.73,
      "step": 460
    },
    {
      "epoch": 0.04857379082265399,
      "grad_norm": 0.4336409866809845,
      "learning_rate": 0.006,
      "loss": 2.7235,
      "step": 470
    },
    {
      "epoch": 0.049607275733774284,
      "grad_norm": 0.39916467666625977,
      "learning_rate": 0.006,
      "loss": 2.7007,
      "step": 480
    },
    {
      "epoch": 0.05064076064489458,
      "grad_norm": 0.3249908983707428,
      "learning_rate": 0.006,
      "loss": 2.6705,
      "step": 490
    },
    {
      "epoch": 0.05167424555601488,
      "grad_norm": 0.36379745602607727,
      "learning_rate": 0.006,
      "loss": 2.6393,
      "step": 500
    },
    {
      "epoch": 0.05270773046713518,
      "grad_norm": 0.3417607843875885,
      "learning_rate": 0.006,
      "loss": 2.6767,
      "step": 510
    },
    {
      "epoch": 0.05374121537825548,
      "grad_norm": 0.2848694920539856,
      "learning_rate": 0.006,
      "loss": 2.6244,
      "step": 520
    },
    {
      "epoch": 0.054774700289375776,
      "grad_norm": 0.2725778818130493,
      "learning_rate": 0.006,
      "loss": 2.5704,
      "step": 530
    },
    {
      "epoch": 0.055808185200496074,
      "grad_norm": 0.2887585461139679,
      "learning_rate": 0.006,
      "loss": 2.5508,
      "step": 540
    },
    {
      "epoch": 0.05684167011161637,
      "grad_norm": 0.422413170337677,
      "learning_rate": 0.006,
      "loss": 2.5433,
      "step": 550
    },
    {
      "epoch": 0.05787515502273667,
      "grad_norm": 0.5642419457435608,
      "learning_rate": 0.006,
      "loss": 2.5267,
      "step": 560
    },
    {
      "epoch": 0.05890863993385696,
      "grad_norm": 0.33949220180511475,
      "learning_rate": 0.006,
      "loss": 2.4942,
      "step": 570
    },
    {
      "epoch": 0.05994212484497726,
      "grad_norm": 0.36603158712387085,
      "learning_rate": 0.006,
      "loss": 2.4755,
      "step": 580
    },
    {
      "epoch": 0.06097560975609756,
      "grad_norm": 0.3936167061328888,
      "learning_rate": 0.006,
      "loss": 2.4358,
      "step": 590
    },
    {
      "epoch": 0.06200909466721786,
      "grad_norm": 0.46108072996139526,
      "learning_rate": 0.006,
      "loss": 2.4312,
      "step": 600
    },
    {
      "epoch": 0.06304257957833816,
      "grad_norm": 0.3659123480319977,
      "learning_rate": 0.006,
      "loss": 2.4023,
      "step": 610
    },
    {
      "epoch": 0.06407606448945845,
      "grad_norm": 0.4888301193714142,
      "learning_rate": 0.006,
      "loss": 2.3887,
      "step": 620
    },
    {
      "epoch": 0.06510954940057875,
      "grad_norm": 0.33769381046295166,
      "learning_rate": 0.006,
      "loss": 2.3645,
      "step": 630
    },
    {
      "epoch": 0.06614303431169904,
      "grad_norm": 0.4479112923145294,
      "learning_rate": 0.006,
      "loss": 2.3123,
      "step": 640
    },
    {
      "epoch": 0.06717651922281935,
      "grad_norm": 0.3715905249118805,
      "learning_rate": 0.006,
      "loss": 2.3009,
      "step": 650
    },
    {
      "epoch": 0.06821000413393964,
      "grad_norm": 0.43582767248153687,
      "learning_rate": 0.006,
      "loss": 2.3256,
      "step": 660
    },
    {
      "epoch": 0.06924348904505995,
      "grad_norm": 0.295462042093277,
      "learning_rate": 0.006,
      "loss": 2.281,
      "step": 670
    },
    {
      "epoch": 0.07027697395618024,
      "grad_norm": 0.2555060386657715,
      "learning_rate": 0.006,
      "loss": 2.2711,
      "step": 680
    },
    {
      "epoch": 0.07131045886730054,
      "grad_norm": 0.2812485992908478,
      "learning_rate": 0.006,
      "loss": 2.2392,
      "step": 690
    },
    {
      "epoch": 0.07234394377842084,
      "grad_norm": 0.2797698974609375,
      "learning_rate": 0.006,
      "loss": 2.2316,
      "step": 700
    },
    {
      "epoch": 0.07337742868954113,
      "grad_norm": 0.4201503396034241,
      "learning_rate": 0.006,
      "loss": 2.1894,
      "step": 710
    },
    {
      "epoch": 0.07441091360066143,
      "grad_norm": 0.48455363512039185,
      "learning_rate": 0.006,
      "loss": 2.1682,
      "step": 720
    },
    {
      "epoch": 0.07544439851178172,
      "grad_norm": 0.4906412959098816,
      "learning_rate": 0.006,
      "loss": 2.1594,
      "step": 730
    },
    {
      "epoch": 0.07647788342290203,
      "grad_norm": 0.3227013051509857,
      "learning_rate": 0.006,
      "loss": 2.148,
      "step": 740
    },
    {
      "epoch": 0.07751136833402232,
      "grad_norm": 0.24952304363250732,
      "learning_rate": 0.006,
      "loss": 2.1323,
      "step": 750
    },
    {
      "epoch": 0.07854485324514263,
      "grad_norm": 0.4688262641429901,
      "learning_rate": 0.006,
      "loss": 2.0781,
      "step": 760
    },
    {
      "epoch": 0.07957833815626292,
      "grad_norm": 0.3051510453224182,
      "learning_rate": 0.006,
      "loss": 2.1005,
      "step": 770
    },
    {
      "epoch": 0.08061182306738322,
      "grad_norm": 0.303608775138855,
      "learning_rate": 0.006,
      "loss": 2.0619,
      "step": 780
    },
    {
      "epoch": 0.08164530797850351,
      "grad_norm": 0.41049811244010925,
      "learning_rate": 0.006,
      "loss": 2.069,
      "step": 790
    },
    {
      "epoch": 0.0826787928896238,
      "grad_norm": 0.25236430764198303,
      "learning_rate": 0.006,
      "loss": 2.0536,
      "step": 800
    },
    {
      "epoch": 0.08371227780074411,
      "grad_norm": 0.2919696569442749,
      "learning_rate": 0.006,
      "loss": 2.0221,
      "step": 810
    },
    {
      "epoch": 0.0847457627118644,
      "grad_norm": 0.3965113162994385,
      "learning_rate": 0.006,
      "loss": 2.0008,
      "step": 820
    },
    {
      "epoch": 0.08577924762298471,
      "grad_norm": 0.3784347176551819,
      "learning_rate": 0.006,
      "loss": 2.0041,
      "step": 830
    },
    {
      "epoch": 0.086812732534105,
      "grad_norm": 0.32382646203041077,
      "learning_rate": 0.006,
      "loss": 1.9987,
      "step": 840
    },
    {
      "epoch": 0.0878462174452253,
      "grad_norm": 0.28474798798561096,
      "learning_rate": 0.006,
      "loss": 1.9859,
      "step": 850
    },
    {
      "epoch": 0.0888797023563456,
      "grad_norm": 0.35125505924224854,
      "learning_rate": 0.006,
      "loss": 1.9719,
      "step": 860
    },
    {
      "epoch": 0.08991318726746589,
      "grad_norm": 0.2959253787994385,
      "learning_rate": 0.006,
      "loss": 1.9691,
      "step": 870
    },
    {
      "epoch": 0.0909466721785862,
      "grad_norm": 0.2681068181991577,
      "learning_rate": 0.006,
      "loss": 1.946,
      "step": 880
    },
    {
      "epoch": 0.09198015708970649,
      "grad_norm": 0.2882836163043976,
      "learning_rate": 0.006,
      "loss": 1.9227,
      "step": 890
    },
    {
      "epoch": 0.09301364200082679,
      "grad_norm": 0.2792555093765259,
      "learning_rate": 0.006,
      "loss": 1.9067,
      "step": 900
    },
    {
      "epoch": 0.09404712691194708,
      "grad_norm": 0.31801220774650574,
      "learning_rate": 0.006,
      "loss": 1.8997,
      "step": 910
    },
    {
      "epoch": 0.09508061182306739,
      "grad_norm": 0.3104709982872009,
      "learning_rate": 0.006,
      "loss": 1.909,
      "step": 920
    },
    {
      "epoch": 0.09611409673418768,
      "grad_norm": 0.4227132499217987,
      "learning_rate": 0.006,
      "loss": 1.8898,
      "step": 930
    },
    {
      "epoch": 0.09714758164530798,
      "grad_norm": 0.2799486815929413,
      "learning_rate": 0.006,
      "loss": 1.8799,
      "step": 940
    },
    {
      "epoch": 0.09818106655642828,
      "grad_norm": 0.25304150581359863,
      "learning_rate": 0.006,
      "loss": 1.8542,
      "step": 950
    },
    {
      "epoch": 0.09921455146754857,
      "grad_norm": 0.40398165583610535,
      "learning_rate": 0.006,
      "loss": 1.8495,
      "step": 960
    },
    {
      "epoch": 0.10024803637866887,
      "grad_norm": 0.24255605041980743,
      "learning_rate": 0.006,
      "loss": 1.8279,
      "step": 970
    },
    {
      "epoch": 0.10128152128978916,
      "grad_norm": 0.251412957906723,
      "learning_rate": 0.006,
      "loss": 1.8448,
      "step": 980
    },
    {
      "epoch": 0.10231500620090947,
      "grad_norm": 0.33690279722213745,
      "learning_rate": 0.006,
      "loss": 1.8345,
      "step": 990
    },
    {
      "epoch": 0.10334849111202976,
      "grad_norm": 0.3986241817474365,
      "learning_rate": 0.006,
      "loss": 1.8272,
      "step": 1000
    },
    {
      "epoch": 0.10438197602315007,
      "grad_norm": 0.275290310382843,
      "learning_rate": 0.006,
      "loss": 1.8177,
      "step": 1010
    },
    {
      "epoch": 0.10541546093427036,
      "grad_norm": 0.24022069573402405,
      "learning_rate": 0.006,
      "loss": 1.8,
      "step": 1020
    },
    {
      "epoch": 0.10644894584539066,
      "grad_norm": 0.22507286071777344,
      "learning_rate": 0.006,
      "loss": 1.7825,
      "step": 1030
    },
    {
      "epoch": 0.10748243075651095,
      "grad_norm": 0.2965947985649109,
      "learning_rate": 0.006,
      "loss": 1.778,
      "step": 1040
    },
    {
      "epoch": 0.10851591566763125,
      "grad_norm": 0.3291703164577484,
      "learning_rate": 0.006,
      "loss": 1.7864,
      "step": 1050
    },
    {
      "epoch": 0.10954940057875155,
      "grad_norm": 0.2484293133020401,
      "learning_rate": 0.006,
      "loss": 1.7649,
      "step": 1060
    },
    {
      "epoch": 0.11058288548987184,
      "grad_norm": 0.30289486050605774,
      "learning_rate": 0.006,
      "loss": 1.7756,
      "step": 1070
    },
    {
      "epoch": 0.11161637040099215,
      "grad_norm": 0.2765989601612091,
      "learning_rate": 0.006,
      "loss": 1.7458,
      "step": 1080
    },
    {
      "epoch": 0.11264985531211244,
      "grad_norm": 0.24408799409866333,
      "learning_rate": 0.006,
      "loss": 1.7598,
      "step": 1090
    },
    {
      "epoch": 0.11368334022323275,
      "grad_norm": 0.2399667352437973,
      "learning_rate": 0.006,
      "loss": 1.754,
      "step": 1100
    },
    {
      "epoch": 0.11471682513435304,
      "grad_norm": 0.2846289873123169,
      "learning_rate": 0.006,
      "loss": 1.7406,
      "step": 1110
    },
    {
      "epoch": 0.11575031004547334,
      "grad_norm": 0.26227328181266785,
      "learning_rate": 0.006,
      "loss": 1.7336,
      "step": 1120
    },
    {
      "epoch": 0.11678379495659363,
      "grad_norm": 0.2673957347869873,
      "learning_rate": 0.006,
      "loss": 1.7281,
      "step": 1130
    },
    {
      "epoch": 0.11781727986771393,
      "grad_norm": 0.3353629410266876,
      "learning_rate": 0.006,
      "loss": 1.7169,
      "step": 1140
    },
    {
      "epoch": 0.11885076477883423,
      "grad_norm": 0.21159133315086365,
      "learning_rate": 0.006,
      "loss": 1.7016,
      "step": 1150
    },
    {
      "epoch": 0.11988424968995452,
      "grad_norm": 0.24120958149433136,
      "learning_rate": 0.006,
      "loss": 1.7033,
      "step": 1160
    },
    {
      "epoch": 0.12091773460107483,
      "grad_norm": 0.26428887248039246,
      "learning_rate": 0.006,
      "loss": 1.6867,
      "step": 1170
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 0.27754953503608704,
      "learning_rate": 0.006,
      "loss": 1.6901,
      "step": 1180
    },
    {
      "epoch": 0.12298470442331542,
      "grad_norm": 0.36125853657722473,
      "learning_rate": 0.006,
      "loss": 1.6808,
      "step": 1190
    },
    {
      "epoch": 0.12401818933443572,
      "grad_norm": 0.23145093023777008,
      "learning_rate": 0.006,
      "loss": 1.6946,
      "step": 1200
    },
    {
      "epoch": 0.12505167424555602,
      "grad_norm": 0.22397373616695404,
      "learning_rate": 0.006,
      "loss": 1.6619,
      "step": 1210
    },
    {
      "epoch": 0.1260851591566763,
      "grad_norm": 0.2219894528388977,
      "learning_rate": 0.006,
      "loss": 1.6483,
      "step": 1220
    },
    {
      "epoch": 0.1271186440677966,
      "grad_norm": 0.2552240788936615,
      "learning_rate": 0.006,
      "loss": 1.6574,
      "step": 1230
    },
    {
      "epoch": 0.1281521289789169,
      "grad_norm": 0.4382542073726654,
      "learning_rate": 0.006,
      "loss": 1.663,
      "step": 1240
    },
    {
      "epoch": 0.12918561389003722,
      "grad_norm": 0.2551495134830475,
      "learning_rate": 0.006,
      "loss": 1.6543,
      "step": 1250
    },
    {
      "epoch": 0.1302190988011575,
      "grad_norm": 0.21785561740398407,
      "learning_rate": 0.006,
      "loss": 1.6479,
      "step": 1260
    },
    {
      "epoch": 0.1312525837122778,
      "grad_norm": 0.25551608204841614,
      "learning_rate": 0.006,
      "loss": 1.6303,
      "step": 1270
    },
    {
      "epoch": 0.1322860686233981,
      "grad_norm": 0.30908089876174927,
      "learning_rate": 0.006,
      "loss": 1.6388,
      "step": 1280
    },
    {
      "epoch": 0.1333195535345184,
      "grad_norm": 0.39932873845100403,
      "learning_rate": 0.006,
      "loss": 1.6192,
      "step": 1290
    },
    {
      "epoch": 0.1343530384456387,
      "grad_norm": 0.2735249102115631,
      "learning_rate": 0.006,
      "loss": 1.6327,
      "step": 1300
    },
    {
      "epoch": 0.135386523356759,
      "grad_norm": 0.23229070007801056,
      "learning_rate": 0.006,
      "loss": 1.6278,
      "step": 1310
    },
    {
      "epoch": 0.13642000826787928,
      "grad_norm": 0.29770010709762573,
      "learning_rate": 0.006,
      "loss": 1.6137,
      "step": 1320
    },
    {
      "epoch": 0.13745349317899958,
      "grad_norm": 0.2619315981864929,
      "learning_rate": 0.006,
      "loss": 1.6071,
      "step": 1330
    },
    {
      "epoch": 0.1384869780901199,
      "grad_norm": 0.2108081877231598,
      "learning_rate": 0.006,
      "loss": 1.6189,
      "step": 1340
    },
    {
      "epoch": 0.13952046300124019,
      "grad_norm": 0.2384573072195053,
      "learning_rate": 0.006,
      "loss": 1.6035,
      "step": 1350
    },
    {
      "epoch": 0.14055394791236048,
      "grad_norm": 0.2051675170660019,
      "learning_rate": 0.006,
      "loss": 1.5866,
      "step": 1360
    },
    {
      "epoch": 0.14158743282348077,
      "grad_norm": 0.3068487346172333,
      "learning_rate": 0.006,
      "loss": 1.5727,
      "step": 1370
    },
    {
      "epoch": 0.1426209177346011,
      "grad_norm": 0.24623431265354156,
      "learning_rate": 0.006,
      "loss": 1.5786,
      "step": 1380
    },
    {
      "epoch": 0.14365440264572138,
      "grad_norm": 0.3030165731906891,
      "learning_rate": 0.006,
      "loss": 1.5793,
      "step": 1390
    },
    {
      "epoch": 0.14468788755684167,
      "grad_norm": 0.236822709441185,
      "learning_rate": 0.006,
      "loss": 1.5737,
      "step": 1400
    },
    {
      "epoch": 0.14572137246796196,
      "grad_norm": 0.4476490318775177,
      "learning_rate": 0.006,
      "loss": 1.5743,
      "step": 1410
    },
    {
      "epoch": 0.14675485737908225,
      "grad_norm": 0.3274894952774048,
      "learning_rate": 0.006,
      "loss": 1.5579,
      "step": 1420
    },
    {
      "epoch": 0.14778834229020257,
      "grad_norm": 0.2623043358325958,
      "learning_rate": 0.006,
      "loss": 1.5666,
      "step": 1430
    },
    {
      "epoch": 0.14882182720132286,
      "grad_norm": 0.23737861216068268,
      "learning_rate": 0.006,
      "loss": 1.553,
      "step": 1440
    },
    {
      "epoch": 0.14985531211244316,
      "grad_norm": 0.23686368763446808,
      "learning_rate": 0.006,
      "loss": 1.5598,
      "step": 1450
    },
    {
      "epoch": 0.15088879702356345,
      "grad_norm": 0.2711767256259918,
      "learning_rate": 0.006,
      "loss": 1.5508,
      "step": 1460
    },
    {
      "epoch": 0.15192228193468377,
      "grad_norm": 0.1919260323047638,
      "learning_rate": 0.006,
      "loss": 1.5364,
      "step": 1470
    },
    {
      "epoch": 0.15295576684580406,
      "grad_norm": 0.24973885715007782,
      "learning_rate": 0.006,
      "loss": 1.5444,
      "step": 1480
    },
    {
      "epoch": 0.15398925175692435,
      "grad_norm": 0.2096351981163025,
      "learning_rate": 0.006,
      "loss": 1.5422,
      "step": 1490
    },
    {
      "epoch": 0.15502273666804464,
      "grad_norm": 0.28911715745925903,
      "learning_rate": 0.006,
      "loss": 1.5562,
      "step": 1500
    },
    {
      "epoch": 0.15605622157916493,
      "grad_norm": 0.2608921229839325,
      "learning_rate": 0.006,
      "loss": 1.5153,
      "step": 1510
    },
    {
      "epoch": 0.15708970649028525,
      "grad_norm": 0.2605489194393158,
      "learning_rate": 0.006,
      "loss": 1.5262,
      "step": 1520
    },
    {
      "epoch": 0.15812319140140554,
      "grad_norm": 0.2948496341705322,
      "learning_rate": 0.006,
      "loss": 1.5318,
      "step": 1530
    },
    {
      "epoch": 0.15915667631252584,
      "grad_norm": 0.33066225051879883,
      "learning_rate": 0.006,
      "loss": 1.5138,
      "step": 1540
    },
    {
      "epoch": 0.16019016122364613,
      "grad_norm": 0.2134758085012436,
      "learning_rate": 0.006,
      "loss": 1.5199,
      "step": 1550
    },
    {
      "epoch": 0.16122364613476645,
      "grad_norm": 0.22920167446136475,
      "learning_rate": 0.006,
      "loss": 1.5084,
      "step": 1560
    },
    {
      "epoch": 0.16225713104588674,
      "grad_norm": 0.22044587135314941,
      "learning_rate": 0.006,
      "loss": 1.5254,
      "step": 1570
    },
    {
      "epoch": 0.16329061595700703,
      "grad_norm": 0.28658127784729004,
      "learning_rate": 0.006,
      "loss": 1.5149,
      "step": 1580
    },
    {
      "epoch": 0.16432410086812732,
      "grad_norm": 0.1932067573070526,
      "learning_rate": 0.006,
      "loss": 1.4943,
      "step": 1590
    },
    {
      "epoch": 0.1653575857792476,
      "grad_norm": 0.19694995880126953,
      "learning_rate": 0.006,
      "loss": 1.4871,
      "step": 1600
    },
    {
      "epoch": 0.16639107069036793,
      "grad_norm": 0.2092551738023758,
      "learning_rate": 0.006,
      "loss": 1.481,
      "step": 1610
    },
    {
      "epoch": 0.16742455560148822,
      "grad_norm": 0.20461459457874298,
      "learning_rate": 0.006,
      "loss": 1.4958,
      "step": 1620
    },
    {
      "epoch": 0.16845804051260851,
      "grad_norm": 0.2160821110010147,
      "learning_rate": 0.006,
      "loss": 1.5013,
      "step": 1630
    },
    {
      "epoch": 0.1694915254237288,
      "grad_norm": 0.23302793502807617,
      "learning_rate": 0.006,
      "loss": 1.4991,
      "step": 1640
    },
    {
      "epoch": 0.1705250103348491,
      "grad_norm": 0.20844736695289612,
      "learning_rate": 0.006,
      "loss": 1.4765,
      "step": 1650
    },
    {
      "epoch": 0.17155849524596942,
      "grad_norm": 0.18730531632900238,
      "learning_rate": 0.006,
      "loss": 1.4732,
      "step": 1660
    },
    {
      "epoch": 0.1725919801570897,
      "grad_norm": 0.2798612415790558,
      "learning_rate": 0.006,
      "loss": 1.4809,
      "step": 1670
    },
    {
      "epoch": 0.17362546506821,
      "grad_norm": 0.17376913130283356,
      "learning_rate": 0.006,
      "loss": 1.4847,
      "step": 1680
    },
    {
      "epoch": 0.1746589499793303,
      "grad_norm": 0.1984642595052719,
      "learning_rate": 0.006,
      "loss": 1.4677,
      "step": 1690
    },
    {
      "epoch": 0.1756924348904506,
      "grad_norm": 0.18918636441230774,
      "learning_rate": 0.006,
      "loss": 1.4623,
      "step": 1700
    },
    {
      "epoch": 0.1767259198015709,
      "grad_norm": 0.22821608185768127,
      "learning_rate": 0.006,
      "loss": 1.4836,
      "step": 1710
    },
    {
      "epoch": 0.1777594047126912,
      "grad_norm": 0.2023961991071701,
      "learning_rate": 0.006,
      "loss": 1.4665,
      "step": 1720
    },
    {
      "epoch": 0.17879288962381149,
      "grad_norm": 0.21143494546413422,
      "learning_rate": 0.006,
      "loss": 1.4637,
      "step": 1730
    },
    {
      "epoch": 0.17982637453493178,
      "grad_norm": 0.18367299437522888,
      "learning_rate": 0.006,
      "loss": 1.4564,
      "step": 1740
    },
    {
      "epoch": 0.1808598594460521,
      "grad_norm": 0.18500660359859467,
      "learning_rate": 0.006,
      "loss": 1.4485,
      "step": 1750
    },
    {
      "epoch": 0.1818933443571724,
      "grad_norm": 0.208240807056427,
      "learning_rate": 0.006,
      "loss": 1.449,
      "step": 1760
    },
    {
      "epoch": 0.18292682926829268,
      "grad_norm": 0.28708386421203613,
      "learning_rate": 0.006,
      "loss": 1.4514,
      "step": 1770
    },
    {
      "epoch": 0.18396031417941297,
      "grad_norm": 0.23758843541145325,
      "learning_rate": 0.006,
      "loss": 1.4376,
      "step": 1780
    },
    {
      "epoch": 0.1849937990905333,
      "grad_norm": 0.23937605321407318,
      "learning_rate": 0.006,
      "loss": 1.4406,
      "step": 1790
    },
    {
      "epoch": 0.18602728400165358,
      "grad_norm": 0.44142553210258484,
      "learning_rate": 0.006,
      "loss": 1.4331,
      "step": 1800
    },
    {
      "epoch": 0.18706076891277387,
      "grad_norm": 0.1951988786458969,
      "learning_rate": 0.006,
      "loss": 1.4443,
      "step": 1810
    },
    {
      "epoch": 0.18809425382389416,
      "grad_norm": 0.22376669943332672,
      "learning_rate": 0.006,
      "loss": 1.4345,
      "step": 1820
    },
    {
      "epoch": 0.18912773873501446,
      "grad_norm": 0.24400733411312103,
      "learning_rate": 0.006,
      "loss": 1.4405,
      "step": 1830
    },
    {
      "epoch": 0.19016122364613477,
      "grad_norm": 0.20870748162269592,
      "learning_rate": 0.006,
      "loss": 1.4291,
      "step": 1840
    },
    {
      "epoch": 0.19119470855725507,
      "grad_norm": 0.2239631861448288,
      "learning_rate": 0.006,
      "loss": 1.4269,
      "step": 1850
    },
    {
      "epoch": 0.19222819346837536,
      "grad_norm": 0.24455626308918,
      "learning_rate": 0.006,
      "loss": 1.4146,
      "step": 1860
    },
    {
      "epoch": 0.19326167837949565,
      "grad_norm": 0.25358930230140686,
      "learning_rate": 0.006,
      "loss": 1.4431,
      "step": 1870
    },
    {
      "epoch": 0.19429516329061597,
      "grad_norm": 0.1898830235004425,
      "learning_rate": 0.006,
      "loss": 1.4222,
      "step": 1880
    },
    {
      "epoch": 0.19532864820173626,
      "grad_norm": 0.21167783439159393,
      "learning_rate": 0.006,
      "loss": 1.4251,
      "step": 1890
    },
    {
      "epoch": 0.19636213311285655,
      "grad_norm": 0.2603727877140045,
      "learning_rate": 0.006,
      "loss": 1.426,
      "step": 1900
    },
    {
      "epoch": 0.19739561802397684,
      "grad_norm": 0.193504199385643,
      "learning_rate": 0.006,
      "loss": 1.4041,
      "step": 1910
    },
    {
      "epoch": 0.19842910293509713,
      "grad_norm": 0.22818297147750854,
      "learning_rate": 0.006,
      "loss": 1.4204,
      "step": 1920
    },
    {
      "epoch": 0.19946258784621745,
      "grad_norm": 0.20980095863342285,
      "learning_rate": 0.006,
      "loss": 1.405,
      "step": 1930
    },
    {
      "epoch": 0.20049607275733775,
      "grad_norm": 0.17794106900691986,
      "learning_rate": 0.006,
      "loss": 1.3871,
      "step": 1940
    },
    {
      "epoch": 0.20152955766845804,
      "grad_norm": 0.2572697103023529,
      "learning_rate": 0.006,
      "loss": 1.4001,
      "step": 1950
    },
    {
      "epoch": 0.20256304257957833,
      "grad_norm": 0.2605222463607788,
      "learning_rate": 0.006,
      "loss": 1.4029,
      "step": 1960
    },
    {
      "epoch": 0.20359652749069865,
      "grad_norm": 0.27022120356559753,
      "learning_rate": 0.006,
      "loss": 1.4009,
      "step": 1970
    },
    {
      "epoch": 0.20463001240181894,
      "grad_norm": 0.20254716277122498,
      "learning_rate": 0.006,
      "loss": 1.3946,
      "step": 1980
    },
    {
      "epoch": 0.20566349731293923,
      "grad_norm": 0.19156356155872345,
      "learning_rate": 0.006,
      "loss": 1.3848,
      "step": 1990
    },
    {
      "epoch": 0.20669698222405952,
      "grad_norm": 0.182766854763031,
      "learning_rate": 0.006,
      "loss": 1.3964,
      "step": 2000
    },
    {
      "epoch": 0.2077304671351798,
      "grad_norm": 0.1684897243976593,
      "learning_rate": 0.006,
      "loss": 1.3872,
      "step": 2010
    },
    {
      "epoch": 0.20876395204630013,
      "grad_norm": 0.19054663181304932,
      "learning_rate": 0.006,
      "loss": 1.3817,
      "step": 2020
    },
    {
      "epoch": 0.20979743695742042,
      "grad_norm": 0.17960067093372345,
      "learning_rate": 0.006,
      "loss": 1.389,
      "step": 2030
    },
    {
      "epoch": 0.21083092186854072,
      "grad_norm": 0.23440410196781158,
      "learning_rate": 0.006,
      "loss": 1.3707,
      "step": 2040
    },
    {
      "epoch": 0.211864406779661,
      "grad_norm": 0.21306714415550232,
      "learning_rate": 0.006,
      "loss": 1.3789,
      "step": 2050
    },
    {
      "epoch": 0.21289789169078133,
      "grad_norm": 0.24979044497013092,
      "learning_rate": 0.006,
      "loss": 1.3755,
      "step": 2060
    },
    {
      "epoch": 0.21393137660190162,
      "grad_norm": 0.2036939263343811,
      "learning_rate": 0.006,
      "loss": 1.3825,
      "step": 2070
    },
    {
      "epoch": 0.2149648615130219,
      "grad_norm": 0.21026568114757538,
      "learning_rate": 0.006,
      "loss": 1.3701,
      "step": 2080
    },
    {
      "epoch": 0.2159983464241422,
      "grad_norm": 0.18213844299316406,
      "learning_rate": 0.006,
      "loss": 1.3835,
      "step": 2090
    },
    {
      "epoch": 0.2170318313352625,
      "grad_norm": 0.23943543434143066,
      "learning_rate": 0.006,
      "loss": 1.3681,
      "step": 2100
    },
    {
      "epoch": 0.2180653162463828,
      "grad_norm": 0.27890509366989136,
      "learning_rate": 0.006,
      "loss": 1.3746,
      "step": 2110
    },
    {
      "epoch": 0.2190988011575031,
      "grad_norm": 0.1875576227903366,
      "learning_rate": 0.006,
      "loss": 1.3833,
      "step": 2120
    },
    {
      "epoch": 0.2201322860686234,
      "grad_norm": 0.21590693295001984,
      "learning_rate": 0.006,
      "loss": 1.3679,
      "step": 2130
    },
    {
      "epoch": 0.2211657709797437,
      "grad_norm": 0.16515956819057465,
      "learning_rate": 0.006,
      "loss": 1.3628,
      "step": 2140
    },
    {
      "epoch": 0.222199255890864,
      "grad_norm": 0.3273829221725464,
      "learning_rate": 0.006,
      "loss": 1.3605,
      "step": 2150
    },
    {
      "epoch": 0.2232327408019843,
      "grad_norm": 0.19835293292999268,
      "learning_rate": 0.006,
      "loss": 1.3599,
      "step": 2160
    },
    {
      "epoch": 0.2242662257131046,
      "grad_norm": 0.20204949378967285,
      "learning_rate": 0.006,
      "loss": 1.3548,
      "step": 2170
    },
    {
      "epoch": 0.22529971062422488,
      "grad_norm": 0.20704525709152222,
      "learning_rate": 0.006,
      "loss": 1.3625,
      "step": 2180
    },
    {
      "epoch": 0.22633319553534517,
      "grad_norm": 0.16702768206596375,
      "learning_rate": 0.006,
      "loss": 1.3475,
      "step": 2190
    },
    {
      "epoch": 0.2273666804464655,
      "grad_norm": 0.2254660427570343,
      "learning_rate": 0.006,
      "loss": 1.3508,
      "step": 2200
    },
    {
      "epoch": 0.22840016535758578,
      "grad_norm": 0.17011414468288422,
      "learning_rate": 0.006,
      "loss": 1.3416,
      "step": 2210
    },
    {
      "epoch": 0.22943365026870607,
      "grad_norm": 0.24636393785476685,
      "learning_rate": 0.006,
      "loss": 1.3487,
      "step": 2220
    },
    {
      "epoch": 0.23046713517982637,
      "grad_norm": 0.2143375277519226,
      "learning_rate": 0.006,
      "loss": 1.345,
      "step": 2230
    },
    {
      "epoch": 0.23150062009094668,
      "grad_norm": 0.1868714541196823,
      "learning_rate": 0.006,
      "loss": 1.3359,
      "step": 2240
    },
    {
      "epoch": 0.23253410500206698,
      "grad_norm": 0.1683160364627838,
      "learning_rate": 0.006,
      "loss": 1.3399,
      "step": 2250
    },
    {
      "epoch": 0.23356758991318727,
      "grad_norm": 0.199097141623497,
      "learning_rate": 0.006,
      "loss": 1.3568,
      "step": 2260
    },
    {
      "epoch": 0.23460107482430756,
      "grad_norm": 0.18377502262592316,
      "learning_rate": 0.006,
      "loss": 1.3488,
      "step": 2270
    },
    {
      "epoch": 0.23563455973542785,
      "grad_norm": 0.20199266076087952,
      "learning_rate": 0.006,
      "loss": 1.344,
      "step": 2280
    },
    {
      "epoch": 0.23666804464654817,
      "grad_norm": 0.271793931722641,
      "learning_rate": 0.006,
      "loss": 1.3169,
      "step": 2290
    },
    {
      "epoch": 0.23770152955766846,
      "grad_norm": 0.17339195311069489,
      "learning_rate": 0.006,
      "loss": 1.3328,
      "step": 2300
    },
    {
      "epoch": 0.23873501446878875,
      "grad_norm": 0.17549367249011993,
      "learning_rate": 0.006,
      "loss": 1.3354,
      "step": 2310
    },
    {
      "epoch": 0.23976849937990904,
      "grad_norm": 0.1800873875617981,
      "learning_rate": 0.006,
      "loss": 1.322,
      "step": 2320
    },
    {
      "epoch": 0.24080198429102936,
      "grad_norm": 0.22731319069862366,
      "learning_rate": 0.006,
      "loss": 1.3235,
      "step": 2330
    },
    {
      "epoch": 0.24183546920214966,
      "grad_norm": 0.22910168766975403,
      "learning_rate": 0.006,
      "loss": 1.3243,
      "step": 2340
    },
    {
      "epoch": 0.24286895411326995,
      "grad_norm": 0.1611885130405426,
      "learning_rate": 0.006,
      "loss": 1.3352,
      "step": 2350
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 0.19426490366458893,
      "learning_rate": 0.006,
      "loss": 1.3091,
      "step": 2360
    },
    {
      "epoch": 0.24493592393551053,
      "grad_norm": 0.19100475311279297,
      "learning_rate": 0.006,
      "loss": 1.3198,
      "step": 2370
    },
    {
      "epoch": 0.24596940884663085,
      "grad_norm": 0.2597349286079407,
      "learning_rate": 0.006,
      "loss": 1.302,
      "step": 2380
    },
    {
      "epoch": 0.24700289375775114,
      "grad_norm": 0.16532281041145325,
      "learning_rate": 0.006,
      "loss": 1.3182,
      "step": 2390
    },
    {
      "epoch": 0.24803637866887143,
      "grad_norm": 0.2065669149160385,
      "learning_rate": 0.006,
      "loss": 1.3182,
      "step": 2400
    },
    {
      "epoch": 0.24906986357999172,
      "grad_norm": 0.1802499145269394,
      "learning_rate": 0.006,
      "loss": 1.3206,
      "step": 2410
    },
    {
      "epoch": 0.25010334849111204,
      "grad_norm": 0.2926013469696045,
      "learning_rate": 0.006,
      "loss": 1.3089,
      "step": 2420
    },
    {
      "epoch": 0.2511368334022323,
      "grad_norm": 0.28276434540748596,
      "learning_rate": 0.006,
      "loss": 1.307,
      "step": 2430
    },
    {
      "epoch": 0.2521703183133526,
      "grad_norm": 0.18156251311302185,
      "learning_rate": 0.006,
      "loss": 1.3227,
      "step": 2440
    },
    {
      "epoch": 0.25320380322447295,
      "grad_norm": 0.18208877742290497,
      "learning_rate": 0.006,
      "loss": 1.3065,
      "step": 2450
    },
    {
      "epoch": 0.2542372881355932,
      "grad_norm": 0.18455766141414642,
      "learning_rate": 0.006,
      "loss": 1.3019,
      "step": 2460
    },
    {
      "epoch": 0.25527077304671353,
      "grad_norm": 0.23398499190807343,
      "learning_rate": 0.006,
      "loss": 1.3126,
      "step": 2470
    },
    {
      "epoch": 0.2563042579578338,
      "grad_norm": 0.1919931322336197,
      "learning_rate": 0.006,
      "loss": 1.2982,
      "step": 2480
    },
    {
      "epoch": 0.2573377428689541,
      "grad_norm": 0.24925625324249268,
      "learning_rate": 0.006,
      "loss": 1.32,
      "step": 2490
    },
    {
      "epoch": 0.25837122778007443,
      "grad_norm": 0.2440209984779358,
      "learning_rate": 0.006,
      "loss": 1.2904,
      "step": 2500
    },
    {
      "epoch": 0.2594047126911947,
      "grad_norm": 0.16576959192752838,
      "learning_rate": 0.006,
      "loss": 1.3065,
      "step": 2510
    },
    {
      "epoch": 0.260438197602315,
      "grad_norm": 0.18232128024101257,
      "learning_rate": 0.006,
      "loss": 1.3048,
      "step": 2520
    },
    {
      "epoch": 0.2614716825134353,
      "grad_norm": 0.23745986819267273,
      "learning_rate": 0.006,
      "loss": 1.292,
      "step": 2530
    },
    {
      "epoch": 0.2625051674245556,
      "grad_norm": 0.22818538546562195,
      "learning_rate": 0.006,
      "loss": 1.2964,
      "step": 2540
    },
    {
      "epoch": 0.2635386523356759,
      "grad_norm": 0.1811428666114807,
      "learning_rate": 0.006,
      "loss": 1.2968,
      "step": 2550
    },
    {
      "epoch": 0.2645721372467962,
      "grad_norm": 0.2562517821788788,
      "learning_rate": 0.006,
      "loss": 1.2985,
      "step": 2560
    },
    {
      "epoch": 0.2656056221579165,
      "grad_norm": 0.16540707647800446,
      "learning_rate": 0.006,
      "loss": 1.2962,
      "step": 2570
    },
    {
      "epoch": 0.2666391070690368,
      "grad_norm": 0.17198024690151215,
      "learning_rate": 0.006,
      "loss": 1.2835,
      "step": 2580
    },
    {
      "epoch": 0.2676725919801571,
      "grad_norm": 0.15511544048786163,
      "learning_rate": 0.006,
      "loss": 1.2689,
      "step": 2590
    },
    {
      "epoch": 0.2687060768912774,
      "grad_norm": 0.1916809380054474,
      "learning_rate": 0.006,
      "loss": 1.2822,
      "step": 2600
    },
    {
      "epoch": 0.26973956180239766,
      "grad_norm": 0.17753975093364716,
      "learning_rate": 0.006,
      "loss": 1.2843,
      "step": 2610
    },
    {
      "epoch": 0.270773046713518,
      "grad_norm": 0.18664464354515076,
      "learning_rate": 0.006,
      "loss": 1.2876,
      "step": 2620
    },
    {
      "epoch": 0.2718065316246383,
      "grad_norm": 0.283917635679245,
      "learning_rate": 0.006,
      "loss": 1.2745,
      "step": 2630
    },
    {
      "epoch": 0.27284001653575857,
      "grad_norm": 0.1494530886411667,
      "learning_rate": 0.006,
      "loss": 1.2848,
      "step": 2640
    },
    {
      "epoch": 0.2738735014468789,
      "grad_norm": 0.2093777358531952,
      "learning_rate": 0.006,
      "loss": 1.2786,
      "step": 2650
    },
    {
      "epoch": 0.27490698635799915,
      "grad_norm": 0.19870586693286896,
      "learning_rate": 0.006,
      "loss": 1.2895,
      "step": 2660
    },
    {
      "epoch": 0.27594047126911947,
      "grad_norm": 0.18807923793792725,
      "learning_rate": 0.006,
      "loss": 1.2784,
      "step": 2670
    },
    {
      "epoch": 0.2769739561802398,
      "grad_norm": 0.16479447484016418,
      "learning_rate": 0.006,
      "loss": 1.2815,
      "step": 2680
    },
    {
      "epoch": 0.27800744109136005,
      "grad_norm": 0.19209875166416168,
      "learning_rate": 0.006,
      "loss": 1.2791,
      "step": 2690
    },
    {
      "epoch": 0.27904092600248037,
      "grad_norm": 0.20269426703453064,
      "learning_rate": 0.006,
      "loss": 1.2828,
      "step": 2700
    },
    {
      "epoch": 0.28007441091360064,
      "grad_norm": 0.1880909502506256,
      "learning_rate": 0.006,
      "loss": 1.2786,
      "step": 2710
    },
    {
      "epoch": 0.28110789582472095,
      "grad_norm": 0.16020233929157257,
      "learning_rate": 0.006,
      "loss": 1.2637,
      "step": 2720
    },
    {
      "epoch": 0.2821413807358413,
      "grad_norm": 0.17531338334083557,
      "learning_rate": 0.006,
      "loss": 1.264,
      "step": 2730
    },
    {
      "epoch": 0.28317486564696154,
      "grad_norm": 0.16038616001605988,
      "learning_rate": 0.006,
      "loss": 1.2692,
      "step": 2740
    },
    {
      "epoch": 0.28420835055808186,
      "grad_norm": 0.19676966965198517,
      "learning_rate": 0.006,
      "loss": 1.271,
      "step": 2750
    },
    {
      "epoch": 0.2852418354692022,
      "grad_norm": 0.20371529459953308,
      "learning_rate": 0.006,
      "loss": 1.2637,
      "step": 2760
    },
    {
      "epoch": 0.28627532038032244,
      "grad_norm": 0.18698766827583313,
      "learning_rate": 0.006,
      "loss": 1.2776,
      "step": 2770
    },
    {
      "epoch": 0.28730880529144276,
      "grad_norm": 0.20747795701026917,
      "learning_rate": 0.006,
      "loss": 1.2573,
      "step": 2780
    },
    {
      "epoch": 0.288342290202563,
      "grad_norm": 0.2328963726758957,
      "learning_rate": 0.006,
      "loss": 1.2568,
      "step": 2790
    },
    {
      "epoch": 0.28937577511368334,
      "grad_norm": 0.1655196100473404,
      "learning_rate": 0.006,
      "loss": 1.2614,
      "step": 2800
    },
    {
      "epoch": 0.29040926002480366,
      "grad_norm": 0.1615329086780548,
      "learning_rate": 0.006,
      "loss": 1.2712,
      "step": 2810
    },
    {
      "epoch": 0.2914427449359239,
      "grad_norm": 0.15835103392601013,
      "learning_rate": 0.006,
      "loss": 1.2524,
      "step": 2820
    },
    {
      "epoch": 0.29247622984704424,
      "grad_norm": 0.20250938832759857,
      "learning_rate": 0.006,
      "loss": 1.264,
      "step": 2830
    },
    {
      "epoch": 0.2935097147581645,
      "grad_norm": 0.17166636884212494,
      "learning_rate": 0.006,
      "loss": 1.2563,
      "step": 2840
    },
    {
      "epoch": 0.2945431996692848,
      "grad_norm": 0.2327214777469635,
      "learning_rate": 0.006,
      "loss": 1.2546,
      "step": 2850
    },
    {
      "epoch": 0.29557668458040515,
      "grad_norm": 0.19193734228610992,
      "learning_rate": 0.006,
      "loss": 1.2647,
      "step": 2860
    },
    {
      "epoch": 0.2966101694915254,
      "grad_norm": 0.21454569697380066,
      "learning_rate": 0.006,
      "loss": 1.2573,
      "step": 2870
    },
    {
      "epoch": 0.29764365440264573,
      "grad_norm": 0.18797141313552856,
      "learning_rate": 0.006,
      "loss": 1.2661,
      "step": 2880
    },
    {
      "epoch": 0.298677139313766,
      "grad_norm": 0.16613776981830597,
      "learning_rate": 0.006,
      "loss": 1.2593,
      "step": 2890
    },
    {
      "epoch": 0.2997106242248863,
      "grad_norm": 0.15257228910923004,
      "learning_rate": 0.006,
      "loss": 1.2534,
      "step": 2900
    },
    {
      "epoch": 0.30074410913600663,
      "grad_norm": 0.1718466728925705,
      "learning_rate": 0.006,
      "loss": 1.2526,
      "step": 2910
    },
    {
      "epoch": 0.3017775940471269,
      "grad_norm": 0.17519408464431763,
      "learning_rate": 0.006,
      "loss": 1.236,
      "step": 2920
    },
    {
      "epoch": 0.3028110789582472,
      "grad_norm": 0.1739313006401062,
      "learning_rate": 0.006,
      "loss": 1.2416,
      "step": 2930
    },
    {
      "epoch": 0.30384456386936753,
      "grad_norm": 0.2000485360622406,
      "learning_rate": 0.006,
      "loss": 1.2525,
      "step": 2940
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 0.21297138929367065,
      "learning_rate": 0.006,
      "loss": 1.2312,
      "step": 2950
    },
    {
      "epoch": 0.3059115336916081,
      "grad_norm": 0.1601925939321518,
      "learning_rate": 0.006,
      "loss": 1.2575,
      "step": 2960
    },
    {
      "epoch": 0.3069450186027284,
      "grad_norm": 0.22686167061328888,
      "learning_rate": 0.006,
      "loss": 1.2421,
      "step": 2970
    },
    {
      "epoch": 0.3079785035138487,
      "grad_norm": 0.2552824318408966,
      "learning_rate": 0.006,
      "loss": 1.255,
      "step": 2980
    },
    {
      "epoch": 0.309011988424969,
      "grad_norm": 0.1520908921957016,
      "learning_rate": 0.006,
      "loss": 1.2335,
      "step": 2990
    },
    {
      "epoch": 0.3100454733360893,
      "grad_norm": 0.18067720532417297,
      "learning_rate": 0.006,
      "loss": 1.2278,
      "step": 3000
    },
    {
      "epoch": 0.3110789582472096,
      "grad_norm": 0.2269776612520218,
      "learning_rate": 0.006,
      "loss": 1.2401,
      "step": 3010
    },
    {
      "epoch": 0.31211244315832987,
      "grad_norm": 0.17168264091014862,
      "learning_rate": 0.006,
      "loss": 1.245,
      "step": 3020
    },
    {
      "epoch": 0.3131459280694502,
      "grad_norm": 0.20345386862754822,
      "learning_rate": 0.006,
      "loss": 1.2286,
      "step": 3030
    },
    {
      "epoch": 0.3141794129805705,
      "grad_norm": 0.17593909800052643,
      "learning_rate": 0.006,
      "loss": 1.2439,
      "step": 3040
    },
    {
      "epoch": 0.31521289789169077,
      "grad_norm": 0.1948511153459549,
      "learning_rate": 0.006,
      "loss": 1.2513,
      "step": 3050
    },
    {
      "epoch": 0.3162463828028111,
      "grad_norm": 0.1758977472782135,
      "learning_rate": 0.006,
      "loss": 1.2428,
      "step": 3060
    },
    {
      "epoch": 0.31727986771393135,
      "grad_norm": 0.14619344472885132,
      "learning_rate": 0.006,
      "loss": 1.2295,
      "step": 3070
    },
    {
      "epoch": 0.31831335262505167,
      "grad_norm": 0.15746112167835236,
      "learning_rate": 0.006,
      "loss": 1.2133,
      "step": 3080
    },
    {
      "epoch": 0.319346837536172,
      "grad_norm": 0.14768971502780914,
      "learning_rate": 0.006,
      "loss": 1.2234,
      "step": 3090
    },
    {
      "epoch": 0.32038032244729225,
      "grad_norm": 0.15294450521469116,
      "learning_rate": 0.006,
      "loss": 1.2255,
      "step": 3100
    },
    {
      "epoch": 0.3214138073584126,
      "grad_norm": 0.18775102496147156,
      "learning_rate": 0.006,
      "loss": 1.2378,
      "step": 3110
    },
    {
      "epoch": 0.3224472922695329,
      "grad_norm": 0.15432560443878174,
      "learning_rate": 0.006,
      "loss": 1.2359,
      "step": 3120
    },
    {
      "epoch": 0.32348077718065316,
      "grad_norm": 0.17162322998046875,
      "learning_rate": 0.006,
      "loss": 1.2257,
      "step": 3130
    },
    {
      "epoch": 0.3245142620917735,
      "grad_norm": 0.21141092479228973,
      "learning_rate": 0.006,
      "loss": 1.2301,
      "step": 3140
    },
    {
      "epoch": 0.32554774700289374,
      "grad_norm": 0.1810036152601242,
      "learning_rate": 0.006,
      "loss": 1.2204,
      "step": 3150
    },
    {
      "epoch": 0.32658123191401406,
      "grad_norm": 0.20127083361148834,
      "learning_rate": 0.006,
      "loss": 1.2263,
      "step": 3160
    },
    {
      "epoch": 0.3276147168251344,
      "grad_norm": 0.1795072853565216,
      "learning_rate": 0.006,
      "loss": 1.2224,
      "step": 3170
    },
    {
      "epoch": 0.32864820173625464,
      "grad_norm": 0.26130935549736023,
      "learning_rate": 0.006,
      "loss": 1.2236,
      "step": 3180
    },
    {
      "epoch": 0.32968168664737496,
      "grad_norm": 0.17055651545524597,
      "learning_rate": 0.006,
      "loss": 1.2229,
      "step": 3190
    },
    {
      "epoch": 0.3307151715584952,
      "grad_norm": 0.24257224798202515,
      "learning_rate": 0.006,
      "loss": 1.218,
      "step": 3200
    },
    {
      "epoch": 0.33174865646961554,
      "grad_norm": 0.1574934422969818,
      "learning_rate": 0.006,
      "loss": 1.2248,
      "step": 3210
    },
    {
      "epoch": 0.33278214138073586,
      "grad_norm": 0.18338268995285034,
      "learning_rate": 0.006,
      "loss": 1.211,
      "step": 3220
    },
    {
      "epoch": 0.3338156262918561,
      "grad_norm": 0.23949076235294342,
      "learning_rate": 0.006,
      "loss": 1.2107,
      "step": 3230
    },
    {
      "epoch": 0.33484911120297645,
      "grad_norm": 0.23181311786174774,
      "learning_rate": 0.006,
      "loss": 1.2157,
      "step": 3240
    },
    {
      "epoch": 0.3358825961140967,
      "grad_norm": 0.1727987825870514,
      "learning_rate": 0.006,
      "loss": 1.2113,
      "step": 3250
    },
    {
      "epoch": 0.33691608102521703,
      "grad_norm": 0.17050251364707947,
      "learning_rate": 0.006,
      "loss": 1.2196,
      "step": 3260
    },
    {
      "epoch": 0.33794956593633735,
      "grad_norm": 0.1836351752281189,
      "learning_rate": 0.006,
      "loss": 1.2164,
      "step": 3270
    },
    {
      "epoch": 0.3389830508474576,
      "grad_norm": 0.16653425991535187,
      "learning_rate": 0.006,
      "loss": 1.2108,
      "step": 3280
    },
    {
      "epoch": 0.34001653575857793,
      "grad_norm": 0.19799716770648956,
      "learning_rate": 0.006,
      "loss": 1.2072,
      "step": 3290
    },
    {
      "epoch": 0.3410500206696982,
      "grad_norm": 0.15341606736183167,
      "learning_rate": 0.006,
      "loss": 1.2157,
      "step": 3300
    },
    {
      "epoch": 0.3420835055808185,
      "grad_norm": 0.15945138037204742,
      "learning_rate": 0.006,
      "loss": 1.2065,
      "step": 3310
    },
    {
      "epoch": 0.34311699049193883,
      "grad_norm": 0.17721377313137054,
      "learning_rate": 0.006,
      "loss": 1.2056,
      "step": 3320
    },
    {
      "epoch": 0.3441504754030591,
      "grad_norm": 0.1358070969581604,
      "learning_rate": 0.006,
      "loss": 1.2171,
      "step": 3330
    },
    {
      "epoch": 0.3451839603141794,
      "grad_norm": 0.17187251150608063,
      "learning_rate": 0.006,
      "loss": 1.2046,
      "step": 3340
    },
    {
      "epoch": 0.34621744522529974,
      "grad_norm": 0.14732858538627625,
      "learning_rate": 0.006,
      "loss": 1.2025,
      "step": 3350
    },
    {
      "epoch": 0.34725093013642,
      "grad_norm": 0.224739208817482,
      "learning_rate": 0.006,
      "loss": 1.2213,
      "step": 3360
    },
    {
      "epoch": 0.3482844150475403,
      "grad_norm": 0.20575636625289917,
      "learning_rate": 0.006,
      "loss": 1.1922,
      "step": 3370
    },
    {
      "epoch": 0.3493178999586606,
      "grad_norm": 0.15750053524971008,
      "learning_rate": 0.006,
      "loss": 1.209,
      "step": 3380
    },
    {
      "epoch": 0.3503513848697809,
      "grad_norm": 0.27751949429512024,
      "learning_rate": 0.006,
      "loss": 1.2079,
      "step": 3390
    },
    {
      "epoch": 0.3513848697809012,
      "grad_norm": 0.14315839111804962,
      "learning_rate": 0.006,
      "loss": 1.1934,
      "step": 3400
    },
    {
      "epoch": 0.3524183546920215,
      "grad_norm": 0.15051007270812988,
      "learning_rate": 0.006,
      "loss": 1.2067,
      "step": 3410
    },
    {
      "epoch": 0.3534518396031418,
      "grad_norm": 0.15940986573696136,
      "learning_rate": 0.006,
      "loss": 1.2097,
      "step": 3420
    },
    {
      "epoch": 0.35448532451426207,
      "grad_norm": 0.1412818878889084,
      "learning_rate": 0.006,
      "loss": 1.1959,
      "step": 3430
    },
    {
      "epoch": 0.3555188094253824,
      "grad_norm": 0.1425037980079651,
      "learning_rate": 0.006,
      "loss": 1.1901,
      "step": 3440
    },
    {
      "epoch": 0.3565522943365027,
      "grad_norm": 0.15428400039672852,
      "learning_rate": 0.006,
      "loss": 1.2037,
      "step": 3450
    },
    {
      "epoch": 0.35758577924762297,
      "grad_norm": 0.1565396934747696,
      "learning_rate": 0.006,
      "loss": 1.2066,
      "step": 3460
    },
    {
      "epoch": 0.3586192641587433,
      "grad_norm": 0.1757258176803589,
      "learning_rate": 0.006,
      "loss": 1.1947,
      "step": 3470
    },
    {
      "epoch": 0.35965274906986355,
      "grad_norm": 0.23932603001594543,
      "learning_rate": 0.006,
      "loss": 1.1993,
      "step": 3480
    },
    {
      "epoch": 0.3606862339809839,
      "grad_norm": 0.17783917486667633,
      "learning_rate": 0.006,
      "loss": 1.1881,
      "step": 3490
    },
    {
      "epoch": 0.3617197188921042,
      "grad_norm": 0.13933712244033813,
      "learning_rate": 0.006,
      "loss": 1.1969,
      "step": 3500
    },
    {
      "epoch": 0.36275320380322446,
      "grad_norm": 0.13872799277305603,
      "learning_rate": 0.006,
      "loss": 1.1958,
      "step": 3510
    },
    {
      "epoch": 0.3637866887143448,
      "grad_norm": 0.18496429920196533,
      "learning_rate": 0.006,
      "loss": 1.1978,
      "step": 3520
    },
    {
      "epoch": 0.3648201736254651,
      "grad_norm": 0.2182753086090088,
      "learning_rate": 0.006,
      "loss": 1.1957,
      "step": 3530
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 0.15641410648822784,
      "learning_rate": 0.006,
      "loss": 1.1828,
      "step": 3540
    },
    {
      "epoch": 0.3668871434477057,
      "grad_norm": 0.15196652710437775,
      "learning_rate": 0.006,
      "loss": 1.1901,
      "step": 3550
    },
    {
      "epoch": 0.36792062835882594,
      "grad_norm": 0.15420852601528168,
      "learning_rate": 0.006,
      "loss": 1.1966,
      "step": 3560
    },
    {
      "epoch": 0.36895411326994626,
      "grad_norm": 0.1419685333967209,
      "learning_rate": 0.006,
      "loss": 1.1835,
      "step": 3570
    },
    {
      "epoch": 0.3699875981810666,
      "grad_norm": 0.20714856684207916,
      "learning_rate": 0.006,
      "loss": 1.1969,
      "step": 3580
    },
    {
      "epoch": 0.37102108309218684,
      "grad_norm": 0.14852945506572723,
      "learning_rate": 0.006,
      "loss": 1.1979,
      "step": 3590
    },
    {
      "epoch": 0.37205456800330716,
      "grad_norm": 0.1534539759159088,
      "learning_rate": 0.006,
      "loss": 1.1814,
      "step": 3600
    },
    {
      "epoch": 0.3730880529144274,
      "grad_norm": 0.1536993384361267,
      "learning_rate": 0.006,
      "loss": 1.1935,
      "step": 3610
    },
    {
      "epoch": 0.37412153782554775,
      "grad_norm": 0.14678268134593964,
      "learning_rate": 0.006,
      "loss": 1.1879,
      "step": 3620
    },
    {
      "epoch": 0.37515502273666806,
      "grad_norm": 0.15194503962993622,
      "learning_rate": 0.006,
      "loss": 1.1851,
      "step": 3630
    },
    {
      "epoch": 0.37618850764778833,
      "grad_norm": 0.1853533834218979,
      "learning_rate": 0.006,
      "loss": 1.177,
      "step": 3640
    },
    {
      "epoch": 0.37722199255890865,
      "grad_norm": 0.19894924759864807,
      "learning_rate": 0.006,
      "loss": 1.1792,
      "step": 3650
    },
    {
      "epoch": 0.3782554774700289,
      "grad_norm": 0.14289256930351257,
      "learning_rate": 0.006,
      "loss": 1.1916,
      "step": 3660
    },
    {
      "epoch": 0.37928896238114923,
      "grad_norm": 0.15364837646484375,
      "learning_rate": 0.006,
      "loss": 1.1773,
      "step": 3670
    },
    {
      "epoch": 0.38032244729226955,
      "grad_norm": 0.26057279109954834,
      "learning_rate": 0.006,
      "loss": 1.1772,
      "step": 3680
    },
    {
      "epoch": 0.3813559322033898,
      "grad_norm": 0.14714686572551727,
      "learning_rate": 0.006,
      "loss": 1.183,
      "step": 3690
    },
    {
      "epoch": 0.38238941711451013,
      "grad_norm": 0.13475820422172546,
      "learning_rate": 0.006,
      "loss": 1.1734,
      "step": 3700
    },
    {
      "epoch": 0.38342290202563045,
      "grad_norm": 0.169892817735672,
      "learning_rate": 0.006,
      "loss": 1.1833,
      "step": 3710
    },
    {
      "epoch": 0.3844563869367507,
      "grad_norm": 0.15819117426872253,
      "learning_rate": 0.006,
      "loss": 1.1693,
      "step": 3720
    },
    {
      "epoch": 0.38548987184787104,
      "grad_norm": 0.2074589729309082,
      "learning_rate": 0.006,
      "loss": 1.1773,
      "step": 3730
    },
    {
      "epoch": 0.3865233567589913,
      "grad_norm": 0.1474793255329132,
      "learning_rate": 0.006,
      "loss": 1.1746,
      "step": 3740
    },
    {
      "epoch": 0.3875568416701116,
      "grad_norm": 0.20919297635555267,
      "learning_rate": 0.006,
      "loss": 1.1644,
      "step": 3750
    },
    {
      "epoch": 0.38859032658123194,
      "grad_norm": 0.1802784502506256,
      "learning_rate": 0.006,
      "loss": 1.1785,
      "step": 3760
    },
    {
      "epoch": 0.3896238114923522,
      "grad_norm": 0.13777178525924683,
      "learning_rate": 0.006,
      "loss": 1.1762,
      "step": 3770
    },
    {
      "epoch": 0.3906572964034725,
      "grad_norm": 0.19703027606010437,
      "learning_rate": 0.006,
      "loss": 1.1715,
      "step": 3780
    },
    {
      "epoch": 0.3916907813145928,
      "grad_norm": 0.13620315492153168,
      "learning_rate": 0.006,
      "loss": 1.1729,
      "step": 3790
    },
    {
      "epoch": 0.3927242662257131,
      "grad_norm": 0.19151124358177185,
      "learning_rate": 0.006,
      "loss": 1.165,
      "step": 3800
    },
    {
      "epoch": 0.3937577511368334,
      "grad_norm": 0.14211703836917877,
      "learning_rate": 0.006,
      "loss": 1.1705,
      "step": 3810
    },
    {
      "epoch": 0.3947912360479537,
      "grad_norm": 0.13506080210208893,
      "learning_rate": 0.006,
      "loss": 1.1656,
      "step": 3820
    },
    {
      "epoch": 0.395824720959074,
      "grad_norm": 0.15365293622016907,
      "learning_rate": 0.006,
      "loss": 1.1642,
      "step": 3830
    },
    {
      "epoch": 0.39685820587019427,
      "grad_norm": 0.1530192494392395,
      "learning_rate": 0.006,
      "loss": 1.1621,
      "step": 3840
    },
    {
      "epoch": 0.3978916907813146,
      "grad_norm": 0.1344013661146164,
      "learning_rate": 0.006,
      "loss": 1.1694,
      "step": 3850
    },
    {
      "epoch": 0.3989251756924349,
      "grad_norm": 0.1469976007938385,
      "learning_rate": 0.006,
      "loss": 1.1706,
      "step": 3860
    },
    {
      "epoch": 0.39995866060355517,
      "grad_norm": 0.14535875618457794,
      "learning_rate": 0.006,
      "loss": 1.1641,
      "step": 3870
    },
    {
      "epoch": 0.4009921455146755,
      "grad_norm": 0.22596335411071777,
      "learning_rate": 0.006,
      "loss": 1.1835,
      "step": 3880
    },
    {
      "epoch": 0.4020256304257958,
      "grad_norm": 0.214060440659523,
      "learning_rate": 0.006,
      "loss": 1.1688,
      "step": 3890
    },
    {
      "epoch": 0.4030591153369161,
      "grad_norm": 0.14078359305858612,
      "learning_rate": 0.006,
      "loss": 1.158,
      "step": 3900
    },
    {
      "epoch": 0.4040926002480364,
      "grad_norm": 0.15023833513259888,
      "learning_rate": 0.006,
      "loss": 1.1599,
      "step": 3910
    },
    {
      "epoch": 0.40512608515915666,
      "grad_norm": 0.13650792837142944,
      "learning_rate": 0.006,
      "loss": 1.1512,
      "step": 3920
    },
    {
      "epoch": 0.406159570070277,
      "grad_norm": 0.1389831155538559,
      "learning_rate": 0.006,
      "loss": 1.1624,
      "step": 3930
    },
    {
      "epoch": 0.4071930549813973,
      "grad_norm": 0.1548847109079361,
      "learning_rate": 0.006,
      "loss": 1.1822,
      "step": 3940
    },
    {
      "epoch": 0.40822653989251756,
      "grad_norm": 0.14826175570487976,
      "learning_rate": 0.006,
      "loss": 1.1539,
      "step": 3950
    },
    {
      "epoch": 0.4092600248036379,
      "grad_norm": 0.27681705355644226,
      "learning_rate": 0.006,
      "loss": 1.1543,
      "step": 3960
    },
    {
      "epoch": 0.41029350971475814,
      "grad_norm": 0.17979800701141357,
      "learning_rate": 0.006,
      "loss": 1.1583,
      "step": 3970
    },
    {
      "epoch": 0.41132699462587846,
      "grad_norm": 0.14845605194568634,
      "learning_rate": 0.006,
      "loss": 1.1559,
      "step": 3980
    },
    {
      "epoch": 0.4123604795369988,
      "grad_norm": 0.18015886843204498,
      "learning_rate": 0.006,
      "loss": 1.1663,
      "step": 3990
    },
    {
      "epoch": 0.41339396444811904,
      "grad_norm": 0.2010691910982132,
      "learning_rate": 0.006,
      "loss": 1.1572,
      "step": 4000
    },
    {
      "epoch": 0.41442744935923936,
      "grad_norm": 0.14041297137737274,
      "learning_rate": 0.006,
      "loss": 1.1621,
      "step": 4010
    },
    {
      "epoch": 0.4154609342703596,
      "grad_norm": 0.13786040246486664,
      "learning_rate": 0.006,
      "loss": 1.157,
      "step": 4020
    },
    {
      "epoch": 0.41649441918147995,
      "grad_norm": 0.17166124284267426,
      "learning_rate": 0.006,
      "loss": 1.178,
      "step": 4030
    },
    {
      "epoch": 0.41752790409260027,
      "grad_norm": 0.13718323409557343,
      "learning_rate": 0.006,
|
"loss": 1.1514, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.41856138900372053, |
|
"grad_norm": 0.15878678858280182, |
|
"learning_rate": 0.006, |
|
"loss": 1.1718, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.41959487391484085, |
|
"grad_norm": 0.14852339029312134, |
|
"learning_rate": 0.006, |
|
"loss": 1.1559, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.42062835882596117, |
|
"grad_norm": 0.17088137567043304, |
|
"learning_rate": 0.006, |
|
"loss": 1.1527, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.42166184373708143, |
|
"grad_norm": 0.15074481070041656, |
|
"learning_rate": 0.006, |
|
"loss": 1.1519, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.42269532864820175, |
|
"grad_norm": 0.13103771209716797, |
|
"learning_rate": 0.006, |
|
"loss": 1.1475, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.423728813559322, |
|
"grad_norm": 0.1428011655807495, |
|
"learning_rate": 0.006, |
|
"loss": 1.1528, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.42476229847044233, |
|
"grad_norm": 0.14280404150485992, |
|
"learning_rate": 0.006, |
|
"loss": 1.1542, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.42579578338156265, |
|
"grad_norm": 0.14426454901695251, |
|
"learning_rate": 0.006, |
|
"loss": 1.1632, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.4268292682926829, |
|
"grad_norm": 0.13187375664710999, |
|
"learning_rate": 0.006, |
|
"loss": 1.1564, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.42786275320380324, |
|
"grad_norm": 0.1555539071559906, |
|
"learning_rate": 0.006, |
|
"loss": 1.1497, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.4288962381149235, |
|
"grad_norm": 0.14076194167137146, |
|
"learning_rate": 0.006, |
|
"loss": 1.1431, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.4299297230260438, |
|
"grad_norm": 0.14553508162498474, |
|
"learning_rate": 0.006, |
|
"loss": 1.1444, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.43096320793716414, |
|
"grad_norm": 0.14131340384483337, |
|
"learning_rate": 0.006, |
|
"loss": 1.1504, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.4319966928482844, |
|
"grad_norm": 0.1349923461675644, |
|
"learning_rate": 0.006, |
|
"loss": 1.1499, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.4330301777594047, |
|
"grad_norm": 0.1801183521747589, |
|
"learning_rate": 0.006, |
|
"loss": 1.1458, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.434063662670525, |
|
"grad_norm": 0.16793182492256165, |
|
"learning_rate": 0.006, |
|
"loss": 1.1527, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.4350971475816453, |
|
"grad_norm": 0.19141018390655518, |
|
"learning_rate": 0.006, |
|
"loss": 1.1451, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.4361306324927656, |
|
"grad_norm": 0.1311262547969818, |
|
"learning_rate": 0.006, |
|
"loss": 1.1433, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.4371641174038859, |
|
"grad_norm": 0.1293700635433197, |
|
"learning_rate": 0.006, |
|
"loss": 1.1251, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.4381976023150062, |
|
"grad_norm": 0.13121871650218964, |
|
"learning_rate": 0.006, |
|
"loss": 1.1392, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.43923108722612647, |
|
"grad_norm": 0.13999488949775696, |
|
"learning_rate": 0.006, |
|
"loss": 1.135, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.4402645721372468, |
|
"grad_norm": 0.14969255030155182, |
|
"learning_rate": 0.006, |
|
"loss": 1.1399, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.4412980570483671, |
|
"grad_norm": 0.14516979455947876, |
|
"learning_rate": 0.006, |
|
"loss": 1.1385, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.4423315419594874, |
|
"grad_norm": 0.13377705216407776, |
|
"learning_rate": 0.006, |
|
"loss": 1.1409, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.4433650268706077, |
|
"grad_norm": 0.1788671314716339, |
|
"learning_rate": 0.006, |
|
"loss": 1.1368, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.444398511781728, |
|
"grad_norm": 0.1459660530090332, |
|
"learning_rate": 0.006, |
|
"loss": 1.1399, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.4454319966928483, |
|
"grad_norm": 0.14903658628463745, |
|
"learning_rate": 0.006, |
|
"loss": 1.1347, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.4464654816039686, |
|
"grad_norm": 0.16043029725551605, |
|
"learning_rate": 0.006, |
|
"loss": 1.1395, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.44749896651508886, |
|
"grad_norm": 0.1458473652601242, |
|
"learning_rate": 0.006, |
|
"loss": 1.1442, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.4485324514262092, |
|
"grad_norm": 0.1839747577905655, |
|
"learning_rate": 0.006, |
|
"loss": 1.1278, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.4495659363373295, |
|
"grad_norm": 0.14207588136196136, |
|
"learning_rate": 0.006, |
|
"loss": 1.1355, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.45059942124844976, |
|
"grad_norm": 0.1426732838153839, |
|
"learning_rate": 0.006, |
|
"loss": 1.1345, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.4516329061595701, |
|
"grad_norm": 0.19898781180381775, |
|
"learning_rate": 0.006, |
|
"loss": 1.128, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.45266639107069034, |
|
"grad_norm": 0.1501811444759369, |
|
"learning_rate": 0.006, |
|
"loss": 1.1294, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.45369987598181066, |
|
"grad_norm": 0.13546693325042725, |
|
"learning_rate": 0.006, |
|
"loss": 1.1459, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.454733360892931, |
|
"grad_norm": 0.1429995894432068, |
|
"learning_rate": 0.006, |
|
"loss": 1.1401, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.45576684580405125, |
|
"grad_norm": 0.16272728145122528, |
|
"learning_rate": 0.006, |
|
"loss": 1.1388, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.45680033071517157, |
|
"grad_norm": 0.13995374739170074, |
|
"learning_rate": 0.006, |
|
"loss": 1.1348, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.45783381562629183, |
|
"grad_norm": 0.13635413348674774, |
|
"learning_rate": 0.006, |
|
"loss": 1.13, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.45886730053741215, |
|
"grad_norm": 0.12863661348819733, |
|
"learning_rate": 0.006, |
|
"loss": 1.1289, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.45990078544853247, |
|
"grad_norm": 0.14227396249771118, |
|
"learning_rate": 0.006, |
|
"loss": 1.1304, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.46093427035965273, |
|
"grad_norm": 0.21000246703624725, |
|
"learning_rate": 0.006, |
|
"loss": 1.132, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.46196775527077305, |
|
"grad_norm": 0.15686342120170593, |
|
"learning_rate": 0.006, |
|
"loss": 1.1456, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.46300124018189337, |
|
"grad_norm": 0.13623683154582977, |
|
"learning_rate": 0.006, |
|
"loss": 1.1229, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.46403472509301363, |
|
"grad_norm": 0.13092273473739624, |
|
"learning_rate": 0.006, |
|
"loss": 1.1199, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.46506821000413395, |
|
"grad_norm": 0.13690875470638275, |
|
"learning_rate": 0.006, |
|
"loss": 1.1095, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4661016949152542, |
|
"grad_norm": 0.12770096957683563, |
|
"learning_rate": 0.006, |
|
"loss": 1.1251, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.46713517982637454, |
|
"grad_norm": 0.12008727341890335, |
|
"learning_rate": 0.006, |
|
"loss": 1.1151, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.46816866473749486, |
|
"grad_norm": 0.13290061056613922, |
|
"learning_rate": 0.006, |
|
"loss": 1.1278, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.4692021496486151, |
|
"grad_norm": 0.16319867968559265, |
|
"learning_rate": 0.006, |
|
"loss": 1.1261, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.47023563455973544, |
|
"grad_norm": 0.13294318318367004, |
|
"learning_rate": 0.006, |
|
"loss": 1.1314, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.4712691194708557, |
|
"grad_norm": 0.15630632638931274, |
|
"learning_rate": 0.006, |
|
"loss": 1.1423, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.472302604381976, |
|
"grad_norm": 0.16916899383068085, |
|
"learning_rate": 0.006, |
|
"loss": 1.1385, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.47333608929309634, |
|
"grad_norm": 0.2240544855594635, |
|
"learning_rate": 0.006, |
|
"loss": 1.1307, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.4743695742042166, |
|
"grad_norm": 0.12742292881011963, |
|
"learning_rate": 0.006, |
|
"loss": 1.1278, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.4754030591153369, |
|
"grad_norm": 0.13861480355262756, |
|
"learning_rate": 0.006, |
|
"loss": 1.1248, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.4764365440264572, |
|
"grad_norm": 0.16158758103847504, |
|
"learning_rate": 0.006, |
|
"loss": 1.1437, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.4774700289375775, |
|
"grad_norm": 0.13849808275699615, |
|
"learning_rate": 0.006, |
|
"loss": 1.1345, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.4785035138486978, |
|
"grad_norm": 0.1381804645061493, |
|
"learning_rate": 0.006, |
|
"loss": 1.124, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.4795369987598181, |
|
"grad_norm": 0.1629112809896469, |
|
"learning_rate": 0.006, |
|
"loss": 1.1198, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.4805704836709384, |
|
"grad_norm": 0.1835983693599701, |
|
"learning_rate": 0.006, |
|
"loss": 1.1198, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.48160396858205873, |
|
"grad_norm": 0.13495475053787231, |
|
"learning_rate": 0.006, |
|
"loss": 1.1266, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.482637453493179, |
|
"grad_norm": 0.15339604020118713, |
|
"learning_rate": 0.006, |
|
"loss": 1.1111, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.4836709384042993, |
|
"grad_norm": 0.14230936765670776, |
|
"learning_rate": 0.006, |
|
"loss": 1.1002, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.4847044233154196, |
|
"grad_norm": 0.13988140225410461, |
|
"learning_rate": 0.006, |
|
"loss": 1.1132, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.4857379082265399, |
|
"grad_norm": 0.17499862611293793, |
|
"learning_rate": 0.006, |
|
"loss": 1.1223, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.4867713931376602, |
|
"grad_norm": 0.15850752592086792, |
|
"learning_rate": 0.006, |
|
"loss": 1.1199, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.4878048780487805, |
|
"grad_norm": 0.12476572394371033, |
|
"learning_rate": 0.006, |
|
"loss": 1.1252, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.4888383629599008, |
|
"grad_norm": 0.14426189661026, |
|
"learning_rate": 0.006, |
|
"loss": 1.1132, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.48987184787102106, |
|
"grad_norm": 0.14036618173122406, |
|
"learning_rate": 0.006, |
|
"loss": 1.1226, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.4909053327821414, |
|
"grad_norm": 0.1434011608362198, |
|
"learning_rate": 0.006, |
|
"loss": 1.1123, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.4919388176932617, |
|
"grad_norm": 0.19470463693141937, |
|
"learning_rate": 0.006, |
|
"loss": 1.1321, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.49297230260438196, |
|
"grad_norm": 0.12864422798156738, |
|
"learning_rate": 0.006, |
|
"loss": 1.1176, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.4940057875155023, |
|
"grad_norm": 0.16909734904766083, |
|
"learning_rate": 0.006, |
|
"loss": 1.1164, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.49503927242662255, |
|
"grad_norm": 0.14817242324352264, |
|
"learning_rate": 0.006, |
|
"loss": 1.1086, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.49607275733774286, |
|
"grad_norm": 0.16644009947776794, |
|
"learning_rate": 0.006, |
|
"loss": 1.1142, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.4971062422488632, |
|
"grad_norm": 0.16777795553207397, |
|
"learning_rate": 0.006, |
|
"loss": 1.1077, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.49813972715998345, |
|
"grad_norm": 0.1416415572166443, |
|
"learning_rate": 0.006, |
|
"loss": 1.122, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.49917321207110377, |
|
"grad_norm": 0.1440482884645462, |
|
"learning_rate": 0.006, |
|
"loss": 1.1129, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.5002066969822241, |
|
"grad_norm": 0.1519850343465805, |
|
"learning_rate": 0.006, |
|
"loss": 1.1144, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.5012401818933444, |
|
"grad_norm": 0.24164853990077972, |
|
"learning_rate": 0.006, |
|
"loss": 1.1235, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.5022736668044646, |
|
"grad_norm": 0.16202867031097412, |
|
"learning_rate": 0.006, |
|
"loss": 1.1054, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.5033071517155849, |
|
"grad_norm": 0.13450877368450165, |
|
"learning_rate": 0.006, |
|
"loss": 1.1112, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.5043406366267053, |
|
"grad_norm": 0.1484173834323883, |
|
"learning_rate": 0.006, |
|
"loss": 1.103, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.5053741215378256, |
|
"grad_norm": 0.14877624809741974, |
|
"learning_rate": 0.006, |
|
"loss": 1.1353, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.5064076064489459, |
|
"grad_norm": 0.14426067471504211, |
|
"learning_rate": 0.006, |
|
"loss": 1.1172, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.5074410913600661, |
|
"grad_norm": 0.14011584222316742, |
|
"learning_rate": 0.006, |
|
"loss": 1.1171, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.5084745762711864, |
|
"grad_norm": 0.20109255611896515, |
|
"learning_rate": 0.006, |
|
"loss": 1.1097, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.5095080611823067, |
|
"grad_norm": 0.1302032619714737, |
|
"learning_rate": 0.006, |
|
"loss": 1.1092, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.5105415460934271, |
|
"grad_norm": 0.13182514905929565, |
|
"learning_rate": 0.006, |
|
"loss": 1.1043, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.5115750310045474, |
|
"grad_norm": 0.13346487283706665, |
|
"learning_rate": 0.006, |
|
"loss": 1.1133, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.5126085159156676, |
|
"grad_norm": 0.13681770861148834, |
|
"learning_rate": 0.006, |
|
"loss": 1.1153, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.5136420008267879, |
|
"grad_norm": 0.14637672901153564, |
|
"learning_rate": 0.006, |
|
"loss": 1.0984, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.5146754857379082, |
|
"grad_norm": 0.14188942313194275, |
|
"learning_rate": 0.006, |
|
"loss": 1.1033, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.5157089706490285, |
|
"grad_norm": 0.1467074155807495, |
|
"learning_rate": 0.006, |
|
"loss": 1.095, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.5167424555601489, |
|
"grad_norm": 0.15010304749011993, |
|
"learning_rate": 0.006, |
|
"loss": 1.1029, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5177759404712691, |
|
"grad_norm": 0.13308066129684448, |
|
"learning_rate": 0.006, |
|
"loss": 1.1004, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.5188094253823894, |
|
"grad_norm": 0.13878673315048218, |
|
"learning_rate": 0.006, |
|
"loss": 1.1055, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.5198429102935097, |
|
"grad_norm": 0.14125706255435944, |
|
"learning_rate": 0.006, |
|
"loss": 1.0927, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.52087639520463, |
|
"grad_norm": 0.1262088418006897, |
|
"learning_rate": 0.006, |
|
"loss": 1.1112, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.5219098801157503, |
|
"grad_norm": 0.20926526188850403, |
|
"learning_rate": 0.006, |
|
"loss": 1.1169, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.5229433650268706, |
|
"grad_norm": 0.2341676652431488, |
|
"learning_rate": 0.006, |
|
"loss": 1.1198, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.5239768499379909, |
|
"grad_norm": 0.15894795954227448, |
|
"learning_rate": 0.006, |
|
"loss": 1.1017, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.5250103348491112, |
|
"grad_norm": 0.14738234877586365, |
|
"learning_rate": 0.006, |
|
"loss": 1.1021, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.5260438197602315, |
|
"grad_norm": 0.14371953904628754, |
|
"learning_rate": 0.006, |
|
"loss": 1.0978, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.5270773046713518, |
|
"grad_norm": 0.13194578886032104, |
|
"learning_rate": 0.006, |
|
"loss": 1.1073, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.5281107895824722, |
|
"grad_norm": 0.1411438137292862, |
|
"learning_rate": 0.006, |
|
"loss": 1.0931, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.5291442744935924, |
|
"grad_norm": 0.1347801685333252, |
|
"learning_rate": 0.006, |
|
"loss": 1.0896, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.5301777594047127, |
|
"grad_norm": 0.12243802100419998, |
|
"learning_rate": 0.006, |
|
"loss": 1.0912, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.531211244315833, |
|
"grad_norm": 0.15618611872196198, |
|
"learning_rate": 0.006, |
|
"loss": 1.0925, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.5322447292269533, |
|
"grad_norm": 0.133057102560997, |
|
"learning_rate": 0.006, |
|
"loss": 1.0937, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.5332782141380736, |
|
"grad_norm": 0.14488716423511505, |
|
"learning_rate": 0.006, |
|
"loss": 1.1057, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.5343116990491938, |
|
"grad_norm": 0.15518608689308167, |
|
"learning_rate": 0.006, |
|
"loss": 1.1073, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.5353451839603142, |
|
"grad_norm": 0.14580237865447998, |
|
"learning_rate": 0.006, |
|
"loss": 1.1064, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.5363786688714345, |
|
"grad_norm": 0.12775352597236633, |
|
"learning_rate": 0.006, |
|
"loss": 1.1043, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.5374121537825548, |
|
"grad_norm": 0.15589575469493866, |
|
"learning_rate": 0.006, |
|
"loss": 1.0995, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.5384456386936751, |
|
"grad_norm": 0.1348804533481598, |
|
"learning_rate": 0.006, |
|
"loss": 1.0959, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.5394791236047953, |
|
"grad_norm": 0.1302793323993683, |
|
"learning_rate": 0.006, |
|
"loss": 1.0833, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.5405126085159156, |
|
"grad_norm": 0.1520976573228836, |
|
"learning_rate": 0.006, |
|
"loss": 1.0991, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.541546093427036, |
|
"grad_norm": 0.13820461928844452, |
|
"learning_rate": 0.006, |
|
"loss": 1.1039, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.5425795783381563, |
|
"grad_norm": 0.13032610714435577, |
|
"learning_rate": 0.006, |
|
"loss": 1.0947, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.5436130632492766, |
|
"grad_norm": 0.1633882224559784, |
|
"learning_rate": 0.006, |
|
"loss": 1.1009, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.5446465481603968, |
|
"grad_norm": 0.2063656449317932, |
|
"learning_rate": 0.006, |
|
"loss": 1.0947, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.5456800330715171, |
|
"grad_norm": 0.13242821395397186, |
|
"learning_rate": 0.006, |
|
"loss": 1.0904, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.5467135179826375, |
|
"grad_norm": 0.15254896879196167, |
|
"learning_rate": 0.006, |
|
"loss": 1.0955, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.5477470028937578, |
|
"grad_norm": 0.12627582252025604, |
|
"learning_rate": 0.006, |
|
"loss": 1.1101, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.5487804878048781, |
|
"grad_norm": 0.132407084107399, |
|
"learning_rate": 0.006, |
|
"loss": 1.0947, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.5498139727159983, |
|
"grad_norm": 0.1298685371875763, |
|
"learning_rate": 0.006, |
|
"loss": 1.0853, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.5508474576271186, |
|
"grad_norm": 0.25235456228256226, |
|
"learning_rate": 0.006, |
|
"loss": 1.1048, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.5518809425382389, |
|
"grad_norm": 0.15979771316051483, |
|
"learning_rate": 0.006, |
|
"loss": 1.0945, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.5529144274493593, |
|
"grad_norm": 0.12209047377109528, |
|
"learning_rate": 0.006, |
|
"loss": 1.0918, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.5539479123604796, |
|
"grad_norm": 0.14376087486743927, |
|
"learning_rate": 0.006, |
|
"loss": 1.1055, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.5549813972715998, |
|
"grad_norm": 0.12132777273654938, |
|
"learning_rate": 0.006, |
|
"loss": 1.1009, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.5560148821827201, |
|
"grad_norm": 0.12317829579114914, |
|
"learning_rate": 0.006, |
|
"loss": 1.0873, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.5570483670938404, |
|
"grad_norm": 0.15228794515132904, |
|
"learning_rate": 0.006, |
|
"loss": 1.0883, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.5580818520049607, |
|
"grad_norm": 0.11975687742233276, |
|
"learning_rate": 0.006, |
|
"loss": 1.1075, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.5591153369160811, |
|
"grad_norm": 0.15219765901565552, |
|
"learning_rate": 0.006, |
|
"loss": 1.087, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.5601488218272013, |
|
"grad_norm": 0.12764790654182434, |
|
"learning_rate": 0.006, |
|
"loss": 1.0928, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.5611823067383216, |
|
"grad_norm": 0.18155109882354736, |
|
"learning_rate": 0.006, |
|
"loss": 1.0924, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.5622157916494419, |
|
"grad_norm": 0.16299748420715332, |
|
"learning_rate": 0.006, |
|
"loss": 1.0923, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.5632492765605622, |
|
"grad_norm": 0.1478397250175476, |
|
"learning_rate": 0.006, |
|
"loss": 1.0838, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.5642827614716825, |
|
"grad_norm": 0.14866343140602112, |
|
"learning_rate": 0.006, |
|
"loss": 1.0861, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.5653162463828028, |
|
"grad_norm": 0.15744392573833466, |
|
"learning_rate": 0.006, |
|
"loss": 1.0981, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.5663497312939231, |
|
"grad_norm": 0.1575409471988678, |
|
"learning_rate": 0.006, |
|
"loss": 1.0972, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.5673832162050434, |
|
"grad_norm": 0.2215876430273056, |
|
"learning_rate": 0.006, |
|
"loss": 1.0926, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.5684167011161637, |
|
"grad_norm": 0.2060500532388687, |
|
"learning_rate": 0.006, |
|
"loss": 1.0885, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.569450186027284, |
|
"grad_norm": 0.19035674631595612, |
|
"learning_rate": 0.006, |
|
"loss": 1.0852, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.5704836709384044, |
|
"grad_norm": 0.15188254415988922, |
|
"learning_rate": 0.006, |
|
"loss": 1.0883, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.5715171558495246, |
|
"grad_norm": 0.1428184062242508, |
|
"learning_rate": 0.006, |
|
"loss": 1.0777, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.5725506407606449, |
|
"grad_norm": 0.12388137727975845, |
|
"learning_rate": 0.006, |
|
"loss": 1.0884, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.5735841256717652, |
|
"grad_norm": 0.1246667355298996, |
|
"learning_rate": 0.006, |
|
"loss": 1.0807, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.5746176105828855, |
|
"grad_norm": 0.15170446038246155, |
|
"learning_rate": 0.006, |
|
"loss": 1.0908, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.5756510954940058, |
|
"grad_norm": 0.12245498597621918, |
|
"learning_rate": 0.006, |
|
"loss": 1.0915, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.576684580405126, |
|
"grad_norm": 0.12406784296035767, |
|
"learning_rate": 0.006, |
|
"loss": 1.0821, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.5777180653162464, |
|
"grad_norm": 0.173844575881958, |
|
"learning_rate": 0.006, |
|
"loss": 1.0879, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.5787515502273667, |
|
"grad_norm": 0.1362677961587906, |
|
"learning_rate": 0.006, |
|
"loss": 1.0929, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.579785035138487, |
|
"grad_norm": 0.13046661019325256, |
|
"learning_rate": 0.006, |
|
"loss": 1.0891, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.5808185200496073, |
|
"grad_norm": 0.13165681064128876, |
|
"learning_rate": 0.006, |
|
"loss": 1.0842, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.5818520049607275, |
|
"grad_norm": 0.1174607053399086, |
|
"learning_rate": 0.006, |
|
"loss": 1.0827, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.5828854898718479, |
|
"grad_norm": 0.12023981660604477, |
|
"learning_rate": 0.006, |
|
"loss": 1.0725, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.5839189747829682, |
|
"grad_norm": 0.16525237262248993, |
|
"learning_rate": 0.006, |
|
"loss": 1.0812, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.5849524596940885, |
|
"grad_norm": 0.14079253375530243, |
|
"learning_rate": 0.006, |
|
"loss": 1.0859, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.5859859446052088, |
|
"grad_norm": 0.1366300880908966, |
|
"learning_rate": 0.006, |
|
"loss": 1.0847, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.587019429516329, |
|
"grad_norm": 0.12389519810676575, |
|
"learning_rate": 0.006, |
|
"loss": 1.0897, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.5880529144274493, |
|
"grad_norm": 0.1193692535161972, |
|
"learning_rate": 0.006, |
|
"loss": 1.0727, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.5890863993385697, |
|
"grad_norm": 0.14679639041423798, |
|
"learning_rate": 0.006, |
|
"loss": 1.0849, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.59011988424969, |
|
"grad_norm": 0.13050967454910278, |
|
"learning_rate": 0.006, |
|
"loss": 1.0753, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.5911533691608103, |
|
"grad_norm": 0.12082899361848831, |
|
"learning_rate": 0.006, |
|
"loss": 1.078, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.5921868540719305, |
|
"grad_norm": 0.1468031257390976, |
|
"learning_rate": 0.006, |
|
"loss": 1.0866, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.5932203389830508, |
|
"grad_norm": 0.12028497457504272, |
|
"learning_rate": 0.006, |
|
"loss": 1.0726, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.5942538238941711, |
|
"grad_norm": 0.14379620552062988, |
|
"learning_rate": 0.006, |
|
"loss": 1.088, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.5952873088052915, |
|
"grad_norm": 0.13077853620052338, |
|
"learning_rate": 0.006, |
|
"loss": 1.0743, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.5963207937164118, |
|
"grad_norm": 0.14271710813045502, |
|
"learning_rate": 0.006, |
|
"loss": 1.078, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.597354278627532, |
|
"grad_norm": 0.1357925534248352, |
|
"learning_rate": 0.006, |
|
"loss": 1.0777, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.5983877635386523, |
|
"grad_norm": 0.12296663224697113, |
|
"learning_rate": 0.006, |
|
"loss": 1.0561, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.5994212484497726, |
|
"grad_norm": 0.1592247039079666, |
|
"learning_rate": 0.006, |
|
"loss": 1.0731, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.600454733360893, |
|
"grad_norm": 0.24744771420955658, |
|
"learning_rate": 0.006, |
|
"loss": 1.0703, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.6014882182720133, |
|
"grad_norm": 0.18649180233478546, |
|
"learning_rate": 0.006, |
|
"loss": 1.0663, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.6025217031831335, |
|
"grad_norm": 0.15970060229301453, |
|
"learning_rate": 0.006, |
|
"loss": 1.0814, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.6035551880942538, |
|
"grad_norm": 0.15505217015743256, |
|
"learning_rate": 0.006, |
|
"loss": 1.0819, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.6045886730053741, |
|
"grad_norm": 0.1281612068414688, |
|
"learning_rate": 0.006, |
|
"loss": 1.073, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.6056221579164944, |
|
"grad_norm": 0.13322196900844574, |
|
"learning_rate": 0.006, |
|
"loss": 1.0851, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.6066556428276147, |
|
"grad_norm": 0.1385001540184021, |
|
"learning_rate": 0.006, |
|
"loss": 1.0714, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.6076891277387351, |
|
"grad_norm": 0.12118349224328995, |
|
"learning_rate": 0.006, |
|
"loss": 1.0744, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.6087226126498553, |
|
"grad_norm": 0.12691213190555573, |
|
"learning_rate": 0.006, |
|
"loss": 1.0664, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 0.14859186112880707, |
|
"learning_rate": 0.006, |
|
"loss": 1.0821, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.6107895824720959, |
|
"grad_norm": 0.14851497113704681, |
|
"learning_rate": 0.006, |
|
"loss": 1.08, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.6118230673832162, |
|
"grad_norm": 0.1358513981103897, |
|
"learning_rate": 0.006, |
|
"loss": 1.0747, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.6128565522943366, |
|
"grad_norm": 0.14147254824638367, |
|
"learning_rate": 0.006, |
|
"loss": 1.0805, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.6138900372054568, |
|
"grad_norm": 0.13193708658218384, |
|
"learning_rate": 0.006, |
|
"loss": 1.079, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.6149235221165771, |
|
"grad_norm": 0.1255359649658203, |
|
"learning_rate": 0.006, |
|
"loss": 1.0785, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.6159570070276974, |
|
"grad_norm": 0.19004510343074799, |
|
"learning_rate": 0.006, |
|
"loss": 1.0728, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.6169904919388177, |
|
"grad_norm": 0.2386738657951355, |
|
"learning_rate": 0.006, |
|
"loss": 1.0836, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.618023976849938, |
|
"grad_norm": 0.14890602231025696, |
|
"learning_rate": 0.006, |
|
"loss": 1.0791, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.6190574617610582, |
|
"grad_norm": 0.15145020186901093, |
|
"learning_rate": 0.006, |
|
"loss": 1.0717, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.6200909466721786, |
|
"grad_norm": 0.12245503813028336, |
|
"learning_rate": 0.006, |
|
"loss": 1.064, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6211244315832989, |
|
"grad_norm": 0.13569168746471405, |
|
"learning_rate": 0.006, |
|
"loss": 1.0717, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.6221579164944192, |
|
"grad_norm": 0.14238642156124115, |
|
"learning_rate": 0.006, |
|
"loss": 1.072, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.6231914014055395, |
|
"grad_norm": 0.13332600891590118, |
|
"learning_rate": 0.006, |
|
"loss": 1.0821, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.6242248863166597, |
|
"grad_norm": 0.12775711715221405, |
|
"learning_rate": 0.006, |
|
"loss": 1.0665, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.62525837122778, |
|
"grad_norm": 0.12378199398517609, |
|
"learning_rate": 0.006, |
|
"loss": 1.0594, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.6262918561389004, |
|
"grad_norm": 0.127638041973114, |
|
"learning_rate": 0.006, |
|
"loss": 1.064, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.6273253410500207, |
|
"grad_norm": 0.1450446993112564, |
|
"learning_rate": 0.006, |
|
"loss": 1.0718, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.628358825961141, |
|
"grad_norm": 0.14722521603107452, |
|
"learning_rate": 0.006, |
|
"loss": 1.0786, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.6293923108722612, |
|
"grad_norm": 0.11357062309980392, |
|
"learning_rate": 0.006, |
|
"loss": 1.0713, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.6304257957833815, |
|
"grad_norm": 0.1290910542011261, |
|
"learning_rate": 0.006, |
|
"loss": 1.0692, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.6314592806945019, |
|
"grad_norm": 0.12275319546461105, |
|
"learning_rate": 0.006, |
|
"loss": 1.0599, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.6324927656056222, |
|
"grad_norm": 0.14953821897506714, |
|
"learning_rate": 0.006, |
|
"loss": 1.0735, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.6335262505167425, |
|
"grad_norm": 0.15243026614189148, |
|
"learning_rate": 0.006, |
|
"loss": 1.0641, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.6345597354278627, |
|
"grad_norm": 0.17082957923412323, |
|
"learning_rate": 0.006, |
|
"loss": 1.0669, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.635593220338983, |
|
"grad_norm": 0.14030581712722778, |
|
"learning_rate": 0.006, |
|
"loss": 1.0659, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.6366267052501033, |
|
"grad_norm": 0.1736230105161667, |
|
"learning_rate": 0.006, |
|
"loss": 1.0698, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.6376601901612237, |
|
"grad_norm": 0.12367543578147888, |
|
"learning_rate": 0.006, |
|
"loss": 1.0657, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.638693675072344, |
|
"grad_norm": 0.14306339621543884, |
|
"learning_rate": 0.006, |
|
"loss": 1.0731, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.6397271599834642, |
|
"grad_norm": 0.1417074203491211, |
|
"learning_rate": 0.006, |
|
"loss": 1.0704, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.6407606448945845, |
|
"grad_norm": 0.12904278934001923, |
|
"learning_rate": 0.006, |
|
"loss": 1.0619, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.6417941298057048, |
|
"grad_norm": 0.13394799828529358, |
|
"learning_rate": 0.006, |
|
"loss": 1.054, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.6428276147168251, |
|
"grad_norm": 0.13006740808486938, |
|
"learning_rate": 0.006, |
|
"loss": 1.058, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.6438610996279455, |
|
"grad_norm": 0.1411658525466919, |
|
"learning_rate": 0.006, |
|
"loss": 1.0688, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.6448945845390658, |
|
"grad_norm": 0.12172463536262512, |
|
"learning_rate": 0.006, |
|
"loss": 1.0599, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.645928069450186, |
|
"grad_norm": 0.1132531389594078, |
|
"learning_rate": 0.006, |
|
"loss": 1.0493, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.6469615543613063, |
|
"grad_norm": 0.15614502131938934, |
|
"learning_rate": 0.006, |
|
"loss": 1.0637, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.6479950392724266, |
|
"grad_norm": 0.14655426144599915, |
|
"learning_rate": 0.006, |
|
"loss": 1.0537, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.649028524183547, |
|
"grad_norm": 0.1248323991894722, |
|
"learning_rate": 0.006, |
|
"loss": 1.0735, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.6500620090946673, |
|
"grad_norm": 0.13113942742347717, |
|
"learning_rate": 0.006, |
|
"loss": 1.0561, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.6510954940057875, |
|
"grad_norm": 0.11488605290651321, |
|
"learning_rate": 0.006, |
|
"loss": 1.0626, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.6521289789169078, |
|
"grad_norm": 0.13470590114593506, |
|
"learning_rate": 0.006, |
|
"loss": 1.0629, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.6531624638280281, |
|
"grad_norm": 0.13424867391586304, |
|
"learning_rate": 0.006, |
|
"loss": 1.0634, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.6541959487391484, |
|
"grad_norm": 0.13190287351608276, |
|
"learning_rate": 0.006, |
|
"loss": 1.0697, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.6552294336502688, |
|
"grad_norm": 0.12076553702354431, |
|
"learning_rate": 0.006, |
|
"loss": 1.0559, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.656262918561389, |
|
"grad_norm": 0.12332470715045929, |
|
"learning_rate": 0.006, |
|
"loss": 1.0511, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.6572964034725093, |
|
"grad_norm": 0.13250276446342468, |
|
"learning_rate": 0.006, |
|
"loss": 1.0772, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.6583298883836296, |
|
"grad_norm": 0.12431011348962784, |
|
"learning_rate": 0.006, |
|
"loss": 1.0574, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.6593633732947499, |
|
"grad_norm": 0.11310591548681259, |
|
"learning_rate": 0.006, |
|
"loss": 1.0548, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.6603968582058702, |
|
"grad_norm": 0.12900187075138092, |
|
"learning_rate": 0.006, |
|
"loss": 1.0624, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.6614303431169904, |
|
"grad_norm": 0.21367403864860535, |
|
"learning_rate": 0.006, |
|
"loss": 1.0681, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.6624638280281108, |
|
"grad_norm": 0.17459499835968018, |
|
"learning_rate": 0.006, |
|
"loss": 1.0547, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.6634973129392311, |
|
"grad_norm": 0.12943004071712494, |
|
"learning_rate": 0.006, |
|
"loss": 1.0659, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.6645307978503514, |
|
"grad_norm": 0.12108403444290161, |
|
"learning_rate": 0.006, |
|
"loss": 1.0721, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.6655642827614717, |
|
"grad_norm": 0.14485575258731842, |
|
"learning_rate": 0.006, |
|
"loss": 1.0502, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.6665977676725919, |
|
"grad_norm": 0.12145304679870605, |
|
"learning_rate": 0.006, |
|
"loss": 1.0489, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.6676312525837123, |
|
"grad_norm": 0.12879317998886108, |
|
"learning_rate": 0.006, |
|
"loss": 1.0603, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.6686647374948326, |
|
"grad_norm": 0.13428542017936707, |
|
"learning_rate": 0.006, |
|
"loss": 1.0538, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.6696982224059529, |
|
"grad_norm": 0.13670314848423004, |
|
"learning_rate": 0.006, |
|
"loss": 1.053, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.6707317073170732, |
|
"grad_norm": 0.12726275622844696, |
|
"learning_rate": 0.006, |
|
"loss": 1.0555, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.6717651922281934, |
|
"grad_norm": 0.1293756067752838, |
|
"learning_rate": 0.006, |
|
"loss": 1.0579, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.6727986771393137, |
|
"grad_norm": 0.15011049807071686, |
|
"learning_rate": 0.006, |
|
"loss": 1.0528, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.6738321620504341, |
|
"grad_norm": 0.1232418343424797, |
|
"learning_rate": 0.006, |
|
"loss": 1.0474, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.6748656469615544, |
|
"grad_norm": 0.13466301560401917, |
|
"learning_rate": 0.006, |
|
"loss": 1.0561, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.6758991318726747, |
|
"grad_norm": 0.18297189474105835, |
|
"learning_rate": 0.006, |
|
"loss": 1.0569, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.6769326167837949, |
|
"grad_norm": 0.13041065633296967, |
|
"learning_rate": 0.006, |
|
"loss": 1.0614, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.6779661016949152, |
|
"grad_norm": 0.15428952872753143, |
|
"learning_rate": 0.006, |
|
"loss": 1.0526, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.6789995866060355, |
|
"grad_norm": 0.1226574033498764, |
|
"learning_rate": 0.006, |
|
"loss": 1.0476, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.6800330715171559, |
|
"grad_norm": 0.1592113971710205, |
|
"learning_rate": 0.006, |
|
"loss": 1.057, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.6810665564282762, |
|
"grad_norm": 0.131056547164917, |
|
"learning_rate": 0.006, |
|
"loss": 1.0643, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.6821000413393964, |
|
"grad_norm": 0.17470133304595947, |
|
"learning_rate": 0.006, |
|
"loss": 1.0545, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.6831335262505167, |
|
"grad_norm": 0.14145781099796295, |
|
"learning_rate": 0.006, |
|
"loss": 1.0459, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.684167011161637, |
|
"grad_norm": 0.11432395130395889, |
|
"learning_rate": 0.006, |
|
"loss": 1.0454, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.6852004960727573, |
|
"grad_norm": 0.12126456946134567, |
|
"learning_rate": 0.006, |
|
"loss": 1.0467, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.6862339809838777, |
|
"grad_norm": 0.12609295547008514, |
|
"learning_rate": 0.006, |
|
"loss": 1.0734, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.687267465894998, |
|
"grad_norm": 0.15360839664936066, |
|
"learning_rate": 0.006, |
|
"loss": 1.0424, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.6883009508061182, |
|
"grad_norm": 0.13050951063632965, |
|
"learning_rate": 0.006, |
|
"loss": 1.0532, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.6893344357172385, |
|
"grad_norm": 0.11345363408327103, |
|
"learning_rate": 0.006, |
|
"loss": 1.0616, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.6903679206283588, |
|
"grad_norm": 0.12678933143615723, |
|
"learning_rate": 0.006, |
|
"loss": 1.0472, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.6914014055394792, |
|
"grad_norm": 0.12220112234354019, |
|
"learning_rate": 0.006, |
|
"loss": 1.048, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.6924348904505995, |
|
"grad_norm": 0.12634249031543732, |
|
"learning_rate": 0.006, |
|
"loss": 1.0489, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.6934683753617197, |
|
"grad_norm": 0.14149808883666992, |
|
"learning_rate": 0.006, |
|
"loss": 1.0548, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.69450186027284, |
|
"grad_norm": 0.12240725010633469, |
|
"learning_rate": 0.006, |
|
"loss": 1.0466, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.6955353451839603, |
|
"grad_norm": 0.1353718340396881, |
|
"learning_rate": 0.006, |
|
"loss": 1.0423, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.6965688300950806, |
|
"grad_norm": 0.18347500264644623, |
|
"learning_rate": 0.006, |
|
"loss": 1.0485, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.697602315006201, |
|
"grad_norm": 0.12348928302526474, |
|
"learning_rate": 0.006, |
|
"loss": 1.0573, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.6986357999173212, |
|
"grad_norm": 0.1428414285182953, |
|
"learning_rate": 0.006, |
|
"loss": 1.0412, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.6996692848284415, |
|
"grad_norm": 0.1410830169916153, |
|
"learning_rate": 0.006, |
|
"loss": 1.0569, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.7007027697395618, |
|
"grad_norm": 0.14577074348926544, |
|
"learning_rate": 0.005935620400122873, |
|
"loss": 1.0546, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.7017362546506821, |
|
"grad_norm": 0.14377078413963318, |
|
"learning_rate": 0.005842197295969981, |
|
"loss": 1.045, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.7027697395618024, |
|
"grad_norm": 0.12980251014232635, |
|
"learning_rate": 0.005750244615429315, |
|
"loss": 1.0595, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.7038032244729227, |
|
"grad_norm": 0.13310031592845917, |
|
"learning_rate": 0.005659739214915371, |
|
"loss": 1.0492, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.704836709384043, |
|
"grad_norm": 0.12173794209957123, |
|
"learning_rate": 0.005570658315108789, |
|
"loss": 1.0409, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.7058701942951633, |
|
"grad_norm": 0.17054332792758942, |
|
"learning_rate": 0.005482979495223033, |
|
"loss": 1.0505, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.7069036792062836, |
|
"grad_norm": 0.18403039872646332, |
|
"learning_rate": 0.005396680687361299, |
|
"loss": 1.0462, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.7079371641174039, |
|
"grad_norm": 0.13148552179336548, |
|
"learning_rate": 0.005311740170962234, |
|
"loss": 1.049, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.7089706490285241, |
|
"grad_norm": 0.13557471334934235, |
|
"learning_rate": 0.005228136567333095, |
|
"loss": 1.0348, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.7100041339396445, |
|
"grad_norm": 0.11231327056884766, |
|
"learning_rate": 0.005145848834268932, |
|
"loss": 1.0309, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.7110376188507648, |
|
"grad_norm": 0.14308036863803864, |
|
"learning_rate": 0.005064856260756483, |
|
"loss": 1.024, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.7120711037618851, |
|
"grad_norm": 0.11384549736976624, |
|
"learning_rate": 0.0049851384617614035, |
|
"loss": 1.0255, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.7131045886730054, |
|
"grad_norm": 0.1373758465051651, |
|
"learning_rate": 0.004906675373097566, |
|
"loss": 1.0248, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.7141380735841256, |
|
"grad_norm": 0.13972637057304382, |
|
"learning_rate": 0.00482944724637709, |
|
"loss": 1.0197, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.7151715584952459, |
|
"grad_norm": 0.11777855455875397, |
|
"learning_rate": 0.004753434644039878, |
|
"loss": 1.0197, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.7162050434063663, |
|
"grad_norm": 0.10751146823167801, |
|
"learning_rate": 0.004678618434461362, |
|
"loss": 1.0272, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.7172385283174866, |
|
"grad_norm": 0.11851785331964493, |
|
"learning_rate": 0.004604979787137271, |
|
"loss": 1.0109, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.7182720132286069, |
|
"grad_norm": 0.1341877430677414, |
|
"learning_rate": 0.004532500167944173, |
|
"loss": 1.0106, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.7193054981397271, |
|
"grad_norm": 0.11008955538272858, |
|
"learning_rate": 0.004461161334474619, |
|
"loss": 1.0152, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.7203389830508474, |
|
"grad_norm": 0.16443827748298645, |
|
"learning_rate": 0.0043909453314457106, |
|
"loss": 1.0118, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.7213724679619677, |
|
"grad_norm": 0.11731505393981934, |
|
"learning_rate": 0.004321834486179928, |
|
"loss": 1.0081, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.7224059528730881, |
|
"grad_norm": 0.11518187075853348, |
|
"learning_rate": 0.004253811404157096, |
|
"loss": 1.004, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.7234394377842084, |
|
"grad_norm": 0.10781227797269821, |
|
"learning_rate": 0.004186858964636351, |
|
"loss": 0.9879, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7244729226953287, |
|
"grad_norm": 0.12845131754875183, |
|
"learning_rate": 0.004120960316347017, |
|
"loss": 1.0148, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.7255064076064489, |
|
"grad_norm": 0.11902609467506409, |
|
"learning_rate": 0.004056098873247311, |
|
"loss": 1.0012, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.7265398925175692, |
|
"grad_norm": 0.11711709946393967, |
|
"learning_rate": 0.003992258310349796, |
|
"loss": 1.0134, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.7275733774286895, |
|
"grad_norm": 0.12396284192800522, |
|
"learning_rate": 0.003929422559612546, |
|
"loss": 1.0023, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.7286068623398099, |
|
"grad_norm": 0.10830879211425781, |
|
"learning_rate": 0.0038675758058949712, |
|
"loss": 1.0126, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.7296403472509302, |
|
"grad_norm": 0.11904972046613693, |
|
"learning_rate": 0.003806702482977311, |
|
"loss": 0.9993, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.7306738321620504, |
|
"grad_norm": 0.14758719503879547, |
|
"learning_rate": 0.0037467872696427614, |
|
"loss": 0.9919, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.7317073170731707, |
|
"grad_norm": 0.10925702750682831, |
|
"learning_rate": 0.0036878150858212825, |
|
"loss": 0.9991, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.732740801984291, |
|
"grad_norm": 0.1237645223736763, |
|
"learning_rate": 0.0036297710887940875, |
|
"loss": 1.004, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.7337742868954114, |
|
"grad_norm": 0.12318721413612366, |
|
"learning_rate": 0.003572640669457879, |
|
"loss": 0.9833, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.7348077718065317, |
|
"grad_norm": 0.10309872031211853, |
|
"learning_rate": 0.003516409448647884, |
|
"loss": 0.9919, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.7358412567176519, |
|
"grad_norm": 0.10170971602201462, |
|
"learning_rate": 0.003461063273518752, |
|
"loss": 0.9908, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.7368747416287722, |
|
"grad_norm": 0.13282230496406555, |
|
"learning_rate": 0.0034065882139824315, |
|
"loss": 0.9849, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.7379082265398925, |
|
"grad_norm": 0.10541505366563797, |
|
"learning_rate": 0.0033529705592020968, |
|
"loss": 0.9893, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.7389417114510128, |
|
"grad_norm": 0.10669828951358795, |
|
"learning_rate": 0.0033001968141412714, |
|
"loss": 0.9841, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.7399751963621332, |
|
"grad_norm": 0.11185015738010406, |
|
"learning_rate": 0.0032482536961672545, |
|
"loss": 0.9888, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.7410086812732534, |
|
"grad_norm": 0.11848310381174088, |
|
"learning_rate": 0.003197128131708016, |
|
"loss": 0.9753, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.7420421661843737, |
|
"grad_norm": 0.12162330746650696, |
|
"learning_rate": 0.0031468072529617073, |
|
"loss": 0.9896, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.743075651095494, |
|
"grad_norm": 0.10726076364517212, |
|
"learning_rate": 0.0030972783946579593, |
|
"loss": 0.967, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.7441091360066143, |
|
"grad_norm": 0.11569386720657349, |
|
"learning_rate": 0.003048529090870162, |
|
"loss": 0.9691, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.7451426209177346, |
|
"grad_norm": 0.11116993427276611, |
|
"learning_rate": 0.0030005470718779117, |
|
"loss": 0.9783, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.7461761058288549, |
|
"grad_norm": 0.10824961215257645, |
|
"learning_rate": 0.002953320261078841, |
|
"loss": 0.9589, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.7472095907399752, |
|
"grad_norm": 0.1097089946269989, |
|
"learning_rate": 0.00290683677194906, |
|
"loss": 0.9707, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.7482430756510955, |
|
"grad_norm": 0.13413402438163757, |
|
"learning_rate": 0.002861084905051434, |
|
"loss": 0.9583, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.7492765605622158, |
|
"grad_norm": 0.10966306924819946, |
|
"learning_rate": 0.0028160531450909495, |
|
"loss": 0.9656, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.7503100454733361, |
|
"grad_norm": 0.10541026294231415, |
|
"learning_rate": 0.0027717301580164277, |
|
"loss": 0.9568, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.7513435303844563, |
|
"grad_norm": 0.10642149299383163, |
|
"learning_rate": 0.002728104788167857, |
|
"loss": 0.973, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.7523770152955767, |
|
"grad_norm": 0.10307744145393372, |
|
"learning_rate": 0.0026851660554686207, |
|
"loss": 0.973, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.753410500206697, |
|
"grad_norm": 0.10917676985263824, |
|
"learning_rate": 0.002642903152661921, |
|
"loss": 0.9739, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.7544439851178173, |
|
"grad_norm": 0.12075357139110565, |
|
"learning_rate": 0.002601305442590699, |
|
"loss": 0.9668, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.7554774700289376, |
|
"grad_norm": 0.10885104537010193, |
|
"learning_rate": 0.0025603624555203654, |
|
"loss": 0.9732, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.7565109549400578, |
|
"grad_norm": 0.11787907034158707, |
|
"learning_rate": 0.0025200638865036733, |
|
"loss": 0.9597, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.7575444398511781, |
|
"grad_norm": 0.11566518247127533, |
|
"learning_rate": 0.0024803995927870632, |
|
"loss": 0.9595, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.7585779247622985, |
|
"grad_norm": 0.09844059497117996, |
|
"learning_rate": 0.002441359591257831, |
|
"loss": 0.9561, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.7596114096734188, |
|
"grad_norm": 0.1304013878107071, |
|
"learning_rate": 0.002402934055931478, |
|
"loss": 0.9454, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.7606448945845391, |
|
"grad_norm": 0.1041470617055893, |
|
"learning_rate": 0.002365113315478606, |
|
"loss": 0.9453, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.7616783794956593, |
|
"grad_norm": 0.10921091586351395, |
|
"learning_rate": 0.00232788785079074, |
|
"loss": 0.9644, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.7627118644067796, |
|
"grad_norm": 0.12734107673168182, |
|
"learning_rate": 0.0022912482925844615, |
|
"loss": 0.9491, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.7637453493179, |
|
"grad_norm": 0.11103381216526031, |
|
"learning_rate": 0.0022551854190432517, |
|
"loss": 0.9379, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.7647788342290203, |
|
"grad_norm": 0.10525085031986237, |
|
"learning_rate": 0.0022196901534964524, |
|
"loss": 0.956, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.7658123191401406, |
|
"grad_norm": 0.10325931012630463, |
|
"learning_rate": 0.0021847535621347543, |
|
"loss": 0.9478, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.7668458040512609, |
|
"grad_norm": 0.10702349990606308, |
|
"learning_rate": 0.0021503668517616494, |
|
"loss": 0.9408, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.7678792889623811, |
|
"grad_norm": 0.10343906283378601, |
|
"learning_rate": 0.002116521367580266, |
|
"loss": 0.9505, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.7689127738735014, |
|
"grad_norm": 0.11631717532873154, |
|
"learning_rate": 0.0020832085910150425, |
|
"loss": 0.9483, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.7699462587846218, |
|
"grad_norm": 0.114792600274086, |
|
"learning_rate": 0.0020504201375676876, |
|
"loss": 0.9425, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.7709797436957421, |
|
"grad_norm": 0.10078386217355728, |
|
"learning_rate": 0.0020181477547068817, |
|
"loss": 0.955, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.7720132286068624, |
|
"grad_norm": 0.10332197695970535, |
|
"learning_rate": 0.0019863833197911983, |
|
"loss": 0.9488, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.7730467135179826, |
|
"grad_norm": 0.1078406572341919, |
|
"learning_rate": 0.001955118838024712, |
|
"loss": 0.9536, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.7740801984291029, |
|
"grad_norm": 0.1059098169207573, |
|
"learning_rate": 0.0019243464404447924, |
|
"loss": 0.9396, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.7751136833402232, |
|
"grad_norm": 0.10873686522245407, |
|
"learning_rate": 0.0018940583819415564, |
|
"loss": 0.9412, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7761471682513436, |
|
"grad_norm": 0.10201577097177505, |
|
"learning_rate": 0.0018642470393085062, |
|
"loss": 0.9406, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.7771806531624639, |
|
"grad_norm": 0.10363386571407318, |
|
"learning_rate": 0.0018349049093238407, |
|
"loss": 0.9349, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.7782141380735841, |
|
"grad_norm": 0.10290543735027313, |
|
"learning_rate": 0.001806024606861967, |
|
"loss": 0.9433, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.7792476229847044, |
|
"grad_norm": 0.09915654361248016, |
|
"learning_rate": 0.0017775988630347405, |
|
"loss": 0.9333, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.7802811078958247, |
|
"grad_norm": 0.10667795687913895, |
|
"learning_rate": 0.0017496205233619542, |
|
"loss": 0.9282, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.781314592806945, |
|
"grad_norm": 0.0981353297829628, |
|
"learning_rate": 0.001722082545970627, |
|
"loss": 0.9443, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.7823480777180654, |
|
"grad_norm": 0.09341371059417725, |
|
"learning_rate": 0.0016949779998226353, |
|
"loss": 0.9255, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.7833815626291856, |
|
"grad_norm": 0.09437743574380875, |
|
"learning_rate": 0.0016683000629702357, |
|
"loss": 0.9374, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.7844150475403059, |
|
"grad_norm": 0.10137518495321274, |
|
"learning_rate": 0.001642042020839051, |
|
"loss": 0.9434, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.7854485324514262, |
|
"grad_norm": 0.10156513750553131, |
|
"learning_rate": 0.001616197264538076, |
|
"loss": 0.9282, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.7864820173625465, |
|
"grad_norm": 0.10588687658309937, |
|
"learning_rate": 0.0015907592891962843, |
|
"loss": 0.9329, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.7875155022736668, |
|
"grad_norm": 0.10155037045478821, |
|
"learning_rate": 0.0015657216923254177, |
|
"loss": 0.9268, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.788548987184787, |
|
"grad_norm": 0.10379229485988617, |
|
"learning_rate": 0.0015410781722085428, |
|
"loss": 0.9437, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.7895824720959074, |
|
"grad_norm": 0.10199972987174988, |
|
"learning_rate": 0.0015168225263139693, |
|
"loss": 0.9425, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.7906159570070277, |
|
"grad_norm": 0.10469642281532288, |
|
"learning_rate": 0.0014929486497341341, |
|
"loss": 0.924, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.791649441918148, |
|
"grad_norm": 0.09616528451442719, |
|
"learning_rate": 0.0014694505336490583, |
|
"loss": 0.931, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.7926829268292683, |
|
"grad_norm": 0.10645470768213272, |
|
"learning_rate": 0.001446322263813983, |
|
"loss": 0.9266, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.7937164117403885, |
|
"grad_norm": 0.10266025364398956, |
|
"learning_rate": 0.0014235580190708143, |
|
"loss": 0.9251, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.7947498966515089, |
|
"grad_norm": 0.10014299303293228, |
|
"learning_rate": 0.0014011520698829947, |
|
"loss": 0.9268, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.7957833815626292, |
|
"grad_norm": 0.10017547011375427, |
|
"learning_rate": 0.0013790987768934346, |
|
"loss": 0.9345, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.7968168664737495, |
|
"grad_norm": 0.09650063514709473, |
|
"learning_rate": 0.0013573925895051418, |
|
"loss": 0.919, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.7978503513848698, |
|
"grad_norm": 0.11645365506410599, |
|
"learning_rate": 0.00133602804448419, |
|
"loss": 0.9256, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.79888383629599, |
|
"grad_norm": 0.09640100598335266, |
|
"learning_rate": 0.0013149997645846788, |
|
"loss": 0.9252, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.7999173212071103, |
|
"grad_norm": 0.09893805533647537, |
|
"learning_rate": 0.0012943024571953315, |
|
"loss": 0.9209, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.8009508061182307, |
|
"grad_norm": 0.10975757986307144, |
|
"learning_rate": 0.001273930913007398, |
|
"loss": 0.9259, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.801984291029351, |
|
"grad_norm": 0.09872964024543762, |
|
"learning_rate": 0.0012538800047035218, |
|
"loss": 0.9139, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.8030177759404713, |
|
"grad_norm": 0.10294859856367111, |
|
"learning_rate": 0.0012341446856672469, |
|
"loss": 0.9219, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.8040512608515916, |
|
"grad_norm": 0.09699690341949463, |
|
"learning_rate": 0.0012147199887128318, |
|
"loss": 0.9171, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.8050847457627118, |
|
"grad_norm": 0.10195200890302658, |
|
"learning_rate": 0.0011956010248350593, |
|
"loss": 0.9181, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.8061182306738321, |
|
"grad_norm": 0.10153238475322723, |
|
"learning_rate": 0.0011767829819787205, |
|
"loss": 0.9122, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.8071517155849525, |
|
"grad_norm": 0.09719067811965942, |
|
"learning_rate": 0.0011582611238274692, |
|
"loss": 0.9261, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.8081852004960728, |
|
"grad_norm": 0.09960606694221497, |
|
"learning_rate": 0.0011400307886117367, |
|
"loss": 0.9159, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.8092186854071931, |
|
"grad_norm": 0.10018911957740784, |
|
"learning_rate": 0.001122087387935411, |
|
"loss": 0.9043, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.8102521703183133, |
|
"grad_norm": 0.09987188875675201, |
|
"learning_rate": 0.0011044264056209812, |
|
"loss": 0.923, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.8112856552294336, |
|
"grad_norm": 0.10447081923484802, |
|
"learning_rate": 0.0010870433965728626, |
|
"loss": 0.912, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.812319140140554, |
|
"grad_norm": 0.09891889244318008, |
|
"learning_rate": 0.0010699339856586065, |
|
"loss": 0.9231, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.8133526250516743, |
|
"grad_norm": 0.11543388664722443, |
|
"learning_rate": 0.0010530938666077263, |
|
"loss": 0.9189, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.8143861099627946, |
|
"grad_norm": 0.10272625833749771, |
|
"learning_rate": 0.0010365188009278474, |
|
"loss": 0.9218, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.8154195948739148, |
|
"grad_norm": 0.11380596458911896, |
|
"learning_rate": 0.0010202046168379232, |
|
"loss": 0.9089, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.8164530797850351, |
|
"grad_norm": 0.10323087871074677, |
|
"learning_rate": 0.0010041472082182382, |
|
"loss": 0.9053, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.8174865646961554, |
|
"grad_norm": 0.10377652198076248, |
|
"learning_rate": 0.0009883425335769375, |
|
"loss": 0.9167, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.8185200496072758, |
|
"grad_norm": 0.09777701646089554, |
|
"learning_rate": 0.0009727866150328237, |
|
"loss": 0.9134, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.8195535345183961, |
|
"grad_norm": 0.10352311283349991, |
|
"learning_rate": 0.0009574755373141626, |
|
"loss": 0.9203, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.8205870194295163, |
|
"grad_norm": 0.1033698320388794, |
|
"learning_rate": 0.0009424054467732486, |
|
"loss": 0.916, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.8216205043406366, |
|
"grad_norm": 0.10148289799690247, |
|
"learning_rate": 0.0009275725504164789, |
|
"loss": 0.9055, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.8226539892517569, |
|
"grad_norm": 0.1043614000082016, |
|
"learning_rate": 0.0009129731149496945, |
|
"loss": 0.9103, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.8236874741628772, |
|
"grad_norm": 0.10638295859098434, |
|
"learning_rate": 0.0008986034658385467, |
|
"loss": 0.9095, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.8247209590739976, |
|
"grad_norm": 0.09755829721689224, |
|
"learning_rate": 0.0008844599863836532, |
|
"loss": 0.911, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.8257544439851178, |
|
"grad_norm": 0.09516286849975586, |
|
"learning_rate": 0.0008705391168103099, |
|
"loss": 0.8951, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.8267879288962381, |
|
"grad_norm": 0.09857280552387238, |
|
"learning_rate": 0.0008568373533725312, |
|
"loss": 0.9129, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8278214138073584, |
|
"grad_norm": 0.09658730775117874, |
|
"learning_rate": 0.0008433512474711912, |
|
"loss": 0.9123, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.8288548987184787, |
|
"grad_norm": 0.09653860330581665, |
|
"learning_rate": 0.0008300774047860455, |
|
"loss": 0.8981, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.829888383629599, |
|
"grad_norm": 0.10416875779628754, |
|
"learning_rate": 0.0008170124844214137, |
|
"loss": 0.9173, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.8309218685407193, |
|
"grad_norm": 0.09936217218637466, |
|
"learning_rate": 0.0008041531980653093, |
|
"loss": 0.9129, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.8319553534518396, |
|
"grad_norm": 0.10089723765850067, |
|
"learning_rate": 0.0007914963091618035, |
|
"loss": 0.9138, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.8329888383629599, |
|
"grad_norm": 0.09607098996639252, |
|
"learning_rate": 0.0007790386320964167, |
|
"loss": 0.8986, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.8340223232740802, |
|
"grad_norm": 0.09406198561191559, |
|
"learning_rate": 0.0007667770313943293, |
|
"loss": 0.9062, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.8350558081852005, |
|
"grad_norm": 0.1027892455458641, |
|
"learning_rate": 0.0007547084209312148, |
|
"loss": 0.9082, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.8360892930963207, |
|
"grad_norm": 0.09503350406885147, |
|
"learning_rate": 0.0007428297631564922, |
|
"loss": 0.915, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.8371227780074411, |
|
"grad_norm": 0.11514163017272949, |
|
"learning_rate": 0.000731138068328804, |
|
"loss": 0.8963, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.8381562629185614, |
|
"grad_norm": 0.09855664521455765, |
|
"learning_rate": 0.0007196303937635279, |
|
"loss": 0.8902, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.8391897478296817, |
|
"grad_norm": 0.0985339879989624, |
|
"learning_rate": 0.000708303843092133, |
|
"loss": 0.9087, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.840223232740802, |
|
"grad_norm": 0.10324376076459885, |
|
"learning_rate": 0.0006971555655331906, |
|
"loss": 0.9129, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.8412567176519223, |
|
"grad_norm": 0.10233034193515778, |
|
"learning_rate": 0.0006861827551748618, |
|
"loss": 0.8974, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.8422902025630425, |
|
"grad_norm": 0.09102931618690491, |
|
"learning_rate": 0.0006753826502686767, |
|
"loss": 0.8945, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.8433236874741629, |
|
"grad_norm": 0.09884581714868546, |
|
"learning_rate": 0.0006647525325344291, |
|
"loss": 0.9076, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.8443571723852832, |
|
"grad_norm": 0.10044873505830765, |
|
"learning_rate": 0.0006542897264760131, |
|
"loss": 0.897, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.8453906572964035, |
|
"grad_norm": 0.10729029029607773, |
|
"learning_rate": 0.000643991598708026, |
|
"loss": 0.9061, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.8464241422075238, |
|
"grad_norm": 0.09865026921033859, |
|
"learning_rate": 0.0006338555572929715, |
|
"loss": 0.9198, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.847457627118644, |
|
"grad_norm": 0.0979701355099678, |
|
"learning_rate": 0.0006238790510888947, |
|
"loss": 0.9027, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.8484911120297643, |
|
"grad_norm": 0.10607071965932846, |
|
"learning_rate": 0.0006140595691072842, |
|
"loss": 0.9005, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.8495245969408847, |
|
"grad_norm": 0.09750446677207947, |
|
"learning_rate": 0.0006043946398810819, |
|
"loss": 0.8955, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.850558081852005, |
|
"grad_norm": 0.10503144562244415, |
|
"learning_rate": 0.0005948818308426382, |
|
"loss": 0.8987, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.8515915667631253, |
|
"grad_norm": 0.09300017356872559, |
|
"learning_rate": 0.0005855187477114587, |
|
"loss": 0.8943, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.8526250516742455, |
|
"grad_norm": 0.09706971049308777, |
|
"learning_rate": 0.0005763030338915879, |
|
"loss": 0.8942, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.8536585365853658, |
|
"grad_norm": 0.11037931591272354, |
|
"learning_rate": 0.000567232369878477, |
|
"loss": 0.8958, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.8546920214964862, |
|
"grad_norm": 0.09634577482938766, |
|
"learning_rate": 0.0005583044726751868, |
|
"loss": 0.8976, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.8557255064076065, |
|
"grad_norm": 0.10242386907339096, |
|
"learning_rate": 0.0005495170952177806, |
|
"loss": 0.8902, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.8567589913187268, |
|
"grad_norm": 0.10059966146945953, |
|
"learning_rate": 0.00054086802580976, |
|
"loss": 0.8896, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.857792476229847, |
|
"grad_norm": 0.09923160821199417, |
|
"learning_rate": 0.0005323550875654025, |
|
"loss": 0.9073, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.8588259611409673, |
|
"grad_norm": 0.09592054039239883, |
|
"learning_rate": 0.000523976137861861, |
|
"loss": 0.896, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.8598594460520876, |
|
"grad_norm": 0.09789422899484634, |
|
"learning_rate": 0.0005157290677998864, |
|
"loss": 0.8878, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.860892930963208, |
|
"grad_norm": 0.09961704909801483, |
|
"learning_rate": 0.0005076118016730388, |
|
"loss": 0.9077, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.8619264158743283, |
|
"grad_norm": 0.10076375305652618, |
|
"learning_rate": 0.0004996222964452523, |
|
"loss": 0.8979, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.8629599007854485, |
|
"grad_norm": 0.0950293093919754, |
|
"learning_rate": 0.000491758541236623, |
|
"loss": 0.8966, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.8639933856965688, |
|
"grad_norm": 0.10502886027097702, |
|
"learning_rate": 0.0004840185568172905, |
|
"loss": 0.9094, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.8650268706076891, |
|
"grad_norm": 0.09783428907394409, |
|
"learning_rate": 0.0004764003951092846, |
|
"loss": 0.904, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.8660603555188094, |
|
"grad_norm": 0.10145466774702072, |
|
"learning_rate": 0.00046890213869621406, |
|
"loss": 0.8912, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.8670938404299298, |
|
"grad_norm": 0.09995938092470169, |
|
"learning_rate": 0.000461521900340671, |
|
"loss": 0.91, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.86812732534105, |
|
"grad_norm": 0.09882347285747528, |
|
"learning_rate": 0.0004542578225092324, |
|
"loss": 0.8935, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.8691608102521703, |
|
"grad_norm": 0.1028551384806633, |
|
"learning_rate": 0.00044710807690493667, |
|
"loss": 0.9005, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.8701942951632906, |
|
"grad_norm": 0.09993860870599747, |
|
"learning_rate": 0.00044007086400711984, |
|
"loss": 0.8883, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.8712277800744109, |
|
"grad_norm": 0.09587843716144562, |
|
"learning_rate": 0.0004331444126184934, |
|
"loss": 0.896, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.8722612649855312, |
|
"grad_norm": 0.10601107031106949, |
|
"learning_rate": 0.0004263269794193516, |
|
"loss": 0.9027, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.8732947498966515, |
|
"grad_norm": 0.09365765005350113, |
|
"learning_rate": 0.00041961684852879507, |
|
"loss": 0.8928, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.8743282348077718, |
|
"grad_norm": 0.10556492209434509, |
|
"learning_rate": 0.0004130123310728602, |
|
"loss": 0.8872, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.8753617197188921, |
|
"grad_norm": 0.09618480503559113, |
|
"learning_rate": 0.0004065117647594465, |
|
"loss": 0.8917, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.8763952046300124, |
|
"grad_norm": 0.100033700466156, |
|
"learning_rate": 0.0004001135134599342, |
|
"loss": 0.8949, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.8774286895411327, |
|
"grad_norm": 0.09465377777814865, |
|
"learning_rate": 0.00039381596679738596, |
|
"loss": 0.8931, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.8784621744522529, |
|
"grad_norm": 0.09475496411323547, |
|
"learning_rate": 0.00038761753974123146, |
|
"loss": 0.8856, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.8794956593633733, |
|
"grad_norm": 0.09431330114603043, |
|
"learning_rate": 0.0003815166722083307, |
|
"loss": 0.8921, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.8805291442744936, |
|
"grad_norm": 0.11336002498865128, |
|
"learning_rate": 0.00037551182867031637, |
|
"loss": 0.8944, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.8815626291856139, |
|
"grad_norm": 0.09444057941436768, |
|
"learning_rate": 0.00036960149776711637, |
|
"loss": 0.8907, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.8825961140967342, |
|
"grad_norm": 0.09592239558696747, |
|
"learning_rate": 0.0003637841919265597, |
|
"loss": 0.8993, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.8836295990078545, |
|
"grad_norm": 0.09695941209793091, |
|
"learning_rate": 0.0003580584469899685, |
|
"loss": 0.8948, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.8846630839189747, |
|
"grad_norm": 0.10035042464733124, |
|
"learning_rate": 0.0003524228218436444, |
|
"loss": 0.8897, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.8856965688300951, |
|
"grad_norm": 0.11594710499048233, |
|
"learning_rate": 0.0003468758980561539, |
|
"loss": 0.896, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.8867300537412154, |
|
"grad_norm": 0.10346604883670807, |
|
"learning_rate": 0.00034141627952132355, |
|
"loss": 0.8885, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.8877635386523357, |
|
"grad_norm": 0.09101969003677368, |
|
"learning_rate": 0.0003360425921068532, |
|
"loss": 0.8867, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.888797023563456, |
|
"grad_norm": 0.09395504742860794, |
|
"learning_rate": 0.00033075348330846094, |
|
"loss": 0.8871, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.8898305084745762, |
|
"grad_norm": 0.09610182791948318, |
|
"learning_rate": 0.00032554762190947073, |
|
"loss": 0.8913, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.8908639933856966, |
|
"grad_norm": 0.11420192569494247, |
|
"learning_rate": 0.0003204236976457585, |
|
"loss": 0.8897, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.8918974782968169, |
|
"grad_norm": 0.10842256993055344, |
|
"learning_rate": 0.00031538042087597126, |
|
"loss": 0.8887, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.8929309632079372, |
|
"grad_norm": 0.10306430608034134, |
|
"learning_rate": 0.00031041652225693745, |
|
"loss": 0.8882, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.8939644481190575, |
|
"grad_norm": 0.09563726931810379, |
|
"learning_rate": 0.00030553075242418533, |
|
"loss": 0.8884, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.8949979330301777, |
|
"grad_norm": 0.09344979375600815, |
|
"learning_rate": 0.0003007218816774904, |
|
"loss": 0.8853, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.896031417941298, |
|
"grad_norm": 0.09084004163742065, |
|
"learning_rate": 0.0002959886996713722, |
|
"loss": 0.8844, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.8970649028524184, |
|
"grad_norm": 0.0981561467051506, |
|
"learning_rate": 0.0002913300151104616, |
|
"loss": 0.8987, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.8980983877635387, |
|
"grad_norm": 0.09380181133747101, |
|
"learning_rate": 0.00028674465544966445, |
|
"loss": 0.8881, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.899131872674659, |
|
"grad_norm": 0.10047593712806702, |
|
"learning_rate": 0.00028223146659904236, |
|
"loss": 0.8949, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.9001653575857792, |
|
"grad_norm": 0.10739541053771973, |
|
"learning_rate": 0.0002777893126333406, |
|
"loss": 0.8969, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.9011988424968995, |
|
"grad_norm": 0.09939514100551605, |
|
"learning_rate": 0.0002734170755060864, |
|
"loss": 0.8808, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.9022323274080198, |
|
"grad_norm": 0.09798012673854828, |
|
"learning_rate": 0.0002691136547681875, |
|
"loss": 0.8898, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.9032658123191402, |
|
"grad_norm": 0.09667760878801346, |
|
"learning_rate": 0.00026487796729096044, |
|
"loss": 0.8996, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.9042992972302605, |
|
"grad_norm": 0.09401620179414749, |
|
"learning_rate": 0.0002607089469935173, |
|
"loss": 0.8797, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.9053327821413807, |
|
"grad_norm": 0.09637662023305893, |
|
"learning_rate": 0.0002566055445744437, |
|
"loss": 0.885, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.906366267052501, |
|
"grad_norm": 0.0979810431599617, |
|
"learning_rate": 0.00025256672724770034, |
|
"loss": 0.8765, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.9073997519636213, |
|
"grad_norm": 0.09577486664056778, |
|
"learning_rate": 0.0002485914784826802, |
|
"loss": 0.878, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.9084332368747416, |
|
"grad_norm": 0.09690549224615097, |
|
"learning_rate": 0.0002446787977483582, |
|
"loss": 0.8951, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.909466721785862, |
|
"grad_norm": 0.09132607281208038, |
|
"learning_rate": 0.00024082770026146767, |
|
"loss": 0.8922, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.9105002066969822, |
|
"grad_norm": 0.10059286653995514, |
|
"learning_rate": 0.00023703721673863946, |
|
"loss": 0.8961, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.9115336916081025, |
|
"grad_norm": 0.09476503729820251, |
|
"learning_rate": 0.00023330639315244326, |
|
"loss": 0.8891, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.9125671765192228, |
|
"grad_norm": 0.097599096596241, |
|
"learning_rate": 0.00022963429049126816, |
|
"loss": 0.8877, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.9136006614303431, |
|
"grad_norm": 0.09217467904090881, |
|
"learning_rate": 0.00022601998452298264, |
|
"loss": 0.893, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.9146341463414634, |
|
"grad_norm": 0.11072806268930435, |
|
"learning_rate": 0.00022246256556231452, |
|
"loss": 0.8943, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.9156676312525837, |
|
"grad_norm": 0.10245074331760406, |
|
"learning_rate": 0.00021896113824189194, |
|
"loss": 0.9008, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.916701116163704, |
|
"grad_norm": 0.0995858684182167, |
|
"learning_rate": 0.00021551482128688842, |
|
"loss": 0.8831, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.9177346010748243, |
|
"grad_norm": 0.09560049325227737, |
|
"learning_rate": 0.0002121227472932145, |
|
"loss": 0.8978, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.9187680859859446, |
|
"grad_norm": 0.09800171852111816, |
|
"learning_rate": 0.00020878406250920078, |
|
"loss": 0.8993, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.9198015708970649, |
|
"grad_norm": 0.09587955474853516, |
|
"learning_rate": 0.00020549792662071688, |
|
"loss": 0.8947, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.9208350558081853, |
|
"grad_norm": 0.09761986881494522, |
|
"learning_rate": 0.00020226351253967265, |
|
"loss": 0.8968, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.9218685407193055, |
|
"grad_norm": 0.10232323408126831, |
|
"learning_rate": 0.00019908000619584844, |
|
"loss": 0.8743, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.9229020256304258, |
|
"grad_norm": 0.09598308801651001, |
|
"learning_rate": 0.00019594660633200127, |
|
"loss": 0.8886, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 0.9239355105415461, |
|
"grad_norm": 0.09552961587905884, |
|
"learning_rate": 0.0001928625243021967, |
|
"loss": 0.898, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.9249689954526664, |
|
"grad_norm": 0.09763569384813309, |
|
"learning_rate": 0.000189826983873314, |
|
"loss": 0.8872, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.9260024803637867, |
|
"grad_norm": 0.10132598876953125, |
|
"learning_rate": 0.00018683922102967604, |
|
"loss": 0.8844, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.927035965274907, |
|
"grad_norm": 0.10523135215044022, |
|
"learning_rate": 0.00018389848378075436, |
|
"loss": 0.8886, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 0.9280694501860273, |
|
"grad_norm": 0.10074016451835632, |
|
"learning_rate": 0.0001810040319719, |
|
"loss": 0.8868, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.9291029350971476, |
|
"grad_norm": 0.09313800185918808, |
|
"learning_rate": 0.00017815513709805425, |
|
"loss": 0.8944, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 0.9301364200082679, |
|
"grad_norm": 0.09687028080224991, |
|
"learning_rate": 0.00017535108212039094, |
|
"loss": 0.8764, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9311699049193882, |
|
"grad_norm": 0.09959317743778229, |
|
"learning_rate": 0.00017259116128584488, |
|
"loss": 0.8879, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 0.9322033898305084, |
|
"grad_norm": 0.09753499180078506, |
|
"learning_rate": 0.0001698746799494807, |
|
"loss": 0.8878, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.9332368747416288, |
|
"grad_norm": 0.10160617530345917, |
|
"learning_rate": 0.00016720095439965764, |
|
"loss": 0.8878, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 0.9342703596527491, |
|
"grad_norm": 0.09427917003631592, |
|
"learning_rate": 0.00016456931168594587, |
|
"loss": 0.8917, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.9353038445638694, |
|
"grad_norm": 0.09126557409763336, |
|
"learning_rate": 0.0001619790894497517, |
|
"loss": 0.8756, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.9363373294749897, |
|
"grad_norm": 0.10263457894325256, |
|
"learning_rate": 0.0001594296357576083, |
|
"loss": 0.8968, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.9373708143861099, |
|
"grad_norm": 0.09004776179790497, |
|
"learning_rate": 0.00015692030893709055, |
|
"loss": 0.8747, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 0.9384042992972302, |
|
"grad_norm": 0.09706980735063553, |
|
"learning_rate": 0.0001544504774153122, |
|
"loss": 0.8782, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.9394377842083506, |
|
"grad_norm": 0.11443266272544861, |
|
"learning_rate": 0.00015201951955996554, |
|
"loss": 0.8954, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 0.9404712691194709, |
|
"grad_norm": 0.09983173757791519, |
|
"learning_rate": 0.00014962682352286227, |
|
"loss": 0.8839, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.9415047540305912, |
|
"grad_norm": 0.09485773742198944, |
|
"learning_rate": 0.00014727178708593758, |
|
"loss": 0.8873, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 0.9425382389417114, |
|
"grad_norm": 0.09533528983592987, |
|
"learning_rate": 0.00014495381750967773, |
|
"loss": 0.8803, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.9435717238528317, |
|
"grad_norm": 0.09723920375108719, |
|
"learning_rate": 0.00014267233138393328, |
|
"loss": 0.894, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 0.944605208763952, |
|
"grad_norm": 0.10340782254934311, |
|
"learning_rate": 0.00014042675448108058, |
|
"loss": 0.8876, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.9456386936750724, |
|
"grad_norm": 0.10088169574737549, |
|
"learning_rate": 0.00013821652161149427, |
|
"loss": 0.8797, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.9466721785861927, |
|
"grad_norm": 0.09791452437639236, |
|
"learning_rate": 0.0001360410764812946, |
|
"loss": 0.8897, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.9477056634973129, |
|
"grad_norm": 0.0989762470126152, |
|
"learning_rate": 0.00013389987155233373, |
|
"loss": 0.885, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 0.9487391484084332, |
|
"grad_norm": 0.09686373174190521, |
|
"learning_rate": 0.0001317923679043859, |
|
"loss": 0.88, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.9497726333195535, |
|
"grad_norm": 0.094807930290699, |
|
"learning_rate": 0.00012971803509950622, |
|
"loss": 0.8877, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 0.9508061182306738, |
|
"grad_norm": 0.0951213538646698, |
|
"learning_rate": 0.000127676351048525, |
|
"loss": 0.8858, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.9518396031417942, |
|
"grad_norm": 0.09922289848327637, |
|
"learning_rate": 0.00012566680187964274, |
|
"loss": 0.8826, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 0.9528730880529144, |
|
"grad_norm": 0.09398606419563293, |
|
"learning_rate": 0.0001236888818090938, |
|
"loss": 0.8817, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.9539065729640347, |
|
"grad_norm": 0.1025305688381195, |
|
"learning_rate": 0.0001217420930138456, |
|
"loss": 0.8809, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 0.954940057875155, |
|
"grad_norm": 0.0998004898428917, |
|
"learning_rate": 0.00011982594550630144, |
|
"loss": 0.8878, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.9559735427862753, |
|
"grad_norm": 0.09735240042209625, |
|
"learning_rate": 0.00011793995701097545, |
|
"loss": 0.8713, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.9570070276973957, |
|
"grad_norm": 0.09462594240903854, |
|
"learning_rate": 0.00011608365284310853, |
|
"loss": 0.8932, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.958040512608516, |
|
"grad_norm": 0.0939006358385086, |
|
"learning_rate": 0.00011425656578919491, |
|
"loss": 0.8865, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 0.9590739975196362, |
|
"grad_norm": 0.10327833890914917, |
|
"learning_rate": 0.00011245823598938918, |
|
"loss": 0.885, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.9601074824307565, |
|
"grad_norm": 0.10130874067544937, |
|
"learning_rate": 0.00011068821082176394, |
|
"loss": 0.8737, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 0.9611409673418768, |
|
"grad_norm": 0.0958571583032608, |
|
"learning_rate": 0.00010894604478838941, |
|
"loss": 0.8836, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.9621744522529971, |
|
"grad_norm": 0.10654882341623306, |
|
"learning_rate": 0.00010723129940320598, |
|
"loss": 0.8808, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 0.9632079371641175, |
|
"grad_norm": 0.10343246906995773, |
|
"learning_rate": 0.00010554354308166149, |
|
"loss": 0.8838, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.9642414220752377, |
|
"grad_norm": 0.09667105227708817, |
|
"learning_rate": 0.00010388235103208581, |
|
"loss": 0.8944, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 0.965274906986358, |
|
"grad_norm": 0.09733343869447708, |
|
"learning_rate": 0.00010224730514877476, |
|
"loss": 0.8807, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.9663083918974783, |
|
"grad_norm": 0.09809593111276627, |
|
"learning_rate": 0.0001006379939067572, |
|
"loss": 0.8824, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.9673418768085986, |
|
"grad_norm": 0.09950044006109238, |
|
"learning_rate": 9.905401225821806e-05, |
|
"loss": 0.8895, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.9683753617197189, |
|
"grad_norm": 0.09585009515285492, |
|
"learning_rate": 9.749496153055196e-05, |
|
"loss": 0.871, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 0.9694088466308391, |
|
"grad_norm": 0.09470534324645996, |
|
"learning_rate": 9.596044932602107e-05, |
|
"loss": 0.8805, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.9704423315419595, |
|
"grad_norm": 0.09828654676675797, |
|
"learning_rate": 9.445008942299262e-05, |
|
"loss": 0.8796, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 0.9714758164530798, |
|
"grad_norm": 0.09540653228759766, |
|
"learning_rate": 9.296350167873062e-05, |
|
"loss": 0.8769, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.9725093013642001, |
|
"grad_norm": 0.09175027906894684, |
|
"learning_rate": 9.150031193371763e-05, |
|
"loss": 0.8823, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 0.9735427862753204, |
|
"grad_norm": 0.09416258335113525, |
|
"learning_rate": 9.00601519174826e-05, |
|
"loss": 0.8826, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.9745762711864406, |
|
"grad_norm": 0.102226123213768, |
|
"learning_rate": 8.864265915591077e-05, |
|
"loss": 0.8813, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 0.975609756097561, |
|
"grad_norm": 0.1030995100736618, |
|
"learning_rate": 8.724747688001238e-05, |
|
"loss": 0.89, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.9766432410086813, |
|
"grad_norm": 0.09412838518619537, |
|
"learning_rate": 8.587425393612774e-05, |
|
"loss": 0.8869, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.9776767259198016, |
|
"grad_norm": 0.09783417731523514, |
|
"learning_rate": 8.45226446975449e-05, |
|
"loss": 0.8745, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.9787102108309219, |
|
"grad_norm": 0.09457999467849731, |
|
"learning_rate": 8.31923089775091e-05, |
|
"loss": 0.8854, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 0.9797436957420421, |
|
"grad_norm": 0.0977085754275322, |
|
"learning_rate": 8.188291194360097e-05, |
|
"loss": 0.8743, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.9807771806531624, |
|
"grad_norm": 0.09691467881202698, |
|
"learning_rate": 8.059412403346259e-05, |
|
"loss": 0.8798, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 0.9818106655642828, |
|
"grad_norm": 0.09877024590969086, |
|
"learning_rate": 7.932562087184977e-05, |
|
"loss": 0.8785, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.9828441504754031, |
|
"grad_norm": 0.09752603620290756, |
|
"learning_rate": 7.807708318899015e-05, |
|
"loss": 0.8787, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 0.9838776353865234, |
|
"grad_norm": 0.10295706242322922, |
|
"learning_rate": 7.684819674022599e-05, |
|
"loss": 0.8782, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.9849111202976436, |
|
"grad_norm": 0.09535824507474899, |
|
"learning_rate": 7.563865222692196e-05, |
|
"loss": 0.8861, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 0.9859446052087639, |
|
"grad_norm": 0.09367913007736206, |
|
"learning_rate": 7.444814521861768e-05, |
|
"loss": 0.8791, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.9869780901198842, |
|
"grad_norm": 0.10222353786230087, |
|
"learning_rate": 7.327637607640558e-05, |
|
"loss": 0.884, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.9880115750310046, |
|
"grad_norm": 0.1052263155579567, |
|
"learning_rate": 7.212304987751476e-05, |
|
"loss": 0.8766, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.9890450599421249, |
|
"grad_norm": 0.09064412862062454, |
|
"learning_rate": 7.098787634108176e-05, |
|
"loss": 0.8846, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 0.9900785448532451, |
|
"grad_norm": 0.09217038005590439, |
|
"learning_rate": 6.98705697550898e-05, |
|
"loss": 0.878, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.9911120297643654, |
|
"grad_norm": 0.08925709873437881, |
|
"learning_rate": 6.87708489044578e-05, |
|
"loss": 0.8659, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 0.9921455146754857, |
|
"grad_norm": 0.09898435324430466, |
|
"learning_rate": 6.768843700026141e-05, |
|
"loss": 0.8891, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.993178999586606, |
|
"grad_norm": 0.09614672511816025, |
|
"learning_rate": 6.662306161006783e-05, |
|
"loss": 0.8828, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 0.9942124844977264, |
|
"grad_norm": 0.09297246485948563, |
|
"learning_rate": 6.557445458936731e-05, |
|
"loss": 0.8905, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.9952459694088466, |
|
"grad_norm": 0.10573872923851013, |
|
"learning_rate": 6.45423520140839e-05, |
|
"loss": 0.8876, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 0.9962794543199669, |
|
"grad_norm": 0.09352509677410126, |
|
"learning_rate": 6.352649411414819e-05, |
|
"loss": 0.8856, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.9973129392310872, |
|
"grad_norm": 0.0931333377957344, |
|
"learning_rate": 6.252662520811583e-05, |
|
"loss": 0.8848, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.9983464241422075, |
|
"grad_norm": 0.10176722705364227, |
|
"learning_rate": 6.154249363881505e-05, |
|
"loss": 0.8792, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.9993799090533279, |
|
"grad_norm": 0.09999672323465347, |
|
"learning_rate": 6.057385171000694e-05, |
|
"loss": 0.8789, |
|
"step": 9670 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9676, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.24620217896713e+16, |
|
"train_batch_size": 1024, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |