|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999365683476055, |
|
"eval_steps": 500, |
|
"global_step": 3941, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002537266095781795, |
|
"grad_norm": 112.85723727111593, |
|
"learning_rate": 1.2658227848101266e-07, |
|
"loss": 1.8967, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.00507453219156359, |
|
"grad_norm": 111.7277255600065, |
|
"learning_rate": 2.5316455696202533e-07, |
|
"loss": 1.8962, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007611798287345386, |
|
"grad_norm": 100.54525732943956, |
|
"learning_rate": 3.79746835443038e-07, |
|
"loss": 1.8773, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01014906438312718, |
|
"grad_norm": 60.43888528431074, |
|
"learning_rate": 5.063291139240507e-07, |
|
"loss": 1.7742, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.012686330478908976, |
|
"grad_norm": 28.26912001667364, |
|
"learning_rate": 6.329113924050634e-07, |
|
"loss": 1.7141, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015223596574690771, |
|
"grad_norm": 27.480791814917016, |
|
"learning_rate": 7.59493670886076e-07, |
|
"loss": 1.6293, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.017760862670472565, |
|
"grad_norm": 20.239901551582367, |
|
"learning_rate": 8.860759493670887e-07, |
|
"loss": 1.5743, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02029812876625436, |
|
"grad_norm": 11.923875375009446, |
|
"learning_rate": 1.0126582278481013e-06, |
|
"loss": 1.5202, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.022835394862036156, |
|
"grad_norm": 6.627037994110648, |
|
"learning_rate": 1.139240506329114e-06, |
|
"loss": 1.4872, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02537266095781795, |
|
"grad_norm": 8.16791147643463, |
|
"learning_rate": 1.2658227848101267e-06, |
|
"loss": 1.4824, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027909927053599747, |
|
"grad_norm": 9.357053020719055, |
|
"learning_rate": 1.3924050632911392e-06, |
|
"loss": 1.4456, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.030447193149381543, |
|
"grad_norm": 8.946206463663723, |
|
"learning_rate": 1.518987341772152e-06, |
|
"loss": 1.4253, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.032984459245163335, |
|
"grad_norm": 6.684895053435472, |
|
"learning_rate": 1.6455696202531647e-06, |
|
"loss": 1.4119, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03552172534094513, |
|
"grad_norm": 5.858068093439728, |
|
"learning_rate": 1.7721518987341774e-06, |
|
"loss": 1.3944, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.038058991436726926, |
|
"grad_norm": 10.736099643334308, |
|
"learning_rate": 1.8987341772151901e-06, |
|
"loss": 1.3937, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04059625753250872, |
|
"grad_norm": 5.52407932247684, |
|
"learning_rate": 2.0253164556962026e-06, |
|
"loss": 1.3624, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04313352362829052, |
|
"grad_norm": 8.107714144715326, |
|
"learning_rate": 2.1518987341772153e-06, |
|
"loss": 1.3854, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04567078972407231, |
|
"grad_norm": 5.726773653383632, |
|
"learning_rate": 2.278481012658228e-06, |
|
"loss": 1.3745, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04820805581985411, |
|
"grad_norm": 5.421447552101552, |
|
"learning_rate": 2.4050632911392408e-06, |
|
"loss": 1.3616, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0507453219156359, |
|
"grad_norm": 5.519589755451587, |
|
"learning_rate": 2.5316455696202535e-06, |
|
"loss": 1.3802, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0532825880114177, |
|
"grad_norm": 4.997598050858228, |
|
"learning_rate": 2.6582278481012658e-06, |
|
"loss": 1.3501, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.055819854107199494, |
|
"grad_norm": 7.739443027895296, |
|
"learning_rate": 2.7848101265822785e-06, |
|
"loss": 1.3529, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05835712020298129, |
|
"grad_norm": 14.153932111981923, |
|
"learning_rate": 2.9113924050632912e-06, |
|
"loss": 1.3425, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.060894386298763085, |
|
"grad_norm": 6.821474472121072, |
|
"learning_rate": 3.037974683544304e-06, |
|
"loss": 1.3268, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06343165239454487, |
|
"grad_norm": 7.105335682421715, |
|
"learning_rate": 3.164556962025317e-06, |
|
"loss": 1.3391, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06596891849032667, |
|
"grad_norm": 9.157922661641283, |
|
"learning_rate": 3.2911392405063294e-06, |
|
"loss": 1.331, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06850618458610847, |
|
"grad_norm": 8.558636103035896, |
|
"learning_rate": 3.417721518987342e-06, |
|
"loss": 1.3318, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07104345068189026, |
|
"grad_norm": 6.250103925808961, |
|
"learning_rate": 3.544303797468355e-06, |
|
"loss": 1.3356, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07358071677767206, |
|
"grad_norm": 6.294667609868247, |
|
"learning_rate": 3.6708860759493675e-06, |
|
"loss": 1.32, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07611798287345385, |
|
"grad_norm": 6.6361456210836725, |
|
"learning_rate": 3.7974683544303802e-06, |
|
"loss": 1.3224, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07865524896923565, |
|
"grad_norm": 8.289069596597512, |
|
"learning_rate": 3.924050632911393e-06, |
|
"loss": 1.3176, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08119251506501744, |
|
"grad_norm": 5.542830103144929, |
|
"learning_rate": 4.050632911392405e-06, |
|
"loss": 1.2962, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08372978116079924, |
|
"grad_norm": 10.531069717984824, |
|
"learning_rate": 4.177215189873418e-06, |
|
"loss": 1.3005, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08626704725658103, |
|
"grad_norm": 17.37898460345658, |
|
"learning_rate": 4.303797468354431e-06, |
|
"loss": 1.3031, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08880431335236283, |
|
"grad_norm": 9.252485190691019, |
|
"learning_rate": 4.430379746835443e-06, |
|
"loss": 1.3107, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09134157944814462, |
|
"grad_norm": 7.525737017146553, |
|
"learning_rate": 4.556962025316456e-06, |
|
"loss": 1.2886, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09387884554392642, |
|
"grad_norm": 10.083996939013279, |
|
"learning_rate": 4.683544303797468e-06, |
|
"loss": 1.2954, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09641611163970822, |
|
"grad_norm": 18.864544413704667, |
|
"learning_rate": 4.8101265822784815e-06, |
|
"loss": 1.2959, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.09895337773549001, |
|
"grad_norm": 15.18986427449958, |
|
"learning_rate": 4.936708860759495e-06, |
|
"loss": 1.2918, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1014906438312718, |
|
"grad_norm": 8.154706578846366, |
|
"learning_rate": 4.999975471465892e-06, |
|
"loss": 1.2952, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1040279099270536, |
|
"grad_norm": 8.626125448906336, |
|
"learning_rate": 4.999779246080933e-06, |
|
"loss": 1.2812, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.1065651760228354, |
|
"grad_norm": 14.880568433586623, |
|
"learning_rate": 4.999386810712926e-06, |
|
"loss": 1.2532, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.1091024421186172, |
|
"grad_norm": 10.892903720339616, |
|
"learning_rate": 4.9987981961644855e-06, |
|
"loss": 1.2982, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11163970821439899, |
|
"grad_norm": 4.797131746271338, |
|
"learning_rate": 4.998013448636512e-06, |
|
"loss": 1.271, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11417697431018078, |
|
"grad_norm": 18.557197921826557, |
|
"learning_rate": 4.997032629724564e-06, |
|
"loss": 1.2825, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11671424040596258, |
|
"grad_norm": 15.001794851401208, |
|
"learning_rate": 4.995855816414024e-06, |
|
"loss": 1.2765, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11925150650174438, |
|
"grad_norm": 10.965425508561747, |
|
"learning_rate": 4.9944831010740576e-06, |
|
"loss": 1.2585, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12178877259752617, |
|
"grad_norm": 6.63862480684244, |
|
"learning_rate": 4.992914591450358e-06, |
|
"loss": 1.2929, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12432603869330797, |
|
"grad_norm": 6.984972641361711, |
|
"learning_rate": 4.991150410656697e-06, |
|
"loss": 1.2733, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.12686330478908975, |
|
"grad_norm": 18.903688844829993, |
|
"learning_rate": 4.9891906971652545e-06, |
|
"loss": 1.2558, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12940057088487156, |
|
"grad_norm": 7.177194302737898, |
|
"learning_rate": 4.987035604795753e-06, |
|
"loss": 1.2659, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13193783698065334, |
|
"grad_norm": 10.981192386536817, |
|
"learning_rate": 4.984685302703385e-06, |
|
"loss": 1.2606, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13447510307643515, |
|
"grad_norm": 9.47429133139194, |
|
"learning_rate": 4.982139975365533e-06, |
|
"loss": 1.2785, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.13701236917221693, |
|
"grad_norm": 17.731391325371494, |
|
"learning_rate": 4.979399822567292e-06, |
|
"loss": 1.2709, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.13954963526799874, |
|
"grad_norm": 13.256927828543148, |
|
"learning_rate": 4.976465059385788e-06, |
|
"loss": 1.248, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14208690136378052, |
|
"grad_norm": 9.285697826647898, |
|
"learning_rate": 4.973335916173294e-06, |
|
"loss": 1.2462, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14462416745956233, |
|
"grad_norm": 10.853767808913329, |
|
"learning_rate": 4.970012638539152e-06, |
|
"loss": 1.2533, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1471614335553441, |
|
"grad_norm": 16.143378806845213, |
|
"learning_rate": 4.966495487330496e-06, |
|
"loss": 1.2526, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14969869965112592, |
|
"grad_norm": 46.2688542748775, |
|
"learning_rate": 4.962784738611774e-06, |
|
"loss": 1.265, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.1522359657469077, |
|
"grad_norm": 6.132364126798611, |
|
"learning_rate": 4.958880683643082e-06, |
|
"loss": 1.2733, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1547732318426895, |
|
"grad_norm": 7.964503594558826, |
|
"learning_rate": 4.954783628857302e-06, |
|
"loss": 1.2626, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.1573104979384713, |
|
"grad_norm": 13.68977454870837, |
|
"learning_rate": 4.95049389583605e-06, |
|
"loss": 1.2657, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.1598477640342531, |
|
"grad_norm": 14.540626560719817, |
|
"learning_rate": 4.9460118212844355e-06, |
|
"loss": 1.2372, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16238503013003489, |
|
"grad_norm": 17.246204651829867, |
|
"learning_rate": 4.941337757004631e-06, |
|
"loss": 1.2355, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1649222962258167, |
|
"grad_norm": 5.660216888492281, |
|
"learning_rate": 4.936472069868262e-06, |
|
"loss": 1.2439, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.16745956232159848, |
|
"grad_norm": 32.30203917124339, |
|
"learning_rate": 4.931415141787607e-06, |
|
"loss": 1.2384, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16999682841738029, |
|
"grad_norm": 9.519199679528043, |
|
"learning_rate": 4.926167369685626e-06, |
|
"loss": 1.2452, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17253409451316207, |
|
"grad_norm": 9.559300124244501, |
|
"learning_rate": 4.920729165464799e-06, |
|
"loss": 1.2564, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.17507136060894388, |
|
"grad_norm": 7.099728750507589, |
|
"learning_rate": 4.915100955974802e-06, |
|
"loss": 1.2695, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.17760862670472566, |
|
"grad_norm": 8.197172932423973, |
|
"learning_rate": 4.909283182978998e-06, |
|
"loss": 1.2379, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18014589280050744, |
|
"grad_norm": 9.726988142249633, |
|
"learning_rate": 4.903276303119765e-06, |
|
"loss": 1.2483, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.18268315889628925, |
|
"grad_norm": 5.550536710160647, |
|
"learning_rate": 4.897080787882656e-06, |
|
"loss": 1.2493, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.18522042499207103, |
|
"grad_norm": 15.553779441262579, |
|
"learning_rate": 4.890697123559385e-06, |
|
"loss": 1.2635, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.18775769108785284, |
|
"grad_norm": 7.070319331844888, |
|
"learning_rate": 4.884125811209665e-06, |
|
"loss": 1.2439, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19029495718363462, |
|
"grad_norm": 9.252450309844942, |
|
"learning_rate": 4.877367366621874e-06, |
|
"loss": 1.2423, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19283222327941643, |
|
"grad_norm": 8.297702205658675, |
|
"learning_rate": 4.870422320272576e-06, |
|
"loss": 1.2322, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1953694893751982, |
|
"grad_norm": 14.013643448769688, |
|
"learning_rate": 4.863291217284872e-06, |
|
"loss": 1.2354, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.19790675547098002, |
|
"grad_norm": 8.039135652565351, |
|
"learning_rate": 4.855974617385629e-06, |
|
"loss": 1.2257, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2004440215667618, |
|
"grad_norm": 6.465541475557579, |
|
"learning_rate": 4.8484730948615336e-06, |
|
"loss": 1.2477, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2029812876625436, |
|
"grad_norm": 38.736482675881, |
|
"learning_rate": 4.840787238514019e-06, |
|
"loss": 1.242, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2055185537583254, |
|
"grad_norm": 6.2027098825049745, |
|
"learning_rate": 4.832917651613055e-06, |
|
"loss": 1.2481, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.2080558198541072, |
|
"grad_norm": 7.699764524449651, |
|
"learning_rate": 4.824864951849787e-06, |
|
"loss": 1.2422, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.210593085949889, |
|
"grad_norm": 7.045914854236814, |
|
"learning_rate": 4.8166297712880635e-06, |
|
"loss": 1.2296, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.2131303520456708, |
|
"grad_norm": 6.848231755692189, |
|
"learning_rate": 4.808212756314815e-06, |
|
"loss": 1.2185, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.21566761814145258, |
|
"grad_norm": 7.591181911551738, |
|
"learning_rate": 4.7996145675893255e-06, |
|
"loss": 1.2348, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.2182048842372344, |
|
"grad_norm": 13.992556734602806, |
|
"learning_rate": 4.7908358799913735e-06, |
|
"loss": 1.259, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22074215033301617, |
|
"grad_norm": 13.882288651265467, |
|
"learning_rate": 4.781877382568261e-06, |
|
"loss": 1.2305, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.22327941642879798, |
|
"grad_norm": 8.53486541599463, |
|
"learning_rate": 4.772739778480729e-06, |
|
"loss": 1.2343, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.22581668252457976, |
|
"grad_norm": 9.455012927215373, |
|
"learning_rate": 4.7634237849477645e-06, |
|
"loss": 1.2194, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.22835394862036157, |
|
"grad_norm": 13.790102358483903, |
|
"learning_rate": 4.7539301331903125e-06, |
|
"loss": 1.2267, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23089121471614335, |
|
"grad_norm": 14.2481962464266, |
|
"learning_rate": 4.7442595683738705e-06, |
|
"loss": 1.2132, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.23342848081192516, |
|
"grad_norm": 14.416373746309988, |
|
"learning_rate": 4.734412849550007e-06, |
|
"loss": 1.2094, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.23596574690770694, |
|
"grad_norm": 9.539618032350603, |
|
"learning_rate": 4.7243907495967815e-06, |
|
"loss": 1.2294, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.23850301300348875, |
|
"grad_norm": 5.29440353230921, |
|
"learning_rate": 4.7141940551580824e-06, |
|
"loss": 1.2208, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24104027909927053, |
|
"grad_norm": 12.765581152374985, |
|
"learning_rate": 4.703823566581877e-06, |
|
"loss": 1.2324, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.24357754519505234, |
|
"grad_norm": 8.58495407107471, |
|
"learning_rate": 4.693280097857398e-06, |
|
"loss": 1.2222, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.24611481129083412, |
|
"grad_norm": 9.562572836922989, |
|
"learning_rate": 4.6825644765512475e-06, |
|
"loss": 1.2185, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.24865207738661593, |
|
"grad_norm": 9.23186285176845, |
|
"learning_rate": 4.6716775437424465e-06, |
|
"loss": 1.2192, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.25118934348239774, |
|
"grad_norm": 5.073620743419921, |
|
"learning_rate": 4.660620153956409e-06, |
|
"loss": 1.2241, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2537266095781795, |
|
"grad_norm": 8.006469237916, |
|
"learning_rate": 4.649393175097879e-06, |
|
"loss": 1.2281, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2562638756739613, |
|
"grad_norm": 8.265727241877757, |
|
"learning_rate": 4.637997488382801e-06, |
|
"loss": 1.2286, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2588011417697431, |
|
"grad_norm": 6.416174543416175, |
|
"learning_rate": 4.626433988269156e-06, |
|
"loss": 1.2217, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.2613384078655249, |
|
"grad_norm": 7.028656405225393, |
|
"learning_rate": 4.614703582386755e-06, |
|
"loss": 1.2181, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2638756739613067, |
|
"grad_norm": 5.865759830273811, |
|
"learning_rate": 4.602807191465993e-06, |
|
"loss": 1.2382, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2664129400570885, |
|
"grad_norm": 13.872254227369961, |
|
"learning_rate": 4.5907457492655895e-06, |
|
"loss": 1.2359, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2689502061528703, |
|
"grad_norm": 13.664329240119352, |
|
"learning_rate": 4.578520202499286e-06, |
|
"loss": 1.2154, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.27148747224865205, |
|
"grad_norm": 14.099557581365481, |
|
"learning_rate": 4.566131510761548e-06, |
|
"loss": 1.2279, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.27402473834443386, |
|
"grad_norm": 9.810709350520776, |
|
"learning_rate": 4.553580646452238e-06, |
|
"loss": 1.2276, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.27656200444021567, |
|
"grad_norm": 6.042192876271809, |
|
"learning_rate": 4.5408685947002915e-06, |
|
"loss": 1.2159, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2790992705359975, |
|
"grad_norm": 7.672603258347116, |
|
"learning_rate": 4.5279963532864e-06, |
|
"loss": 1.209, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28163653663177923, |
|
"grad_norm": 7.981333790512745, |
|
"learning_rate": 4.5149649325646875e-06, |
|
"loss": 1.2227, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.28417380272756104, |
|
"grad_norm": 7.786264862698563, |
|
"learning_rate": 4.501775355383406e-06, |
|
"loss": 1.2299, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.28671106882334285, |
|
"grad_norm": 23.110853155380898, |
|
"learning_rate": 4.48842865700466e-06, |
|
"loss": 1.1972, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.28924833491912466, |
|
"grad_norm": 13.48010845409501, |
|
"learning_rate": 4.474925885023136e-06, |
|
"loss": 1.2043, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2917856010149064, |
|
"grad_norm": 9.52808074720464, |
|
"learning_rate": 4.461268099283886e-06, |
|
"loss": 1.2107, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2943228671106882, |
|
"grad_norm": 7.355296672812608, |
|
"learning_rate": 4.4474563717991345e-06, |
|
"loss": 1.2014, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.29686013320647003, |
|
"grad_norm": 6.320569333174802, |
|
"learning_rate": 4.433491786664134e-06, |
|
"loss": 1.2068, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.29939739930225184, |
|
"grad_norm": 10.030481328592858, |
|
"learning_rate": 4.419375439972075e-06, |
|
"loss": 1.2276, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3019346653980336, |
|
"grad_norm": 6.409643921761458, |
|
"learning_rate": 4.405108439728057e-06, |
|
"loss": 1.217, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3044719314938154, |
|
"grad_norm": 8.589272885921144, |
|
"learning_rate": 4.390691905762111e-06, |
|
"loss": 1.2141, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3070091975895972, |
|
"grad_norm": 8.928374282259929, |
|
"learning_rate": 4.376126969641311e-06, |
|
"loss": 1.2067, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.309546463685379, |
|
"grad_norm": 6.743646650661921, |
|
"learning_rate": 4.361414774580952e-06, |
|
"loss": 1.2126, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3120837297811608, |
|
"grad_norm": 11.981450710885838, |
|
"learning_rate": 4.34655647535482e-06, |
|
"loss": 1.1928, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3146209958769426, |
|
"grad_norm": 9.30568404144152, |
|
"learning_rate": 4.3315532382045535e-06, |
|
"loss": 1.2233, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.3171582619727244, |
|
"grad_norm": 9.124358676503277, |
|
"learning_rate": 4.3164062407480974e-06, |
|
"loss": 1.2208, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3196955280685062, |
|
"grad_norm": 9.471883506779196, |
|
"learning_rate": 4.301116671887281e-06, |
|
"loss": 1.2009, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.32223279416428796, |
|
"grad_norm": 6.941617674454578, |
|
"learning_rate": 4.285685731714493e-06, |
|
"loss": 1.2188, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.32477006026006977, |
|
"grad_norm": 12.5252338418682, |
|
"learning_rate": 4.270114631418487e-06, |
|
"loss": 1.1947, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3273073263558516, |
|
"grad_norm": 11.108780709122486, |
|
"learning_rate": 4.254404593189316e-06, |
|
"loss": 1.2063, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3298445924516334, |
|
"grad_norm": 8.555766487500795, |
|
"learning_rate": 4.238556850122394e-06, |
|
"loss": 1.1988, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.33238185854741514, |
|
"grad_norm": 5.929688704429909, |
|
"learning_rate": 4.222572646121723e-06, |
|
"loss": 1.2084, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.33491912464319695, |
|
"grad_norm": 7.403468884662153, |
|
"learning_rate": 4.2064532358022446e-06, |
|
"loss": 1.209, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.33745639073897876, |
|
"grad_norm": 11.518020383952775, |
|
"learning_rate": 4.190199884391371e-06, |
|
"loss": 1.2069, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.33999365683476057, |
|
"grad_norm": 8.809667396360213, |
|
"learning_rate": 4.173813867629672e-06, |
|
"loss": 1.2291, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.3425309229305423, |
|
"grad_norm": 10.783849201384296, |
|
"learning_rate": 4.157296471670747e-06, |
|
"loss": 1.1787, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.34506818902632413, |
|
"grad_norm": 5.923243263259089, |
|
"learning_rate": 4.140648992980269e-06, |
|
"loss": 1.1972, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.34760545512210594, |
|
"grad_norm": 7.577491053933149, |
|
"learning_rate": 4.1238727382342245e-06, |
|
"loss": 1.2193, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.35014272121788775, |
|
"grad_norm": 15.766504679065461, |
|
"learning_rate": 4.106969024216348e-06, |
|
"loss": 1.1896, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3526799873136695, |
|
"grad_norm": 5.128475553118625, |
|
"learning_rate": 4.089939177714778e-06, |
|
"loss": 1.1916, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.3552172534094513, |
|
"grad_norm": 31.663873389243054, |
|
"learning_rate": 4.0727845354178995e-06, |
|
"loss": 1.2037, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3577545195052331, |
|
"grad_norm": 5.0548828855937975, |
|
"learning_rate": 4.055506443809441e-06, |
|
"loss": 1.2243, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3602917856010149, |
|
"grad_norm": 6.115218417588514, |
|
"learning_rate": 4.038106259062778e-06, |
|
"loss": 1.1927, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3628290516967967, |
|
"grad_norm": 12.060043038462219, |
|
"learning_rate": 4.020585346934493e-06, |
|
"loss": 1.2077, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3653663177925785, |
|
"grad_norm": 5.116355293016737, |
|
"learning_rate": 4.002945082657167e-06, |
|
"loss": 1.2, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3679035838883603, |
|
"grad_norm": 7.955675984619225, |
|
"learning_rate": 3.985186850831446e-06, |
|
"loss": 1.1917, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.37044084998414206, |
|
"grad_norm": 7.18463544096605, |
|
"learning_rate": 3.967312045317357e-06, |
|
"loss": 1.1917, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.37297811607992387, |
|
"grad_norm": 13.238317735595116, |
|
"learning_rate": 3.9493220691249e-06, |
|
"loss": 1.207, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3755153821757057, |
|
"grad_norm": 17.592657693606654, |
|
"learning_rate": 3.931218334303933e-06, |
|
"loss": 1.1963, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3780526482714875, |
|
"grad_norm": 13.71018630119955, |
|
"learning_rate": 3.913002261833331e-06, |
|
"loss": 1.201, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.38058991436726924, |
|
"grad_norm": 4.264076010568766, |
|
"learning_rate": 3.894675281509455e-06, |
|
"loss": 1.2088, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38312718046305105, |
|
"grad_norm": 7.007776785175352, |
|
"learning_rate": 3.876238831833927e-06, |
|
"loss": 1.216, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.38566444655883286, |
|
"grad_norm": 6.561717211592676, |
|
"learning_rate": 3.857694359900719e-06, |
|
"loss": 1.211, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3882017126546147, |
|
"grad_norm": 9.351379080752213, |
|
"learning_rate": 3.83904332128257e-06, |
|
"loss": 1.173, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3907389787503964, |
|
"grad_norm": 5.389365116394579, |
|
"learning_rate": 3.820287179916736e-06, |
|
"loss": 1.1883, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.39327624484617824, |
|
"grad_norm": 4.843914279235025, |
|
"learning_rate": 3.8014274079900842e-06, |
|
"loss": 1.2002, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.39581351094196005, |
|
"grad_norm": 8.271741525384778, |
|
"learning_rate": 3.7824654858235433e-06, |
|
"loss": 1.2019, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.39835077703774185, |
|
"grad_norm": 16.214867643854607, |
|
"learning_rate": 3.763402901755905e-06, |
|
"loss": 1.1936, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4008880431335236, |
|
"grad_norm": 6.671379474352313, |
|
"learning_rate": 3.7442411520270096e-06, |
|
"loss": 1.1872, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4034253092293054, |
|
"grad_norm": 8.707941769488675, |
|
"learning_rate": 3.7249817406602996e-06, |
|
"loss": 1.197, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4059625753250872, |
|
"grad_norm": 6.5621327717722995, |
|
"learning_rate": 3.7056261793447707e-06, |
|
"loss": 1.1983, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.40849984142086904, |
|
"grad_norm": 7.068156090864818, |
|
"learning_rate": 3.686175987316317e-06, |
|
"loss": 1.1853, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4110371075166508, |
|
"grad_norm": 7.890900003065767, |
|
"learning_rate": 3.6666326912384854e-06, |
|
"loss": 1.1925, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4135743736124326, |
|
"grad_norm": 6.7232162942204665, |
|
"learning_rate": 3.6469978250826433e-06, |
|
"loss": 1.1954, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.4161116397082144, |
|
"grad_norm": 10.010345842937971, |
|
"learning_rate": 3.6272729300075808e-06, |
|
"loss": 1.209, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4186489058039962, |
|
"grad_norm": 5.740167290944564, |
|
"learning_rate": 3.6074595542385387e-06, |
|
"loss": 1.185, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.421186171899778, |
|
"grad_norm": 7.619213938711831, |
|
"learning_rate": 3.5875592529456926e-06, |
|
"loss": 1.2068, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4237234379955598, |
|
"grad_norm": 11.256221468519641, |
|
"learning_rate": 3.567573588122079e-06, |
|
"loss": 1.1777, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4262607040913416, |
|
"grad_norm": 5.7316159588312106, |
|
"learning_rate": 3.5475041284609977e-06, |
|
"loss": 1.1843, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.4287979701871234, |
|
"grad_norm": 5.682433911486404, |
|
"learning_rate": 3.527352449232886e-06, |
|
"loss": 1.1777, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.43133523628290515, |
|
"grad_norm": 6.786252039469139, |
|
"learning_rate": 3.5071201321616673e-06, |
|
"loss": 1.1888, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.43387250237868696, |
|
"grad_norm": 5.407930902552358, |
|
"learning_rate": 3.4868087653006045e-06, |
|
"loss": 1.1901, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4364097684744688, |
|
"grad_norm": 6.399699747010517, |
|
"learning_rate": 3.466419942907652e-06, |
|
"loss": 1.2107, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.4389470345702506, |
|
"grad_norm": 7.825821295728962, |
|
"learning_rate": 3.445955265320321e-06, |
|
"loss": 1.1918, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.44148430066603234, |
|
"grad_norm": 9.488411335781475, |
|
"learning_rate": 3.425416338830067e-06, |
|
"loss": 1.189, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.44402156676181415, |
|
"grad_norm": 5.88274413352344, |
|
"learning_rate": 3.4048047755562093e-06, |
|
"loss": 1.183, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.44655883285759596, |
|
"grad_norm": 5.6831092141225055, |
|
"learning_rate": 3.3841221933193965e-06, |
|
"loss": 1.1882, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4490960989533777, |
|
"grad_norm": 19.983254773097197, |
|
"learning_rate": 3.3633702155146216e-06, |
|
"loss": 1.1911, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.4516333650491595, |
|
"grad_norm": 12.549540089577652, |
|
"learning_rate": 3.342550470983798e-06, |
|
"loss": 1.1811, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.45417063114494133, |
|
"grad_norm": 5.976267812124573, |
|
"learning_rate": 3.3216645938879134e-06, |
|
"loss": 1.1865, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.45670789724072314, |
|
"grad_norm": 9.218000333375254, |
|
"learning_rate": 3.3007142235787624e-06, |
|
"loss": 1.1842, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4592451633365049, |
|
"grad_norm": 6.320981630879619, |
|
"learning_rate": 3.2797010044702697e-06, |
|
"loss": 1.1776, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4617824294322867, |
|
"grad_norm": 6.190899234462718, |
|
"learning_rate": 3.258626585909422e-06, |
|
"loss": 1.1917, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4643196955280685, |
|
"grad_norm": 7.909431608694366, |
|
"learning_rate": 3.2374926220468067e-06, |
|
"loss": 1.1789, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.4668569616238503, |
|
"grad_norm": 5.69897203883322, |
|
"learning_rate": 3.216300771706776e-06, |
|
"loss": 1.1733, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4693942277196321, |
|
"grad_norm": 20.816445025574357, |
|
"learning_rate": 3.1950526982572484e-06, |
|
"loss": 1.1712, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4719314938154139, |
|
"grad_norm": 11.982164138237327, |
|
"learning_rate": 3.1737500694791424e-06, |
|
"loss": 1.1815, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.4744687599111957, |
|
"grad_norm": 24.74208511970109, |
|
"learning_rate": 3.1523945574354763e-06, |
|
"loss": 1.1863, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4770060260069775, |
|
"grad_norm": 7.842936948875007, |
|
"learning_rate": 3.130987838340126e-06, |
|
"loss": 1.1834, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.47954329210275926, |
|
"grad_norm": 6.4021446109798505, |
|
"learning_rate": 3.1095315924262544e-06, |
|
"loss": 1.184, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.48208055819854106, |
|
"grad_norm": 11.634701038383552, |
|
"learning_rate": 3.0880275038144296e-06, |
|
"loss": 1.1748, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4846178242943229, |
|
"grad_norm": 13.567790113056438, |
|
"learning_rate": 3.066477260380441e-06, |
|
"loss": 1.1647, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.4871550903901047, |
|
"grad_norm": 8.85124389947068, |
|
"learning_rate": 3.044882553622808e-06, |
|
"loss": 1.1906, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.48969235648588644, |
|
"grad_norm": 4.941061338275534, |
|
"learning_rate": 3.0232450785300207e-06, |
|
"loss": 1.1653, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.49222962258166825, |
|
"grad_norm": 6.532283199565601, |
|
"learning_rate": 3.0015665334474937e-06, |
|
"loss": 1.1869, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.49476688867745006, |
|
"grad_norm": 9.662609662864591, |
|
"learning_rate": 2.9798486199442627e-06, |
|
"loss": 1.1707, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.49730415477323187, |
|
"grad_norm": 7.538232506265863, |
|
"learning_rate": 2.958093042679429e-06, |
|
"loss": 1.192, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4998414208690136, |
|
"grad_norm": 72.64241187548325, |
|
"learning_rate": 2.9363015092683566e-06, |
|
"loss": 1.1762, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5023786869647955, |
|
"grad_norm": 9.900383081254226, |
|
"learning_rate": 2.9144757301486387e-06, |
|
"loss": 1.169, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5049159530605772, |
|
"grad_norm": 6.366667290767634, |
|
"learning_rate": 2.8926174184458484e-06, |
|
"loss": 1.174, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.507453219156359, |
|
"grad_norm": 6.005401235845572, |
|
"learning_rate": 2.8707282898390703e-06, |
|
"loss": 1.1596, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5099904852521409, |
|
"grad_norm": 8.195058772390636, |
|
"learning_rate": 2.848810062426236e-06, |
|
"loss": 1.1636, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5125277513479226, |
|
"grad_norm": 6.0595964456253055, |
|
"learning_rate": 2.82686445658927e-06, |
|
"loss": 1.1622, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5150650174437044, |
|
"grad_norm": 23.70711855670117, |
|
"learning_rate": 2.8048931948590537e-06, |
|
"loss": 1.1679, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5176022835394862, |
|
"grad_norm": 5.907186404005163, |
|
"learning_rate": 2.7828980017802236e-06, |
|
"loss": 1.1883, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.520139549635268, |
|
"grad_norm": 7.344357732813401, |
|
"learning_rate": 2.760880603775811e-06, |
|
"loss": 1.173, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5226768157310498, |
|
"grad_norm": 7.426398590607293, |
|
"learning_rate": 2.73884272901173e-06, |
|
"loss": 1.191, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5252140818268316, |
|
"grad_norm": 7.057487598088109, |
|
"learning_rate": 2.7167861072611374e-06, |
|
"loss": 1.1717, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5277513479226134, |
|
"grad_norm": 7.223259822911996, |
|
"learning_rate": 2.6947124697686553e-06, |
|
"loss": 1.1638, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5302886140183952, |
|
"grad_norm": 10.407626808887795, |
|
"learning_rate": 2.6726235491144886e-06, |
|
"loss": 1.1682, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.532825880114177, |
|
"grad_norm": 6.589791365210622, |
|
"learning_rate": 2.650521079078433e-06, |
|
"loss": 1.1645, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5353631462099587, |
|
"grad_norm": 11.999381746057887, |
|
"learning_rate": 2.6284067945037855e-06, |
|
"loss": 1.1646, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5379004123057406, |
|
"grad_norm": 7.613081340354655, |
|
"learning_rate": 2.6062824311611775e-06, |
|
"loss": 1.1705, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5404376784015223, |
|
"grad_norm": 6.366501800737734, |
|
"learning_rate": 2.5841497256123326e-06, |
|
"loss": 1.1573, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5429749444973041, |
|
"grad_norm": 18.74317163106254, |
|
"learning_rate": 2.5620104150737626e-06, |
|
"loss": 1.165, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.545512210593086, |
|
"grad_norm": 11.370554877805095, |
|
"learning_rate": 2.5398662372804105e-06, |
|
"loss": 1.1701, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5480494766888677, |
|
"grad_norm": 6.5553872071155554, |
|
"learning_rate": 2.517718930349254e-06, |
|
"loss": 1.1475, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5505867427846496, |
|
"grad_norm": 5.741036264156473, |
|
"learning_rate": 2.495570232642881e-06, |
|
"loss": 1.1483, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5531240088804313, |
|
"grad_norm": 6.767884733983484, |
|
"learning_rate": 2.47342188263304e-06, |
|
"loss": 1.1689, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5556612749762131, |
|
"grad_norm": 13.529490798235566, |
|
"learning_rate": 2.4512756187641936e-06, |
|
"loss": 1.1614, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.558198541071995, |
|
"grad_norm": 9.536331865874597, |
|
"learning_rate": 2.4291331793170545e-06, |
|
"loss": 1.1606, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5607358071677767, |
|
"grad_norm": 4.782009512084195, |
|
"learning_rate": 2.4069963022721597e-06, |
|
"loss": 1.1817, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5632730732635585, |
|
"grad_norm": 12.360173413460117, |
|
"learning_rate": 2.3848667251734424e-06, |
|
"loss": 1.1682, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5658103393593403, |
|
"grad_norm": 6.111517885569779, |
|
"learning_rate": 2.3627461849918604e-06, |
|
"loss": 1.1526, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5683476054551221, |
|
"grad_norm": 5.607775951834258, |
|
"learning_rate": 2.3406364179890532e-06, |
|
"loss": 1.1562, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.570884871550904, |
|
"grad_norm": 14.959200098647552, |
|
"learning_rate": 2.3185391595810635e-06, |
|
"loss": 1.1592, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5734221376466857, |
|
"grad_norm": 5.308853581413382, |
|
"learning_rate": 2.2964561442021255e-06, |
|
"loss": 1.1432, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5759594037424675, |
|
"grad_norm": 7.548080678466621, |
|
"learning_rate": 2.2743891051685222e-06, |
|
"loss": 1.1594, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5784966698382493, |
|
"grad_norm": 12.307106947771112, |
|
"learning_rate": 2.2523397745425387e-06, |
|
"loss": 1.1627, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5810339359340311, |
|
"grad_norm": 6.981005935033202, |
|
"learning_rate": 2.2303098829965124e-06, |
|
"loss": 1.1669, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5835712020298128, |
|
"grad_norm": 4.0554069435676805, |
|
"learning_rate": 2.208301159676987e-06, |
|
"loss": 1.1631, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5861084681255947, |
|
"grad_norm": 5.723818233434463, |
|
"learning_rate": 2.1863153320689958e-06, |
|
"loss": 1.1435, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5886457342213764, |
|
"grad_norm": 14.029515219171557, |
|
"learning_rate": 2.164354125860462e-06, |
|
"loss": 1.1499, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5911830003171583, |
|
"grad_norm": 7.141317865826808, |
|
"learning_rate": 2.1424192648067582e-06, |
|
"loss": 1.1832, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5937202664129401, |
|
"grad_norm": 6.123987923859252, |
|
"learning_rate": 2.120512470595396e-06, |
|
"loss": 1.1503, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5962575325087218, |
|
"grad_norm": 6.013035072240124, |
|
"learning_rate": 2.098635462710898e-06, |
|
"loss": 1.1601, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5987947986045037, |
|
"grad_norm": 4.754989174196253, |
|
"learning_rate": 2.0767899582998293e-06, |
|
"loss": 1.1641, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6013320647002854, |
|
"grad_norm": 14.321979761759893, |
|
"learning_rate": 2.054977672036018e-06, |
|
"loss": 1.1417, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6038693307960672, |
|
"grad_norm": 10.760086157167773, |
|
"learning_rate": 2.033200315985969e-06, |
|
"loss": 1.1482, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6064065968918491, |
|
"grad_norm": 12.684127714673926, |
|
"learning_rate": 2.011459599474483e-06, |
|
"loss": 1.1497, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6089438629876308, |
|
"grad_norm": 6.757204304399067, |
|
"learning_rate": 1.989757228950491e-06, |
|
"loss": 1.1583, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6114811290834127, |
|
"grad_norm": 14.606095290784646, |
|
"learning_rate": 1.9680949078531097e-06, |
|
"loss": 1.1528, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6140183951791944, |
|
"grad_norm": 7.4618089543548995, |
|
"learning_rate": 1.9464743364779388e-06, |
|
"loss": 1.1594, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6165556612749762, |
|
"grad_norm": 5.801205347666092, |
|
"learning_rate": 1.924897211843606e-06, |
|
"loss": 1.1428, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.619092927370758, |
|
"grad_norm": 9.568596376954924, |
|
"learning_rate": 1.9033652275585624e-06, |
|
"loss": 1.1321, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6216301934665398, |
|
"grad_norm": 6.223606964643186, |
|
"learning_rate": 1.8818800736881518e-06, |
|
"loss": 1.1568, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6241674595623216, |
|
"grad_norm": 55.44156669240569, |
|
"learning_rate": 1.8604434366219573e-06, |
|
"loss": 1.1594, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6267047256581034, |
|
"grad_norm": 10.778268976853173, |
|
"learning_rate": 1.8390569989414303e-06, |
|
"loss": 1.1588, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6292419917538852, |
|
"grad_norm": 5.7854893075832745, |
|
"learning_rate": 1.8177224392878279e-06, |
|
"loss": 1.1535, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6317792578496669, |
|
"grad_norm": 13.300897200946059, |
|
"learning_rate": 1.7964414322304525e-06, |
|
"loss": 1.1339, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.6343165239454488, |
|
"grad_norm": 7.933290169459755, |
|
"learning_rate": 1.7752156481352123e-06, |
|
"loss": 1.1388, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6368537900412305, |
|
"grad_norm": 5.231248714361998, |
|
"learning_rate": 1.7540467530335172e-06, |
|
"loss": 1.1374, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6393910561370124, |
|
"grad_norm": 8.049075294237095, |
|
"learning_rate": 1.732936408491504e-06, |
|
"loss": 1.1587, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6419283222327942, |
|
"grad_norm": 10.356860200627077, |
|
"learning_rate": 1.7118862714796253e-06, |
|
"loss": 1.1572, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6444655883285759, |
|
"grad_norm": 4.057173286082011, |
|
"learning_rate": 1.6908979942425868e-06, |
|
"loss": 1.141, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6470028544243578, |
|
"grad_norm": 7.051670677225095, |
|
"learning_rate": 1.6699732241696636e-06, |
|
"loss": 1.1581, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6495401205201395, |
|
"grad_norm": 7.595067587076725, |
|
"learning_rate": 1.649113603665396e-06, |
|
"loss": 1.1466, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6520773866159213, |
|
"grad_norm": 6.1918160975787, |
|
"learning_rate": 1.628320770020673e-06, |
|
"loss": 1.1615, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6546146527117032, |
|
"grad_norm": 14.301509764417062, |
|
"learning_rate": 1.6075963552842211e-06, |
|
"loss": 1.1641, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6571519188074849, |
|
"grad_norm": 6.875665068775734, |
|
"learning_rate": 1.5869419861345042e-06, |
|
"loss": 1.142, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6596891849032668, |
|
"grad_norm": 7.541794891346959, |
|
"learning_rate": 1.5663592837520453e-06, |
|
"loss": 1.1469, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6622264509990485, |
|
"grad_norm": 12.46212690117266, |
|
"learning_rate": 1.5458498636921727e-06, |
|
"loss": 1.1598, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6647637170948303, |
|
"grad_norm": 7.379528099438572, |
|
"learning_rate": 1.5254153357582208e-06, |
|
"loss": 1.1582, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6673009831906122, |
|
"grad_norm": 4.907692232962446, |
|
"learning_rate": 1.5050573038751693e-06, |
|
"loss": 1.1452, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6698382492863939, |
|
"grad_norm": 6.079547602614854, |
|
"learning_rate": 1.4847773659637546e-06, |
|
"loss": 1.1297, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6723755153821757, |
|
"grad_norm": 5.450549895798723, |
|
"learning_rate": 1.4645771138150433e-06, |
|
"loss": 1.1603, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6749127814779575, |
|
"grad_norm": 4.368959175756317, |
|
"learning_rate": 1.4444581329654916e-06, |
|
"loss": 1.1439, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6774500475737393, |
|
"grad_norm": 9.44947484477195, |
|
"learning_rate": 1.424422002572502e-06, |
|
"loss": 1.1749, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6799873136695211, |
|
"grad_norm": 10.050734306484166, |
|
"learning_rate": 1.404470295290461e-06, |
|
"loss": 1.1545, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6825245797653029, |
|
"grad_norm": 12.005497232488006, |
|
"learning_rate": 1.3846045771473116e-06, |
|
"loss": 1.149, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6850618458610847, |
|
"grad_norm": 5.9236681532953925, |
|
"learning_rate": 1.3648264074216282e-06, |
|
"loss": 1.1394, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6875991119568665, |
|
"grad_norm": 9.66814179972378, |
|
"learning_rate": 1.345137338520231e-06, |
|
"loss": 1.1463, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6901363780526483, |
|
"grad_norm": 37.98586030031826, |
|
"learning_rate": 1.3255389158563299e-06, |
|
"loss": 1.1521, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.69267364414843, |
|
"grad_norm": 4.6889018268529705, |
|
"learning_rate": 1.3060326777282312e-06, |
|
"loss": 1.1437, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6952109102442119, |
|
"grad_norm": 14.32310959027276, |
|
"learning_rate": 1.2866201551985935e-06, |
|
"loss": 1.1394, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6977481763399936, |
|
"grad_norm": 6.337986082170704, |
|
"learning_rate": 1.2673028719742461e-06, |
|
"loss": 1.1455, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7002854424357755, |
|
"grad_norm": 15.376375293741257, |
|
"learning_rate": 1.2480823442866017e-06, |
|
"loss": 1.1401, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7028227085315573, |
|
"grad_norm": 5.041231343225159, |
|
"learning_rate": 1.2289600807726406e-06, |
|
"loss": 1.156, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.705359974627339, |
|
"grad_norm": 6.869042630836821, |
|
"learning_rate": 1.2099375823564948e-06, |
|
"loss": 1.1615, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7078972407231209, |
|
"grad_norm": 12.482011259323276, |
|
"learning_rate": 1.1910163421316447e-06, |
|
"loss": 1.1321, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7104345068189026, |
|
"grad_norm": 6.399176247127537, |
|
"learning_rate": 1.1721978452437205e-06, |
|
"loss": 1.153, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7129717729146844, |
|
"grad_norm": 6.270550675278331, |
|
"learning_rate": 1.1534835687739323e-06, |
|
"loss": 1.1551, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.7155090390104663, |
|
"grad_norm": 4.695573760402524, |
|
"learning_rate": 1.1348749816231347e-06, |
|
"loss": 1.12, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.718046305106248, |
|
"grad_norm": 6.7962150090749995, |
|
"learning_rate": 1.1163735443965298e-06, |
|
"loss": 1.1684, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7205835712020298, |
|
"grad_norm": 8.834255061556856, |
|
"learning_rate": 1.0979807092890205e-06, |
|
"loss": 1.1414, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7231208372978116, |
|
"grad_norm": 6.053865837653651, |
|
"learning_rate": 1.079697919971232e-06, |
|
"loss": 1.1355, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7256581033935934, |
|
"grad_norm": 19.55990021937961, |
|
"learning_rate": 1.0615266114761932e-06, |
|
"loss": 1.122, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7281953694893752, |
|
"grad_norm": 8.424559736567739, |
|
"learning_rate": 1.0434682100866995e-06, |
|
"loss": 1.1422, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.730732635585157, |
|
"grad_norm": 5.986352186146355, |
|
"learning_rate": 1.0255241332233636e-06, |
|
"loss": 1.1312, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7332699016809388, |
|
"grad_norm": 20.203613019858313, |
|
"learning_rate": 1.0076957893333602e-06, |
|
"loss": 1.1271, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7358071677767206, |
|
"grad_norm": 19.714820742115197, |
|
"learning_rate": 9.899845777798777e-07, |
|
"loss": 1.154, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7383444338725024, |
|
"grad_norm": 13.735614202784944, |
|
"learning_rate": 9.723918887322757e-07, |
|
"loss": 1.1359, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7408816999682841, |
|
"grad_norm": 6.129734292866207, |
|
"learning_rate": 9.549191030569751e-07, |
|
"loss": 1.1313, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.743418966064066, |
|
"grad_norm": 7.37718825001788, |
|
"learning_rate": 9.375675922090707e-07, |
|
"loss": 1.1319, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7459562321598477, |
|
"grad_norm": 4.427499085813396, |
|
"learning_rate": 9.203387181246831e-07, |
|
"loss": 1.1399, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7484934982556296, |
|
"grad_norm": 29.506989634102897, |
|
"learning_rate": 9.032338331140603e-07, |
|
"loss": 1.158, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7510307643514114, |
|
"grad_norm": 5.7586848271722495, |
|
"learning_rate": 8.862542797554341e-07, |
|
"loss": 1.1486, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7535680304471931, |
|
"grad_norm": 8.650793498941589, |
|
"learning_rate": 8.694013907896363e-07, |
|
"loss": 1.1461, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.756105296542975, |
|
"grad_norm": 25.74372735382993, |
|
"learning_rate": 8.526764890154965e-07, |
|
"loss": 1.1353, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7586425626387567, |
|
"grad_norm": 5.0755009425487625, |
|
"learning_rate": 8.36080887186011e-07, |
|
"loss": 1.1553, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7611798287345385, |
|
"grad_norm": 5.4429094875047115, |
|
"learning_rate": 8.19615887905301e-07, |
|
"loss": 1.1598, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7637170948303204, |
|
"grad_norm": 6.821786199139472, |
|
"learning_rate": 8.032827835263773e-07, |
|
"loss": 1.1318, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7662543609261021, |
|
"grad_norm": 4.919272329647964, |
|
"learning_rate": 7.87082856049696e-07, |
|
"loss": 1.1188, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.768791627021884, |
|
"grad_norm": 6.777841153958167, |
|
"learning_rate": 7.710173770225335e-07, |
|
"loss": 1.1351, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7713288931176657, |
|
"grad_norm": 9.668494091003875, |
|
"learning_rate": 7.550876074391852e-07, |
|
"loss": 1.1388, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7738661592134475, |
|
"grad_norm": 5.891510434517859, |
|
"learning_rate": 7.392947976419867e-07, |
|
"loss": 1.1307, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7764034253092293, |
|
"grad_norm": 4.629328829939173, |
|
"learning_rate": 7.23640187223173e-07, |
|
"loss": 1.1442, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7789406914050111, |
|
"grad_norm": 12.582358698587784, |
|
"learning_rate": 7.081250049275804e-07, |
|
"loss": 1.147, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7814779575007929, |
|
"grad_norm": 12.6779267097399, |
|
"learning_rate": 6.927504685562075e-07, |
|
"loss": 1.1461, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7840152235965747, |
|
"grad_norm": 4.951520708126675, |
|
"learning_rate": 6.775177848706193e-07, |
|
"loss": 1.1272, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7865524896923565, |
|
"grad_norm": 8.236149543257104, |
|
"learning_rate": 6.624281494982359e-07, |
|
"loss": 1.1478, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7890897557881383, |
|
"grad_norm": 15.16637456873453, |
|
"learning_rate": 6.474827468384811e-07, |
|
"loss": 1.1411, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7916270218839201, |
|
"grad_norm": 11.435649281181314, |
|
"learning_rate": 6.326827499698218e-07, |
|
"loss": 1.128, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7941642879797018, |
|
"grad_norm": 7.369755927934268, |
|
"learning_rate": 6.180293205576873e-07, |
|
"loss": 1.1503, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7967015540754837, |
|
"grad_norm": 7.910994986606561, |
|
"learning_rate": 6.035236087632928e-07, |
|
"loss": 1.1305, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7992388201712655, |
|
"grad_norm": 4.538240411984233, |
|
"learning_rate": 5.891667531533643e-07, |
|
"loss": 1.1624, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8017760862670472, |
|
"grad_norm": 7.092761380330254, |
|
"learning_rate": 5.749598806107634e-07, |
|
"loss": 1.1571, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8043133523628291, |
|
"grad_norm": 6.547620136814868, |
|
"learning_rate": 5.609041062460451e-07, |
|
"loss": 1.1253, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8068506184586108, |
|
"grad_norm": 5.7902809800252175, |
|
"learning_rate": 5.470005333099288e-07, |
|
"loss": 1.1229, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8093878845543926, |
|
"grad_norm": 7.71191733136425, |
|
"learning_rate": 5.332502531067007e-07, |
|
"loss": 1.1279, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8119251506501745, |
|
"grad_norm": 4.0181700071905295, |
|
"learning_rate": 5.196543449085617e-07, |
|
"loss": 1.1268, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8144624167459562, |
|
"grad_norm": 8.13336313004874, |
|
"learning_rate": 5.062138758709098e-07, |
|
"loss": 1.1409, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8169996828417381, |
|
"grad_norm": 9.802266531368725, |
|
"learning_rate": 4.929299009485799e-07, |
|
"loss": 1.1385, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8195369489375198, |
|
"grad_norm": 6.636513914507316, |
|
"learning_rate": 4.798034628130396e-07, |
|
"loss": 1.1454, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8220742150333016, |
|
"grad_norm": 4.227565986903538, |
|
"learning_rate": 4.668355917705486e-07, |
|
"loss": 1.1257, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8246114811290834, |
|
"grad_norm": 8.17867439720174, |
|
"learning_rate": 4.540273056812869e-07, |
|
"loss": 1.1337, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8271487472248652, |
|
"grad_norm": 5.424511876014973, |
|
"learning_rate": 4.4137960987946707e-07, |
|
"loss": 1.1265, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.829686013320647, |
|
"grad_norm": 7.92386965501813, |
|
"learning_rate": 4.2889349709441945e-07, |
|
"loss": 1.1303, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8322232794164288, |
|
"grad_norm": 10.370529382382568, |
|
"learning_rate": 4.165699473726756e-07, |
|
"loss": 1.1401, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.8347605455122106, |
|
"grad_norm": 7.578569844581463, |
|
"learning_rate": 4.044099280010405e-07, |
|
"loss": 1.1413, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.8372978116079924, |
|
"grad_norm": 4.726374382802555, |
|
"learning_rate": 3.9241439343067205e-07, |
|
"loss": 1.1189, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8398350777037742, |
|
"grad_norm": 7.984970758900439, |
|
"learning_rate": 3.8058428520216407e-07, |
|
"loss": 1.1397, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.842372343799556, |
|
"grad_norm": 9.346707987442102, |
|
"learning_rate": 3.689205318716424e-07, |
|
"loss": 1.1348, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8449096098953378, |
|
"grad_norm": 5.016282437243575, |
|
"learning_rate": 3.574240489378847e-07, |
|
"loss": 1.1473, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8474468759911196, |
|
"grad_norm": 6.2078193804951365, |
|
"learning_rate": 3.4609573877046054e-07, |
|
"loss": 1.1445, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8499841420869013, |
|
"grad_norm": 5.5201202597039085, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"loss": 1.1434, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8525214081826832, |
|
"grad_norm": 17.229253814968214, |
|
"learning_rate": 3.239471801429186e-07, |
|
"loss": 1.1305, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8550586742784649, |
|
"grad_norm": 8.70530756713737, |
|
"learning_rate": 3.1312867014363534e-07, |
|
"loss": 1.1322, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8575959403742468, |
|
"grad_norm": 7.012844741047474, |
|
"learning_rate": 3.024818096958995e-07, |
|
"loss": 1.1303, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8601332064700286, |
|
"grad_norm": 4.756022687074469, |
|
"learning_rate": 2.920074344816268e-07, |
|
"loss": 1.1259, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8626704725658103, |
|
"grad_norm": 4.7358895032579555, |
|
"learning_rate": 2.8170636664420715e-07, |
|
"loss": 1.1299, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8652077386615922, |
|
"grad_norm": 8.683945332579281, |
|
"learning_rate": 2.7157941472397393e-07, |
|
"loss": 1.1496, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8677450047573739, |
|
"grad_norm": 5.37690958165037, |
|
"learning_rate": 2.6162737359474195e-07, |
|
"loss": 1.1242, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8702822708531557, |
|
"grad_norm": 7.164420959217891, |
|
"learning_rate": 2.518510244014161e-07, |
|
"loss": 1.1284, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8728195369489375, |
|
"grad_norm": 7.002358607609309, |
|
"learning_rate": 2.4225113449867834e-07, |
|
"loss": 1.1304, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8753568030447193, |
|
"grad_norm": 4.573582959362798, |
|
"learning_rate": 2.3282845739075855e-07, |
|
"loss": 1.1282, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8778940691405012, |
|
"grad_norm": 4.899953600348181, |
|
"learning_rate": 2.2358373267229006e-07, |
|
"loss": 1.1298, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8804313352362829, |
|
"grad_norm": 5.271497221424272, |
|
"learning_rate": 2.1451768597025995e-07, |
|
"loss": 1.1256, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8829686013320647, |
|
"grad_norm": 6.165041784465411, |
|
"learning_rate": 2.0563102888705027e-07, |
|
"loss": 1.1286, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8855058674278465, |
|
"grad_norm": 11.088879537008697, |
|
"learning_rate": 1.9692445894458845e-07, |
|
"loss": 1.1252, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8880431335236283, |
|
"grad_norm": 9.424576280240377, |
|
"learning_rate": 1.883986595295953e-07, |
|
"loss": 1.1151, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.89058039961941, |
|
"grad_norm": 5.754831282009581, |
|
"learning_rate": 1.8005429983994487e-07, |
|
"loss": 1.1334, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8931176657151919, |
|
"grad_norm": 10.215467753583647, |
|
"learning_rate": 1.7189203483213984e-07, |
|
"loss": 1.1411, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8956549318109737, |
|
"grad_norm": 12.91382494925556, |
|
"learning_rate": 1.6391250516990448e-07, |
|
"loss": 1.1236, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8981921979067554, |
|
"grad_norm": 10.128521645418763, |
|
"learning_rate": 1.5611633717389467e-07, |
|
"loss": 1.1159, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9007294640025373, |
|
"grad_norm": 9.199799321868463, |
|
"learning_rate": 1.4850414277254088e-07, |
|
"loss": 1.1368, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.903266730098319, |
|
"grad_norm": 4.892191302349406, |
|
"learning_rate": 1.41076519454017e-07, |
|
"loss": 1.1305, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9058039961941009, |
|
"grad_norm": 5.956493220397556, |
|
"learning_rate": 1.3383405021933998e-07, |
|
"loss": 1.1264, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9083412622898827, |
|
"grad_norm": 5.275784999413581, |
|
"learning_rate": 1.267773035366135e-07, |
|
"loss": 1.1195, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9108785283856644, |
|
"grad_norm": 9.540366769966049, |
|
"learning_rate": 1.1990683329640567e-07, |
|
"loss": 1.1381, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.9134157944814463, |
|
"grad_norm": 4.801855741665693, |
|
"learning_rate": 1.1322317876827416e-07, |
|
"loss": 1.1315, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.915953060577228, |
|
"grad_norm": 5.056163865646408, |
|
"learning_rate": 1.0672686455843934e-07, |
|
"loss": 1.1382, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9184903266730098, |
|
"grad_norm": 8.706560880258346, |
|
"learning_rate": 1.004184005686068e-07, |
|
"loss": 1.119, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9210275927687916, |
|
"grad_norm": 6.266874220907701, |
|
"learning_rate": 9.429828195594459e-08, |
|
"loss": 1.1316, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9235648588645734, |
|
"grad_norm": 4.598497385552673, |
|
"learning_rate": 8.836698909421848e-08, |
|
"loss": 1.1316, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9261021249603553, |
|
"grad_norm": 5.423769767831511, |
|
"learning_rate": 8.2624987536086e-08, |
|
"loss": 1.1305, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.928639391056137, |
|
"grad_norm": 7.1199155337882045, |
|
"learning_rate": 7.707272797655597e-08, |
|
"loss": 1.104, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9311766571519188, |
|
"grad_norm": 5.964210281087181, |
|
"learning_rate": 7.171064621761121e-08, |
|
"loss": 1.1392, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9337139232477006, |
|
"grad_norm": 7.0222277210918485, |
|
"learning_rate": 6.653916313400483e-08, |
|
"loss": 1.1307, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9362511893434824, |
|
"grad_norm": 6.820545499544659, |
|
"learning_rate": 6.155868464022218e-08, |
|
"loss": 1.1311, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9387884554392641, |
|
"grad_norm": 8.543306468091403, |
|
"learning_rate": 5.676960165862333e-08, |
|
"loss": 1.1396, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.941325721535046, |
|
"grad_norm": 5.998847968884944, |
|
"learning_rate": 5.217229008875696e-08, |
|
"loss": 1.1142, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9438629876308278, |
|
"grad_norm": 7.233927115402458, |
|
"learning_rate": 4.7767110777856285e-08, |
|
"loss": 1.1316, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9464002537266096, |
|
"grad_norm": 14.818426116599344, |
|
"learning_rate": 4.355440949251638e-08, |
|
"loss": 1.1291, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9489375198223914, |
|
"grad_norm": 11.275146260000886, |
|
"learning_rate": 3.953451689155369e-08, |
|
"loss": 1.1377, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9514747859181731, |
|
"grad_norm": 4.976497150845647, |
|
"learning_rate": 3.5707748500053706e-08, |
|
"loss": 1.1341, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.954012052013955, |
|
"grad_norm": 8.165947802169324, |
|
"learning_rate": 3.2074404684603325e-08, |
|
"loss": 1.1131, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9565493181097368, |
|
"grad_norm": 7.141736239500508, |
|
"learning_rate": 2.863477062971659e-08, |
|
"loss": 1.1354, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9590865842055185, |
|
"grad_norm": 6.6723385448588814, |
|
"learning_rate": 2.5389116315448768e-08, |
|
"loss": 1.1349, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9616238503013004, |
|
"grad_norm": 9.205939869377257, |
|
"learning_rate": 2.2337696496206317e-08, |
|
"loss": 1.1358, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9641611163970821, |
|
"grad_norm": 6.430232530777119, |
|
"learning_rate": 1.948075068075067e-08, |
|
"loss": 1.1454, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.966698382492864, |
|
"grad_norm": 4.597818211923664, |
|
"learning_rate": 1.6818503113398832e-08, |
|
"loss": 1.121, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9692356485886457, |
|
"grad_norm": 11.864724480832432, |
|
"learning_rate": 1.4351162756422454e-08, |
|
"loss": 1.1279, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9717729146844275, |
|
"grad_norm": 10.20794178542716, |
|
"learning_rate": 1.2078923273646236e-08, |
|
"loss": 1.1423, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9743101807802094, |
|
"grad_norm": 6.419864028983356, |
|
"learning_rate": 1.0001963015247585e-08, |
|
"loss": 1.1206, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9768474468759911, |
|
"grad_norm": 6.587741451008303, |
|
"learning_rate": 8.120445003755306e-09, |
|
"loss": 1.1429, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9793847129717729, |
|
"grad_norm": 4.807651227856488, |
|
"learning_rate": 6.434516921257905e-09, |
|
"loss": 1.1184, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9819219790675547, |
|
"grad_norm": 17.54136739327046, |
|
"learning_rate": 4.9443110978078525e-09, |
|
"loss": 1.143, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9844592451633365, |
|
"grad_norm": 84.07353671535705, |
|
"learning_rate": 3.649944501037672e-09, |
|
"loss": 1.1314, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9869965112591182, |
|
"grad_norm": 5.176156752927596, |
|
"learning_rate": 2.5515187269772866e-09, |
|
"loss": 1.1163, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.9895337773549001, |
|
"grad_norm": 4.911729600848666, |
|
"learning_rate": 1.6491199920809498e-09, |
|
"loss": 1.1519, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9920710434506819, |
|
"grad_norm": 5.8502366235556345, |
|
"learning_rate": 9.428191264596042e-10, |
|
"loss": 1.1472, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9946083095464637, |
|
"grad_norm": 17.952194768106622, |
|
"learning_rate": 4.3267156832033085e-10, |
|
"loss": 1.1114, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9971455756422455, |
|
"grad_norm": 13.67739524605559, |
|
"learning_rate": 1.187173596167712e-10, |
|
"loss": 1.1338, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9996828417380272, |
|
"grad_norm": 10.095200847738084, |
|
"learning_rate": 9.811429044215282e-13, |
|
"loss": 1.1399, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9999365683476055, |
|
"step": 3941, |
|
"total_flos": 2.4957269414919537e+18, |
|
"train_loss": 1.201914404482746, |
|
"train_runtime": 115316.4416, |
|
"train_samples_per_second": 4.375, |
|
"train_steps_per_second": 0.034 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3941, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.4957269414919537e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|