|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.7757731958762886, |
|
"eval_steps": 49, |
|
"global_step": 1164, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002577319587628866, |
|
"grad_norm": 3.206880709337614, |
|
"learning_rate": 5e-08, |
|
"loss": 1.772, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002577319587628866, |
|
"eval_loss": 1.6304376125335693, |
|
"eval_runtime": 78.4604, |
|
"eval_samples_per_second": 21.195, |
|
"eval_steps_per_second": 1.326, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.005154639175257732, |
|
"grad_norm": 3.3587112552116953, |
|
"learning_rate": 1e-07, |
|
"loss": 1.666, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007731958762886598, |
|
"grad_norm": 3.1385995190528324, |
|
"learning_rate": 1.5e-07, |
|
"loss": 1.5471, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.010309278350515464, |
|
"grad_norm": 3.531264158181801, |
|
"learning_rate": 2e-07, |
|
"loss": 1.6718, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01288659793814433, |
|
"grad_norm": 2.993529294622099, |
|
"learning_rate": 1.9999979406617412e-07, |
|
"loss": 1.6334, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.015463917525773196, |
|
"grad_norm": 3.151745142356583, |
|
"learning_rate": 1.999991762655447e-07, |
|
"loss": 1.5647, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01804123711340206, |
|
"grad_norm": 3.3440809481325333, |
|
"learning_rate": 1.9999814660065617e-07, |
|
"loss": 1.7122, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.020618556701030927, |
|
"grad_norm": 3.1146822679211805, |
|
"learning_rate": 1.9999670507574944e-07, |
|
"loss": 1.5921, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.023195876288659795, |
|
"grad_norm": 3.345986552710787, |
|
"learning_rate": 1.9999485169676173e-07, |
|
"loss": 1.7131, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02577319587628866, |
|
"grad_norm": 2.9626668283812045, |
|
"learning_rate": 1.9999258647132644e-07, |
|
"loss": 1.6699, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.028350515463917526, |
|
"grad_norm": 3.4953806538783527, |
|
"learning_rate": 1.9998990940877333e-07, |
|
"loss": 1.6785, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.030927835051546393, |
|
"grad_norm": 3.3004651951030097, |
|
"learning_rate": 1.9998682052012837e-07, |
|
"loss": 1.6681, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03350515463917526, |
|
"grad_norm": 2.9639928990218802, |
|
"learning_rate": 1.9998331981811364e-07, |
|
"loss": 1.5618, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03608247422680412, |
|
"grad_norm": 3.0779182905002234, |
|
"learning_rate": 1.9997940731714744e-07, |
|
"loss": 1.7039, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03865979381443299, |
|
"grad_norm": 2.9325641285273574, |
|
"learning_rate": 1.9997508303334409e-07, |
|
"loss": 1.6219, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.041237113402061855, |
|
"grad_norm": 2.8809471060714555, |
|
"learning_rate": 1.9997034698451393e-07, |
|
"loss": 1.7566, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04381443298969072, |
|
"grad_norm": 3.341100705652755, |
|
"learning_rate": 1.999651991901632e-07, |
|
"loss": 1.6958, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04639175257731959, |
|
"grad_norm": 2.8521720798434216, |
|
"learning_rate": 1.9995963967149398e-07, |
|
"loss": 1.5833, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04896907216494845, |
|
"grad_norm": 3.2447253207769338, |
|
"learning_rate": 1.9995366845140414e-07, |
|
"loss": 1.6854, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05154639175257732, |
|
"grad_norm": 3.033054116340073, |
|
"learning_rate": 1.999472855544872e-07, |
|
"loss": 1.6768, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05412371134020619, |
|
"grad_norm": 2.7106967773151665, |
|
"learning_rate": 1.9994049100703232e-07, |
|
"loss": 1.5709, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05670103092783505, |
|
"grad_norm": 2.8131217459267974, |
|
"learning_rate": 1.9993328483702392e-07, |
|
"loss": 1.5352, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.059278350515463915, |
|
"grad_norm": 2.9454711288855115, |
|
"learning_rate": 1.9992566707414195e-07, |
|
"loss": 1.6292, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.061855670103092786, |
|
"grad_norm": 2.719048700095618, |
|
"learning_rate": 1.9991763774976155e-07, |
|
"loss": 1.6504, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06443298969072164, |
|
"grad_norm": 2.6465097422508914, |
|
"learning_rate": 1.9990919689695282e-07, |
|
"loss": 1.6398, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06701030927835051, |
|
"grad_norm": 2.565964847805824, |
|
"learning_rate": 1.9990034455048098e-07, |
|
"loss": 1.6024, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06958762886597938, |
|
"grad_norm": 2.4151701145393787, |
|
"learning_rate": 1.9989108074680595e-07, |
|
"loss": 1.6316, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07216494845360824, |
|
"grad_norm": 2.6823187985959276, |
|
"learning_rate": 1.998814055240823e-07, |
|
"loss": 1.7421, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07474226804123711, |
|
"grad_norm": 2.6044420857485755, |
|
"learning_rate": 1.998713189221592e-07, |
|
"loss": 1.5983, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07731958762886598, |
|
"grad_norm": 2.3579361514426784, |
|
"learning_rate": 1.9986082098258008e-07, |
|
"loss": 1.5468, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07989690721649484, |
|
"grad_norm": 2.3088177834083146, |
|
"learning_rate": 1.9984991174858257e-07, |
|
"loss": 1.5852, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08247422680412371, |
|
"grad_norm": 2.5839184979450005, |
|
"learning_rate": 1.9983859126509825e-07, |
|
"loss": 1.6647, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08505154639175258, |
|
"grad_norm": 2.2905291979602844, |
|
"learning_rate": 1.9982685957875257e-07, |
|
"loss": 1.5935, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08762886597938144, |
|
"grad_norm": 2.3660300818606568, |
|
"learning_rate": 1.998147167378645e-07, |
|
"loss": 1.7655, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09020618556701031, |
|
"grad_norm": 2.269544029552125, |
|
"learning_rate": 1.9980216279244653e-07, |
|
"loss": 1.6383, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09278350515463918, |
|
"grad_norm": 2.2148823132358477, |
|
"learning_rate": 1.9978919779420423e-07, |
|
"loss": 1.7191, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09536082474226804, |
|
"grad_norm": 2.295307555280267, |
|
"learning_rate": 1.9977582179653632e-07, |
|
"loss": 1.5571, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0979381443298969, |
|
"grad_norm": 2.1570012388049262, |
|
"learning_rate": 1.9976203485453414e-07, |
|
"loss": 1.642, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.10051546391752578, |
|
"grad_norm": 2.327694183291453, |
|
"learning_rate": 1.9974783702498166e-07, |
|
"loss": 1.6388, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.10309278350515463, |
|
"grad_norm": 2.3531823980910382, |
|
"learning_rate": 1.9973322836635516e-07, |
|
"loss": 1.6407, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1056701030927835, |
|
"grad_norm": 2.148246998681959, |
|
"learning_rate": 1.9971820893882297e-07, |
|
"loss": 1.6316, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10824742268041238, |
|
"grad_norm": 1.824359532091145, |
|
"learning_rate": 1.9970277880424528e-07, |
|
"loss": 1.4812, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11082474226804123, |
|
"grad_norm": 1.8420872667750698, |
|
"learning_rate": 1.9968693802617374e-07, |
|
"loss": 1.6208, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1134020618556701, |
|
"grad_norm": 1.9242569129206386, |
|
"learning_rate": 1.9967068666985148e-07, |
|
"loss": 1.6866, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11597938144329897, |
|
"grad_norm": 1.7555101549111227, |
|
"learning_rate": 1.9965402480221257e-07, |
|
"loss": 1.59, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11855670103092783, |
|
"grad_norm": 1.83328616320706, |
|
"learning_rate": 1.9963695249188181e-07, |
|
"loss": 1.7787, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1211340206185567, |
|
"grad_norm": 1.5464144842738474, |
|
"learning_rate": 1.9961946980917453e-07, |
|
"loss": 1.5605, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12371134020618557, |
|
"grad_norm": 1.5700132071559665, |
|
"learning_rate": 1.9960157682609632e-07, |
|
"loss": 1.5188, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12628865979381443, |
|
"grad_norm": 1.551927803815323, |
|
"learning_rate": 1.9958327361634247e-07, |
|
"loss": 1.5921, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12628865979381443, |
|
"eval_loss": 1.5858733654022217, |
|
"eval_runtime": 78.6563, |
|
"eval_samples_per_second": 21.143, |
|
"eval_steps_per_second": 1.322, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12886597938144329, |
|
"grad_norm": 1.6459186978386617, |
|
"learning_rate": 1.9956456025529805e-07, |
|
"loss": 1.6407, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13144329896907217, |
|
"grad_norm": 1.6778367242552643, |
|
"learning_rate": 1.9954543682003732e-07, |
|
"loss": 1.5755, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13402061855670103, |
|
"grad_norm": 1.5846228635636366, |
|
"learning_rate": 1.9952590338932356e-07, |
|
"loss": 1.5236, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13659793814432988, |
|
"grad_norm": 1.530322622789531, |
|
"learning_rate": 1.9950596004360864e-07, |
|
"loss": 1.6474, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13917525773195877, |
|
"grad_norm": 1.5541727762346491, |
|
"learning_rate": 1.994856068650327e-07, |
|
"loss": 1.5926, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14175257731958762, |
|
"grad_norm": 1.5422089413059752, |
|
"learning_rate": 1.9946484393742394e-07, |
|
"loss": 1.6057, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14432989690721648, |
|
"grad_norm": 1.5086078750620586, |
|
"learning_rate": 1.994436713462982e-07, |
|
"loss": 1.6139, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14690721649484537, |
|
"grad_norm": 1.4904490748313473, |
|
"learning_rate": 1.994220891788584e-07, |
|
"loss": 1.5613, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14948453608247422, |
|
"grad_norm": 1.4446085113828102, |
|
"learning_rate": 1.9940009752399457e-07, |
|
"loss": 1.5838, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15206185567010308, |
|
"grad_norm": 1.4944945344118559, |
|
"learning_rate": 1.9937769647228327e-07, |
|
"loss": 1.6009, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15463917525773196, |
|
"grad_norm": 1.3673177038874413, |
|
"learning_rate": 1.9935488611598714e-07, |
|
"loss": 1.5295, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15721649484536082, |
|
"grad_norm": 1.489918654317649, |
|
"learning_rate": 1.9933166654905465e-07, |
|
"loss": 1.6855, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15979381443298968, |
|
"grad_norm": 1.4085364811053838, |
|
"learning_rate": 1.993080378671197e-07, |
|
"loss": 1.6171, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.16237113402061856, |
|
"grad_norm": 1.4063494910858265, |
|
"learning_rate": 1.992840001675012e-07, |
|
"loss": 1.548, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16494845360824742, |
|
"grad_norm": 1.4013900053822443, |
|
"learning_rate": 1.9925955354920263e-07, |
|
"loss": 1.5674, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16752577319587628, |
|
"grad_norm": 1.3995913424696536, |
|
"learning_rate": 1.9923469811291173e-07, |
|
"loss": 1.644, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.17010309278350516, |
|
"grad_norm": 1.4951716735691833, |
|
"learning_rate": 1.99209433961e-07, |
|
"loss": 1.6752, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17268041237113402, |
|
"grad_norm": 1.4354454580093134, |
|
"learning_rate": 1.9918376119752226e-07, |
|
"loss": 1.6076, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17525773195876287, |
|
"grad_norm": 1.5307588716137506, |
|
"learning_rate": 1.9915767992821639e-07, |
|
"loss": 1.6192, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17783505154639176, |
|
"grad_norm": 1.37638400966553, |
|
"learning_rate": 1.9913119026050267e-07, |
|
"loss": 1.5744, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.18041237113402062, |
|
"grad_norm": 1.3694054278862016, |
|
"learning_rate": 1.9910429230348344e-07, |
|
"loss": 1.4495, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18298969072164947, |
|
"grad_norm": 1.4276322894882787, |
|
"learning_rate": 1.9907698616794276e-07, |
|
"loss": 1.6427, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18556701030927836, |
|
"grad_norm": 1.475589693442013, |
|
"learning_rate": 1.990492719663457e-07, |
|
"loss": 1.6231, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18814432989690721, |
|
"grad_norm": 1.505476760952321, |
|
"learning_rate": 1.990211498128381e-07, |
|
"loss": 1.7036, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.19072164948453607, |
|
"grad_norm": 1.4498365666960409, |
|
"learning_rate": 1.9899261982324607e-07, |
|
"loss": 1.5564, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19329896907216496, |
|
"grad_norm": 1.4542099562182622, |
|
"learning_rate": 1.9896368211507535e-07, |
|
"loss": 1.6012, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1958762886597938, |
|
"grad_norm": 1.408394462248393, |
|
"learning_rate": 1.9893433680751103e-07, |
|
"loss": 1.5493, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19845360824742267, |
|
"grad_norm": 1.4023960052363178, |
|
"learning_rate": 1.9890458402141688e-07, |
|
"loss": 1.6452, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.20103092783505155, |
|
"grad_norm": 1.4823050133687188, |
|
"learning_rate": 1.988744238793351e-07, |
|
"loss": 1.5991, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2036082474226804, |
|
"grad_norm": 1.32937819085943, |
|
"learning_rate": 1.9884385650548548e-07, |
|
"loss": 1.5358, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.20618556701030927, |
|
"grad_norm": 1.3471888309972797, |
|
"learning_rate": 1.9881288202576517e-07, |
|
"loss": 1.5426, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20876288659793815, |
|
"grad_norm": 1.34250330197651, |
|
"learning_rate": 1.98781500567748e-07, |
|
"loss": 1.5743, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.211340206185567, |
|
"grad_norm": 1.3158395928293942, |
|
"learning_rate": 1.9874971226068412e-07, |
|
"loss": 1.5914, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.21391752577319587, |
|
"grad_norm": 1.3088201655236604, |
|
"learning_rate": 1.9871751723549926e-07, |
|
"loss": 1.5307, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.21649484536082475, |
|
"grad_norm": 1.4622234110087462, |
|
"learning_rate": 1.9868491562479426e-07, |
|
"loss": 1.6698, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.2190721649484536, |
|
"grad_norm": 1.2966036743967264, |
|
"learning_rate": 1.9865190756284464e-07, |
|
"loss": 1.6172, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.22164948453608246, |
|
"grad_norm": 1.3416821729559592, |
|
"learning_rate": 1.9861849318559995e-07, |
|
"loss": 1.6395, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.22422680412371135, |
|
"grad_norm": 1.4246775767306445, |
|
"learning_rate": 1.9858467263068319e-07, |
|
"loss": 1.6048, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2268041237113402, |
|
"grad_norm": 1.332606463309659, |
|
"learning_rate": 1.9855044603739028e-07, |
|
"loss": 1.6383, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.22938144329896906, |
|
"grad_norm": 1.380602547288226, |
|
"learning_rate": 1.9851581354668948e-07, |
|
"loss": 1.64, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.23195876288659795, |
|
"grad_norm": 1.3407177446168135, |
|
"learning_rate": 1.984807753012208e-07, |
|
"loss": 1.7039, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2345360824742268, |
|
"grad_norm": 1.338866434398542, |
|
"learning_rate": 1.9844533144529547e-07, |
|
"loss": 1.5236, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.23711340206185566, |
|
"grad_norm": 1.274500058980513, |
|
"learning_rate": 1.9840948212489526e-07, |
|
"loss": 1.5713, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.23969072164948454, |
|
"grad_norm": 1.3410204352377493, |
|
"learning_rate": 1.9837322748767194e-07, |
|
"loss": 1.6058, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2422680412371134, |
|
"grad_norm": 1.3188947135915765, |
|
"learning_rate": 1.983365676829466e-07, |
|
"loss": 1.6209, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.24484536082474226, |
|
"grad_norm": 1.2787506674738858, |
|
"learning_rate": 1.9829950286170913e-07, |
|
"loss": 1.5984, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24742268041237114, |
|
"grad_norm": 1.3508302652980064, |
|
"learning_rate": 1.9826203317661756e-07, |
|
"loss": 1.5126, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.3775203706307013, |
|
"learning_rate": 1.9822415878199737e-07, |
|
"loss": 1.5806, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.25257731958762886, |
|
"grad_norm": 1.3953183701272227, |
|
"learning_rate": 1.9818587983384095e-07, |
|
"loss": 1.6391, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.25257731958762886, |
|
"eval_loss": 1.5530622005462646, |
|
"eval_runtime": 78.7591, |
|
"eval_samples_per_second": 21.115, |
|
"eval_steps_per_second": 1.32, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.2551546391752577, |
|
"grad_norm": 1.2639205955569304, |
|
"learning_rate": 1.981471964898069e-07, |
|
"loss": 1.6154, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.25773195876288657, |
|
"grad_norm": 1.33461619126327, |
|
"learning_rate": 1.9810810890921942e-07, |
|
"loss": 1.5841, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2603092783505155, |
|
"grad_norm": 1.3223001702133927, |
|
"learning_rate": 1.980686172530676e-07, |
|
"loss": 1.6292, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.26288659793814434, |
|
"grad_norm": 1.2560649642869146, |
|
"learning_rate": 1.9802872168400478e-07, |
|
"loss": 1.5673, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.2654639175257732, |
|
"grad_norm": 1.2597104528650152, |
|
"learning_rate": 1.9798842236634795e-07, |
|
"loss": 1.6508, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26804123711340205, |
|
"grad_norm": 1.407282635250448, |
|
"learning_rate": 1.979477194660769e-07, |
|
"loss": 1.4872, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2706185567010309, |
|
"grad_norm": 1.2016832149108632, |
|
"learning_rate": 1.9790661315083375e-07, |
|
"loss": 1.5604, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.27319587628865977, |
|
"grad_norm": 1.149030350241683, |
|
"learning_rate": 1.978651035899221e-07, |
|
"loss": 1.421, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.2757731958762887, |
|
"grad_norm": 1.3215975195174274, |
|
"learning_rate": 1.9782319095430643e-07, |
|
"loss": 1.5786, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.27835051546391754, |
|
"grad_norm": 1.2703092272910235, |
|
"learning_rate": 1.9778087541661131e-07, |
|
"loss": 1.484, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2809278350515464, |
|
"grad_norm": 1.2413825121259754, |
|
"learning_rate": 1.9773815715112072e-07, |
|
"loss": 1.5041, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.28350515463917525, |
|
"grad_norm": 1.2972955973409976, |
|
"learning_rate": 1.9769503633377743e-07, |
|
"loss": 1.5719, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2860824742268041, |
|
"grad_norm": 1.3905442390636398, |
|
"learning_rate": 1.9765151314218209e-07, |
|
"loss": 1.5788, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28865979381443296, |
|
"grad_norm": 1.269867236059509, |
|
"learning_rate": 1.976075877555927e-07, |
|
"loss": 1.5358, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.2912371134020619, |
|
"grad_norm": 1.2521107632001138, |
|
"learning_rate": 1.975632603549237e-07, |
|
"loss": 1.5908, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.29381443298969073, |
|
"grad_norm": 1.2496393834141784, |
|
"learning_rate": 1.9751853112274527e-07, |
|
"loss": 1.5506, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.2963917525773196, |
|
"grad_norm": 1.2871218607928567, |
|
"learning_rate": 1.974734002432827e-07, |
|
"loss": 1.5275, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.29896907216494845, |
|
"grad_norm": 1.2976234741205572, |
|
"learning_rate": 1.9742786790241546e-07, |
|
"loss": 1.5444, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.3015463917525773, |
|
"grad_norm": 1.2017823329368622, |
|
"learning_rate": 1.9738193428767654e-07, |
|
"loss": 1.543, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.30412371134020616, |
|
"grad_norm": 1.226770431675134, |
|
"learning_rate": 1.9733559958825167e-07, |
|
"loss": 1.5397, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.30670103092783507, |
|
"grad_norm": 1.3442951015324778, |
|
"learning_rate": 1.9728886399497844e-07, |
|
"loss": 1.5852, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.30927835051546393, |
|
"grad_norm": 1.2017473551527889, |
|
"learning_rate": 1.9724172770034564e-07, |
|
"loss": 1.5318, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3118556701030928, |
|
"grad_norm": 1.211656114042897, |
|
"learning_rate": 1.9719419089849246e-07, |
|
"loss": 1.5028, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.31443298969072164, |
|
"grad_norm": 1.400130154858166, |
|
"learning_rate": 1.9714625378520756e-07, |
|
"loss": 1.5582, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3170103092783505, |
|
"grad_norm": 1.3086898697605782, |
|
"learning_rate": 1.9709791655792847e-07, |
|
"loss": 1.6549, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.31958762886597936, |
|
"grad_norm": 1.278029367300382, |
|
"learning_rate": 1.9704917941574052e-07, |
|
"loss": 1.5557, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.32216494845360827, |
|
"grad_norm": 1.2356382868741678, |
|
"learning_rate": 1.9700004255937627e-07, |
|
"loss": 1.5288, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3247422680412371, |
|
"grad_norm": 1.28937440464536, |
|
"learning_rate": 1.9695050619121457e-07, |
|
"loss": 1.5266, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.327319587628866, |
|
"grad_norm": 1.4414848109811116, |
|
"learning_rate": 1.9690057051527963e-07, |
|
"loss": 1.6097, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.32989690721649484, |
|
"grad_norm": 1.2136781418976954, |
|
"learning_rate": 1.9685023573724035e-07, |
|
"loss": 1.4935, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.3324742268041237, |
|
"grad_norm": 1.3341115569144475, |
|
"learning_rate": 1.9679950206440948e-07, |
|
"loss": 1.5987, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.33505154639175255, |
|
"grad_norm": 1.329559323076734, |
|
"learning_rate": 1.967483697057425e-07, |
|
"loss": 1.5782, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.33762886597938147, |
|
"grad_norm": 1.2026583523005048, |
|
"learning_rate": 1.9669683887183714e-07, |
|
"loss": 1.5482, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3402061855670103, |
|
"grad_norm": 1.230715216092296, |
|
"learning_rate": 1.966449097749322e-07, |
|
"loss": 1.637, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3427835051546392, |
|
"grad_norm": 1.3616177214331797, |
|
"learning_rate": 1.965925826289068e-07, |
|
"loss": 1.5264, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.34536082474226804, |
|
"grad_norm": 1.1816372421732182, |
|
"learning_rate": 1.965398576492796e-07, |
|
"loss": 1.5349, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.3479381443298969, |
|
"grad_norm": 1.3503944653975188, |
|
"learning_rate": 1.964867350532077e-07, |
|
"loss": 1.5317, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.35051546391752575, |
|
"grad_norm": 1.3016847854244378, |
|
"learning_rate": 1.9643321505948584e-07, |
|
"loss": 1.6062, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.35309278350515466, |
|
"grad_norm": 1.19908669818476, |
|
"learning_rate": 1.9637929788854564e-07, |
|
"loss": 1.6179, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.3556701030927835, |
|
"grad_norm": 1.1945706816984818, |
|
"learning_rate": 1.9632498376245445e-07, |
|
"loss": 1.5982, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3582474226804124, |
|
"grad_norm": 1.233096157789794, |
|
"learning_rate": 1.9627027290491458e-07, |
|
"loss": 1.572, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.36082474226804123, |
|
"grad_norm": 1.2228780779938433, |
|
"learning_rate": 1.9621516554126237e-07, |
|
"loss": 1.5789, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3634020618556701, |
|
"grad_norm": 1.1898193013734535, |
|
"learning_rate": 1.961596618984672e-07, |
|
"loss": 1.4511, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.36597938144329895, |
|
"grad_norm": 1.25230398028528, |
|
"learning_rate": 1.9610376220513066e-07, |
|
"loss": 1.5529, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.36855670103092786, |
|
"grad_norm": 1.2693796938125035, |
|
"learning_rate": 1.960474666914855e-07, |
|
"loss": 1.5403, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3711340206185567, |
|
"grad_norm": 1.3275717703634924, |
|
"learning_rate": 1.9599077558939464e-07, |
|
"loss": 1.4989, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.37371134020618557, |
|
"grad_norm": 1.1489906814896371, |
|
"learning_rate": 1.959336891323505e-07, |
|
"loss": 1.5074, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.37628865979381443, |
|
"grad_norm": 1.1875368070507506, |
|
"learning_rate": 1.958762075554737e-07, |
|
"loss": 1.5219, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3788659793814433, |
|
"grad_norm": 1.2013715546004073, |
|
"learning_rate": 1.9581833109551228e-07, |
|
"loss": 1.5413, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3788659793814433, |
|
"eval_loss": 1.5337220430374146, |
|
"eval_runtime": 78.6436, |
|
"eval_samples_per_second": 21.146, |
|
"eval_steps_per_second": 1.322, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.38144329896907214, |
|
"grad_norm": 1.348552262306386, |
|
"learning_rate": 1.9576005999084056e-07, |
|
"loss": 1.5713, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.38402061855670105, |
|
"grad_norm": 1.2579524096365415, |
|
"learning_rate": 1.9570139448145852e-07, |
|
"loss": 1.5042, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3865979381443299, |
|
"grad_norm": 1.2007903800378994, |
|
"learning_rate": 1.9564233480899028e-07, |
|
"loss": 1.4753, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.38917525773195877, |
|
"grad_norm": 1.14999357355067, |
|
"learning_rate": 1.955828812166836e-07, |
|
"loss": 1.489, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3917525773195876, |
|
"grad_norm": 1.2834202884360733, |
|
"learning_rate": 1.955230339494086e-07, |
|
"loss": 1.5672, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3943298969072165, |
|
"grad_norm": 1.2110339834614112, |
|
"learning_rate": 1.9546279325365675e-07, |
|
"loss": 1.5138, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.39690721649484534, |
|
"grad_norm": 1.2447583871603898, |
|
"learning_rate": 1.9540215937754007e-07, |
|
"loss": 1.5324, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.39948453608247425, |
|
"grad_norm": 1.2169740146814894, |
|
"learning_rate": 1.9534113257078978e-07, |
|
"loss": 1.5228, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4020618556701031, |
|
"grad_norm": 1.3339392292279337, |
|
"learning_rate": 1.9527971308475568e-07, |
|
"loss": 1.5537, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.40463917525773196, |
|
"grad_norm": 1.1629410191581253, |
|
"learning_rate": 1.952179011724047e-07, |
|
"loss": 1.4565, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4072164948453608, |
|
"grad_norm": 1.2166854685328994, |
|
"learning_rate": 1.951556970883201e-07, |
|
"loss": 1.4996, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4097938144329897, |
|
"grad_norm": 1.1864599175194743, |
|
"learning_rate": 1.9509310108870037e-07, |
|
"loss": 1.5078, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.41237113402061853, |
|
"grad_norm": 1.2614891919139117, |
|
"learning_rate": 1.9503011343135826e-07, |
|
"loss": 1.6787, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.41494845360824745, |
|
"grad_norm": 1.2538176997908546, |
|
"learning_rate": 1.9496673437571945e-07, |
|
"loss": 1.5567, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4175257731958763, |
|
"grad_norm": 1.2100512003350425, |
|
"learning_rate": 1.9490296418282183e-07, |
|
"loss": 1.5835, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.42010309278350516, |
|
"grad_norm": 1.176294102289334, |
|
"learning_rate": 1.9483880311531423e-07, |
|
"loss": 1.4902, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.422680412371134, |
|
"grad_norm": 1.2400060721796176, |
|
"learning_rate": 1.9477425143745525e-07, |
|
"loss": 1.5971, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4252577319587629, |
|
"grad_norm": 1.1621100701911136, |
|
"learning_rate": 1.9470930941511243e-07, |
|
"loss": 1.5171, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.42783505154639173, |
|
"grad_norm": 1.2424661949562683, |
|
"learning_rate": 1.9464397731576091e-07, |
|
"loss": 1.4954, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.43041237113402064, |
|
"grad_norm": 1.23770627068237, |
|
"learning_rate": 1.9457825540848255e-07, |
|
"loss": 1.5326, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4329896907216495, |
|
"grad_norm": 1.1862612005970397, |
|
"learning_rate": 1.9451214396396453e-07, |
|
"loss": 1.4912, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.43556701030927836, |
|
"grad_norm": 1.2831749441379539, |
|
"learning_rate": 1.9444564325449853e-07, |
|
"loss": 1.6117, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4381443298969072, |
|
"grad_norm": 1.1531718726331943, |
|
"learning_rate": 1.943787535539795e-07, |
|
"loss": 1.4855, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.44072164948453607, |
|
"grad_norm": 1.1826441581231952, |
|
"learning_rate": 1.9431147513790446e-07, |
|
"loss": 1.5582, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.44329896907216493, |
|
"grad_norm": 1.1887449944628656, |
|
"learning_rate": 1.9424380828337143e-07, |
|
"loss": 1.5564, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.44587628865979384, |
|
"grad_norm": 1.249570543310612, |
|
"learning_rate": 1.9417575326907831e-07, |
|
"loss": 1.621, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4484536082474227, |
|
"grad_norm": 1.3090306728609684, |
|
"learning_rate": 1.941073103753217e-07, |
|
"loss": 1.5282, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.45103092783505155, |
|
"grad_norm": 1.2503633263430554, |
|
"learning_rate": 1.9403847988399566e-07, |
|
"loss": 1.5513, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4536082474226804, |
|
"grad_norm": 1.2018168355345367, |
|
"learning_rate": 1.9396926207859085e-07, |
|
"loss": 1.4957, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.45618556701030927, |
|
"grad_norm": 1.168765093642791, |
|
"learning_rate": 1.9389965724419288e-07, |
|
"loss": 1.5004, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4587628865979381, |
|
"grad_norm": 1.250633142422843, |
|
"learning_rate": 1.9382966566748167e-07, |
|
"loss": 1.5387, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.46134020618556704, |
|
"grad_norm": 1.171229347123422, |
|
"learning_rate": 1.9375928763672982e-07, |
|
"loss": 1.596, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4639175257731959, |
|
"grad_norm": 1.1693848944378227, |
|
"learning_rate": 1.9368852344180166e-07, |
|
"loss": 1.5147, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46649484536082475, |
|
"grad_norm": 1.2828987442740891, |
|
"learning_rate": 1.9361737337415204e-07, |
|
"loss": 1.5539, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4690721649484536, |
|
"grad_norm": 1.1925907017733204, |
|
"learning_rate": 1.9354583772682512e-07, |
|
"loss": 1.5752, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.47164948453608246, |
|
"grad_norm": 1.321152376647017, |
|
"learning_rate": 1.93473916794453e-07, |
|
"loss": 1.5952, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4742268041237113, |
|
"grad_norm": 1.2480635026506552, |
|
"learning_rate": 1.934016108732548e-07, |
|
"loss": 1.5068, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.47680412371134023, |
|
"grad_norm": 1.2890663133137021, |
|
"learning_rate": 1.9332892026103517e-07, |
|
"loss": 1.4498, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4793814432989691, |
|
"grad_norm": 1.278439525246191, |
|
"learning_rate": 1.932558452571833e-07, |
|
"loss": 1.5061, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.48195876288659795, |
|
"grad_norm": 1.2481302944858157, |
|
"learning_rate": 1.931823861626714e-07, |
|
"loss": 1.5672, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.4845360824742268, |
|
"grad_norm": 1.2421848632538859, |
|
"learning_rate": 1.9310854328005378e-07, |
|
"loss": 1.4985, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.48711340206185566, |
|
"grad_norm": 1.1840656288458875, |
|
"learning_rate": 1.930343169134654e-07, |
|
"loss": 1.556, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4896907216494845, |
|
"grad_norm": 1.2585791993336888, |
|
"learning_rate": 1.929597073686206e-07, |
|
"loss": 1.5539, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.49226804123711343, |
|
"grad_norm": 1.123656686890668, |
|
"learning_rate": 1.9288471495281203e-07, |
|
"loss": 1.5377, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4948453608247423, |
|
"grad_norm": 1.276688134117863, |
|
"learning_rate": 1.9280933997490912e-07, |
|
"loss": 1.5845, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.49742268041237114, |
|
"grad_norm": 1.231953746707157, |
|
"learning_rate": 1.9273358274535702e-07, |
|
"loss": 1.6142, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.3230553754067966, |
|
"learning_rate": 1.926574435761753e-07, |
|
"loss": 1.4738, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5025773195876289, |
|
"grad_norm": 1.2436732656409537, |
|
"learning_rate": 1.9258092278095657e-07, |
|
"loss": 1.5969, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5051546391752577, |
|
"grad_norm": 1.221047910828976, |
|
"learning_rate": 1.925040206748652e-07, |
|
"loss": 1.5962, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5051546391752577, |
|
"eval_loss": 1.520858883857727, |
|
"eval_runtime": 78.5683, |
|
"eval_samples_per_second": 21.166, |
|
"eval_steps_per_second": 1.324, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5077319587628866, |
|
"grad_norm": 1.2212270479150868, |
|
"learning_rate": 1.924267375746361e-07, |
|
"loss": 1.5033, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5103092783505154, |
|
"grad_norm": 1.2178250609326542, |
|
"learning_rate": 1.9234907379857334e-07, |
|
"loss": 1.577, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5128865979381443, |
|
"grad_norm": 1.1521118751035526, |
|
"learning_rate": 1.9227102966654895e-07, |
|
"loss": 1.4468, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5154639175257731, |
|
"grad_norm": 1.2132226025196962, |
|
"learning_rate": 1.9219260550000143e-07, |
|
"loss": 1.5135, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5180412371134021, |
|
"grad_norm": 1.191186345232448, |
|
"learning_rate": 1.921138016219345e-07, |
|
"loss": 1.5146, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.520618556701031, |
|
"grad_norm": 1.2208830731174638, |
|
"learning_rate": 1.9203461835691592e-07, |
|
"loss": 1.5452, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5231958762886598, |
|
"grad_norm": 1.2176060346511148, |
|
"learning_rate": 1.9195505603107594e-07, |
|
"loss": 1.5144, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5257731958762887, |
|
"grad_norm": 1.1351041872872305, |
|
"learning_rate": 1.9187511497210597e-07, |
|
"loss": 1.5463, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5283505154639175, |
|
"grad_norm": 1.1782470225350157, |
|
"learning_rate": 1.9179479550925747e-07, |
|
"loss": 1.4878, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5309278350515464, |
|
"grad_norm": 1.0942788691010794, |
|
"learning_rate": 1.9171409797334025e-07, |
|
"loss": 1.5423, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5335051546391752, |
|
"grad_norm": 1.2422690533739307, |
|
"learning_rate": 1.9163302269672137e-07, |
|
"loss": 1.5543, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5360824742268041, |
|
"grad_norm": 1.187410857798478, |
|
"learning_rate": 1.9155157001332372e-07, |
|
"loss": 1.4864, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.538659793814433, |
|
"grad_norm": 1.2521757262499582, |
|
"learning_rate": 1.9146974025862448e-07, |
|
"loss": 1.5678, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5412371134020618, |
|
"grad_norm": 1.1895335891190835, |
|
"learning_rate": 1.91387533769654e-07, |
|
"loss": 1.5359, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5438144329896907, |
|
"grad_norm": 1.156080510817116, |
|
"learning_rate": 1.9130495088499417e-07, |
|
"loss": 1.4179, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5463917525773195, |
|
"grad_norm": 1.2160395280121006, |
|
"learning_rate": 1.912219919447772e-07, |
|
"loss": 1.5288, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5489690721649485, |
|
"grad_norm": 1.187251015976325, |
|
"learning_rate": 1.9113865729068413e-07, |
|
"loss": 1.5829, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5515463917525774, |
|
"grad_norm": 1.2325994836421947, |
|
"learning_rate": 1.9105494726594342e-07, |
|
"loss": 1.5918, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5541237113402062, |
|
"grad_norm": 1.2136013415323126, |
|
"learning_rate": 1.9097086221532964e-07, |
|
"loss": 1.5093, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5567010309278351, |
|
"grad_norm": 1.1685027007257103, |
|
"learning_rate": 1.9088640248516185e-07, |
|
"loss": 1.5992, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5592783505154639, |
|
"grad_norm": 1.2470178729913264, |
|
"learning_rate": 1.908015684233024e-07, |
|
"loss": 1.5845, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5618556701030928, |
|
"grad_norm": 1.3342781963513264, |
|
"learning_rate": 1.9071636037915533e-07, |
|
"loss": 1.5227, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5644329896907216, |
|
"grad_norm": 1.2834111003737632, |
|
"learning_rate": 1.90630778703665e-07, |
|
"loss": 1.5278, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5670103092783505, |
|
"grad_norm": 1.2731317285054349, |
|
"learning_rate": 1.9054482374931466e-07, |
|
"loss": 1.558, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5695876288659794, |
|
"grad_norm": 1.2315820199483811, |
|
"learning_rate": 1.9045849587012496e-07, |
|
"loss": 1.5586, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5721649484536082, |
|
"grad_norm": 1.2995032591648374, |
|
"learning_rate": 1.9037179542165253e-07, |
|
"loss": 1.5726, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5747422680412371, |
|
"grad_norm": 1.2207628382258247, |
|
"learning_rate": 1.902847227609884e-07, |
|
"loss": 1.5622, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5773195876288659, |
|
"grad_norm": 1.1578307509849368, |
|
"learning_rate": 1.901972782467568e-07, |
|
"loss": 1.5029, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5798969072164949, |
|
"grad_norm": 1.2559554939477484, |
|
"learning_rate": 1.9010946223911333e-07, |
|
"loss": 1.5536, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5824742268041238, |
|
"grad_norm": 1.1912957688409214, |
|
"learning_rate": 1.9002127509974374e-07, |
|
"loss": 1.4107, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5850515463917526, |
|
"grad_norm": 1.347391803127549, |
|
"learning_rate": 1.899327171918623e-07, |
|
"loss": 1.4981, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5876288659793815, |
|
"grad_norm": 1.1735029116257494, |
|
"learning_rate": 1.8984378888021042e-07, |
|
"loss": 1.4931, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5902061855670103, |
|
"grad_norm": 1.1491563326269614, |
|
"learning_rate": 1.8975449053105503e-07, |
|
"loss": 1.439, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5927835051546392, |
|
"grad_norm": 1.1281459530728108, |
|
"learning_rate": 1.8966482251218715e-07, |
|
"loss": 1.5317, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.595360824742268, |
|
"grad_norm": 1.1698523464033057, |
|
"learning_rate": 1.8957478519292032e-07, |
|
"loss": 1.533, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.5979381443298969, |
|
"grad_norm": 1.2253794089203258, |
|
"learning_rate": 1.8948437894408918e-07, |
|
"loss": 1.566, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6005154639175257, |
|
"grad_norm": 1.2704578177761554, |
|
"learning_rate": 1.893936041380478e-07, |
|
"loss": 1.5496, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6030927835051546, |
|
"grad_norm": 1.270569192705897, |
|
"learning_rate": 1.8930246114866822e-07, |
|
"loss": 1.4762, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6056701030927835, |
|
"grad_norm": 1.1748786103242588, |
|
"learning_rate": 1.8921095035133896e-07, |
|
"loss": 1.5641, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6082474226804123, |
|
"grad_norm": 1.2029791452687832, |
|
"learning_rate": 1.891190721229634e-07, |
|
"loss": 1.5694, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6108247422680413, |
|
"grad_norm": 1.19680587233996, |
|
"learning_rate": 1.890268268419582e-07, |
|
"loss": 1.5538, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6134020618556701, |
|
"grad_norm": 1.1874592772095638, |
|
"learning_rate": 1.8893421488825187e-07, |
|
"loss": 1.4978, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.615979381443299, |
|
"grad_norm": 1.216069233807722, |
|
"learning_rate": 1.888412366432831e-07, |
|
"loss": 1.584, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6185567010309279, |
|
"grad_norm": 1.2090175073299552, |
|
"learning_rate": 1.8874789248999913e-07, |
|
"loss": 1.5486, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6211340206185567, |
|
"grad_norm": 1.1599735542109655, |
|
"learning_rate": 1.8865418281285444e-07, |
|
"loss": 1.512, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6237113402061856, |
|
"grad_norm": 1.1508476690774565, |
|
"learning_rate": 1.885601079978088e-07, |
|
"loss": 1.4699, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6262886597938144, |
|
"grad_norm": 1.294126202956922, |
|
"learning_rate": 1.8846566843232594e-07, |
|
"loss": 1.6185, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6288659793814433, |
|
"grad_norm": 1.1538551018422412, |
|
"learning_rate": 1.883708645053719e-07, |
|
"loss": 1.5284, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6314432989690721, |
|
"grad_norm": 1.1790058528070886, |
|
"learning_rate": 1.882756966074134e-07, |
|
"loss": 1.5235, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6314432989690721, |
|
"eval_loss": 1.510589361190796, |
|
"eval_runtime": 78.6198, |
|
"eval_samples_per_second": 21.152, |
|
"eval_steps_per_second": 1.323, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.634020618556701, |
|
"grad_norm": 1.1938102380471263, |
|
"learning_rate": 1.8818016513041623e-07, |
|
"loss": 1.5028, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6365979381443299, |
|
"grad_norm": 1.231310461159998, |
|
"learning_rate": 1.8808427046784362e-07, |
|
"loss": 1.5686, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6391752577319587, |
|
"grad_norm": 1.3015696329059996, |
|
"learning_rate": 1.8798801301465467e-07, |
|
"loss": 1.579, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6417525773195877, |
|
"grad_norm": 1.1482602866030465, |
|
"learning_rate": 1.8789139316730269e-07, |
|
"loss": 1.5331, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6443298969072165, |
|
"grad_norm": 1.231219314227984, |
|
"learning_rate": 1.8779441132373359e-07, |
|
"loss": 1.5366, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6469072164948454, |
|
"grad_norm": 1.2531642119413817, |
|
"learning_rate": 1.876970678833842e-07, |
|
"loss": 1.5246, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6494845360824743, |
|
"grad_norm": 1.1332607994718875, |
|
"learning_rate": 1.8759936324718066e-07, |
|
"loss": 1.5029, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6520618556701031, |
|
"grad_norm": 1.123414985710231, |
|
"learning_rate": 1.8750129781753677e-07, |
|
"loss": 1.5992, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.654639175257732, |
|
"grad_norm": 1.1601574273566644, |
|
"learning_rate": 1.874028719983523e-07, |
|
"loss": 1.4271, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6572164948453608, |
|
"grad_norm": 1.2155208006708451, |
|
"learning_rate": 1.8730408619501138e-07, |
|
"loss": 1.5939, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6597938144329897, |
|
"grad_norm": 1.181434829014358, |
|
"learning_rate": 1.8720494081438076e-07, |
|
"loss": 1.5416, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6623711340206185, |
|
"grad_norm": 1.1457316456562228, |
|
"learning_rate": 1.8710543626480818e-07, |
|
"loss": 1.4854, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6649484536082474, |
|
"grad_norm": 1.1872624778137861, |
|
"learning_rate": 1.8700557295612072e-07, |
|
"loss": 1.5045, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6675257731958762, |
|
"grad_norm": 1.2856636838183533, |
|
"learning_rate": 1.8690535129962305e-07, |
|
"loss": 1.4678, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6701030927835051, |
|
"grad_norm": 1.131984435899355, |
|
"learning_rate": 1.8680477170809572e-07, |
|
"loss": 1.5706, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6726804123711341, |
|
"grad_norm": 1.2653048133418598, |
|
"learning_rate": 1.8670383459579356e-07, |
|
"loss": 1.5623, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6752577319587629, |
|
"grad_norm": 1.2245543813976405, |
|
"learning_rate": 1.8660254037844388e-07, |
|
"loss": 1.5039, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6778350515463918, |
|
"grad_norm": 1.1778675556929805, |
|
"learning_rate": 1.8650088947324475e-07, |
|
"loss": 1.5143, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6804123711340206, |
|
"grad_norm": 1.1796106429583424, |
|
"learning_rate": 1.863988822988634e-07, |
|
"loss": 1.5867, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6829896907216495, |
|
"grad_norm": 1.143095546666012, |
|
"learning_rate": 1.8629651927543443e-07, |
|
"loss": 1.4735, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6855670103092784, |
|
"grad_norm": 1.1803235220482347, |
|
"learning_rate": 1.8619380082455796e-07, |
|
"loss": 1.4606, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6881443298969072, |
|
"grad_norm": 1.2218442431344259, |
|
"learning_rate": 1.8609072736929806e-07, |
|
"loss": 1.5409, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6907216494845361, |
|
"grad_norm": 1.2044546146531363, |
|
"learning_rate": 1.85987299334181e-07, |
|
"loss": 1.5279, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6932989690721649, |
|
"grad_norm": 1.2619745333120211, |
|
"learning_rate": 1.8588351714519335e-07, |
|
"loss": 1.5244, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.6958762886597938, |
|
"grad_norm": 1.256000322805203, |
|
"learning_rate": 1.8577938122978042e-07, |
|
"loss": 1.5294, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6984536082474226, |
|
"grad_norm": 1.2356982681147777, |
|
"learning_rate": 1.856748920168443e-07, |
|
"loss": 1.5036, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7010309278350515, |
|
"grad_norm": 1.2037362943983936, |
|
"learning_rate": 1.855700499367423e-07, |
|
"loss": 1.5235, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7036082474226805, |
|
"grad_norm": 1.2017143929693659, |
|
"learning_rate": 1.85464855421285e-07, |
|
"loss": 1.4204, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7061855670103093, |
|
"grad_norm": 1.1908996404734937, |
|
"learning_rate": 1.8535930890373465e-07, |
|
"loss": 1.4969, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7087628865979382, |
|
"grad_norm": 1.1577329971672512, |
|
"learning_rate": 1.8525341081880312e-07, |
|
"loss": 1.5319, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.711340206185567, |
|
"grad_norm": 1.1714981246895275, |
|
"learning_rate": 1.8514716160265045e-07, |
|
"loss": 1.4177, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7139175257731959, |
|
"grad_norm": 1.1688981848930113, |
|
"learning_rate": 1.8504056169288274e-07, |
|
"loss": 1.5234, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7164948453608248, |
|
"grad_norm": 1.176710170060508, |
|
"learning_rate": 1.8493361152855057e-07, |
|
"loss": 1.499, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7190721649484536, |
|
"grad_norm": 1.1039383442864374, |
|
"learning_rate": 1.8482631155014703e-07, |
|
"loss": 1.5258, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7216494845360825, |
|
"grad_norm": 1.232497346510154, |
|
"learning_rate": 1.84718662199606e-07, |
|
"loss": 1.5564, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7242268041237113, |
|
"grad_norm": 1.1628995381634444, |
|
"learning_rate": 1.8461066392030046e-07, |
|
"loss": 1.4091, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7268041237113402, |
|
"grad_norm": 1.2777142820565022, |
|
"learning_rate": 1.8450231715704026e-07, |
|
"loss": 1.4754, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.729381443298969, |
|
"grad_norm": 1.2162243240659913, |
|
"learning_rate": 1.843936223560707e-07, |
|
"loss": 1.5473, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7319587628865979, |
|
"grad_norm": 1.2147904802438685, |
|
"learning_rate": 1.8428457996507053e-07, |
|
"loss": 1.5296, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7345360824742269, |
|
"grad_norm": 1.19577901711321, |
|
"learning_rate": 1.8417519043315004e-07, |
|
"loss": 1.542, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7371134020618557, |
|
"grad_norm": 1.252475138336633, |
|
"learning_rate": 1.8406545421084938e-07, |
|
"loss": 1.5293, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7396907216494846, |
|
"grad_norm": 1.1515656379492916, |
|
"learning_rate": 1.8395537175013654e-07, |
|
"loss": 1.5272, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7422680412371134, |
|
"grad_norm": 1.1517700578396561, |
|
"learning_rate": 1.8384494350440552e-07, |
|
"loss": 1.5133, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7448453608247423, |
|
"grad_norm": 1.217323252639824, |
|
"learning_rate": 1.8373416992847458e-07, |
|
"loss": 1.5009, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7474226804123711, |
|
"grad_norm": 1.1814204725087243, |
|
"learning_rate": 1.8362305147858428e-07, |
|
"loss": 1.4538, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.1842613200601082, |
|
"learning_rate": 1.835115886123955e-07, |
|
"loss": 1.3816, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7525773195876289, |
|
"grad_norm": 1.2063574196502098, |
|
"learning_rate": 1.8339978178898778e-07, |
|
"loss": 1.5965, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7551546391752577, |
|
"grad_norm": 1.2685230099116653, |
|
"learning_rate": 1.8328763146885725e-07, |
|
"loss": 1.5637, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7577319587628866, |
|
"grad_norm": 1.295213064366882, |
|
"learning_rate": 1.8317513811391476e-07, |
|
"loss": 1.5592, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7577319587628866, |
|
"eval_loss": 1.5018398761749268, |
|
"eval_runtime": 78.561, |
|
"eval_samples_per_second": 21.168, |
|
"eval_steps_per_second": 1.324, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7603092783505154, |
|
"grad_norm": 1.1669863622367527, |
|
"learning_rate": 1.830623021874841e-07, |
|
"loss": 1.5081, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7628865979381443, |
|
"grad_norm": 1.1910397422917334, |
|
"learning_rate": 1.8294912415429992e-07, |
|
"loss": 1.523, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7654639175257731, |
|
"grad_norm": 1.1665026656613802, |
|
"learning_rate": 1.8283560448050594e-07, |
|
"loss": 1.4753, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7680412371134021, |
|
"grad_norm": 1.212187645390271, |
|
"learning_rate": 1.8272174363365297e-07, |
|
"loss": 1.4983, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.770618556701031, |
|
"grad_norm": 1.2227876601034444, |
|
"learning_rate": 1.8260754208269701e-07, |
|
"loss": 1.5019, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7731958762886598, |
|
"grad_norm": 1.2358555763549743, |
|
"learning_rate": 1.8249300029799733e-07, |
|
"loss": 1.5965, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7757731958762887, |
|
"grad_norm": 1.187640438130257, |
|
"learning_rate": 1.8237811875131444e-07, |
|
"loss": 1.591, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7783505154639175, |
|
"grad_norm": 1.2214707732869985, |
|
"learning_rate": 1.8226289791580828e-07, |
|
"loss": 1.5274, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7809278350515464, |
|
"grad_norm": 1.2019657180078016, |
|
"learning_rate": 1.8214733826603625e-07, |
|
"loss": 1.5021, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7835051546391752, |
|
"grad_norm": 1.16960231687607, |
|
"learning_rate": 1.820314402779511e-07, |
|
"loss": 1.5763, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7860824742268041, |
|
"grad_norm": 1.152389731802479, |
|
"learning_rate": 1.8191520442889918e-07, |
|
"loss": 1.5176, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.788659793814433, |
|
"grad_norm": 1.1132515669118002, |
|
"learning_rate": 1.8179863119761833e-07, |
|
"loss": 1.4634, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7912371134020618, |
|
"grad_norm": 1.1607539313280772, |
|
"learning_rate": 1.8168172106423606e-07, |
|
"loss": 1.4798, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7938144329896907, |
|
"grad_norm": 1.2145359718563615, |
|
"learning_rate": 1.8156447451026728e-07, |
|
"loss": 1.594, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.7963917525773195, |
|
"grad_norm": 1.1870844292463605, |
|
"learning_rate": 1.814468920186127e-07, |
|
"loss": 1.478, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.7989690721649485, |
|
"grad_norm": 1.1233767004431354, |
|
"learning_rate": 1.8132897407355653e-07, |
|
"loss": 1.5882, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8015463917525774, |
|
"grad_norm": 1.1738330684693277, |
|
"learning_rate": 1.8121072116076464e-07, |
|
"loss": 1.4284, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8041237113402062, |
|
"grad_norm": 1.247978839030236, |
|
"learning_rate": 1.8109213376728257e-07, |
|
"loss": 1.5824, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8067010309278351, |
|
"grad_norm": 1.2318777988562417, |
|
"learning_rate": 1.8097321238153336e-07, |
|
"loss": 1.5185, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8092783505154639, |
|
"grad_norm": 1.137207160847728, |
|
"learning_rate": 1.808539574933158e-07, |
|
"loss": 1.448, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8118556701030928, |
|
"grad_norm": 1.203622066974504, |
|
"learning_rate": 1.8073436959380212e-07, |
|
"loss": 1.5003, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8144329896907216, |
|
"grad_norm": 1.1618827104260305, |
|
"learning_rate": 1.8061444917553627e-07, |
|
"loss": 1.4603, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8170103092783505, |
|
"grad_norm": 1.1455984024451822, |
|
"learning_rate": 1.8049419673243164e-07, |
|
"loss": 1.4366, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8195876288659794, |
|
"grad_norm": 1.1500253179290463, |
|
"learning_rate": 1.803736127597691e-07, |
|
"loss": 1.5403, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8221649484536082, |
|
"grad_norm": 1.2632412244799347, |
|
"learning_rate": 1.8025269775419507e-07, |
|
"loss": 1.5003, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8247422680412371, |
|
"grad_norm": 1.142698108221298, |
|
"learning_rate": 1.8013145221371934e-07, |
|
"loss": 1.4732, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8273195876288659, |
|
"grad_norm": 1.2124460871646654, |
|
"learning_rate": 1.8000987663771306e-07, |
|
"loss": 1.5311, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8298969072164949, |
|
"grad_norm": 1.2348590930541292, |
|
"learning_rate": 1.798879715269067e-07, |
|
"loss": 1.5741, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8324742268041238, |
|
"grad_norm": 1.1498349377386237, |
|
"learning_rate": 1.79765737383388e-07, |
|
"loss": 1.361, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8350515463917526, |
|
"grad_norm": 1.189403441559741, |
|
"learning_rate": 1.796431747105998e-07, |
|
"loss": 1.5002, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8376288659793815, |
|
"grad_norm": 1.2170644285030623, |
|
"learning_rate": 1.7952028401333816e-07, |
|
"loss": 1.5508, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8402061855670103, |
|
"grad_norm": 1.2305649106918, |
|
"learning_rate": 1.793970657977501e-07, |
|
"loss": 1.5185, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8427835051546392, |
|
"grad_norm": 1.1928858589906648, |
|
"learning_rate": 1.7927352057133156e-07, |
|
"loss": 1.5859, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.845360824742268, |
|
"grad_norm": 1.2402447474397933, |
|
"learning_rate": 1.791496488429254e-07, |
|
"loss": 1.4482, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8479381443298969, |
|
"grad_norm": 1.3004615784711493, |
|
"learning_rate": 1.7902545112271916e-07, |
|
"loss": 1.4996, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8505154639175257, |
|
"grad_norm": 1.2029226714523475, |
|
"learning_rate": 1.7890092792224314e-07, |
|
"loss": 1.4729, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8530927835051546, |
|
"grad_norm": 1.1646016402710766, |
|
"learning_rate": 1.7877607975436803e-07, |
|
"loss": 1.511, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8556701030927835, |
|
"grad_norm": 1.1748241861140345, |
|
"learning_rate": 1.7865090713330312e-07, |
|
"loss": 1.5406, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8582474226804123, |
|
"grad_norm": 1.1988219111182623, |
|
"learning_rate": 1.785254105745939e-07, |
|
"loss": 1.5364, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8608247422680413, |
|
"grad_norm": 1.2920016906616154, |
|
"learning_rate": 1.7839959059512014e-07, |
|
"loss": 1.5188, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8634020618556701, |
|
"grad_norm": 1.1390205414249481, |
|
"learning_rate": 1.7827344771309362e-07, |
|
"loss": 1.4749, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.865979381443299, |
|
"grad_norm": 1.207725667468718, |
|
"learning_rate": 1.7814698244805603e-07, |
|
"loss": 1.5144, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8685567010309279, |
|
"grad_norm": 1.2708389359824341, |
|
"learning_rate": 1.780201953208769e-07, |
|
"loss": 1.4633, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8711340206185567, |
|
"grad_norm": 1.3588744934998203, |
|
"learning_rate": 1.7789308685375146e-07, |
|
"loss": 1.5194, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8737113402061856, |
|
"grad_norm": 1.1714299642439896, |
|
"learning_rate": 1.7776565757019829e-07, |
|
"loss": 1.4378, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8762886597938144, |
|
"grad_norm": 1.2349197329756814, |
|
"learning_rate": 1.7763790799505743e-07, |
|
"loss": 1.501, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8788659793814433, |
|
"grad_norm": 1.145994840644305, |
|
"learning_rate": 1.7750983865448804e-07, |
|
"loss": 1.3569, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.8814432989690721, |
|
"grad_norm": 1.147878510470048, |
|
"learning_rate": 1.773814500759663e-07, |
|
"loss": 1.4907, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.884020618556701, |
|
"grad_norm": 1.2101479142325238, |
|
"learning_rate": 1.7725274278828324e-07, |
|
"loss": 1.5045, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.884020618556701, |
|
"eval_loss": 1.4945380687713623, |
|
"eval_runtime": 78.6415, |
|
"eval_samples_per_second": 21.147, |
|
"eval_steps_per_second": 1.322, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.8865979381443299, |
|
"grad_norm": 1.2038990843843793, |
|
"learning_rate": 1.7712371732154257e-07, |
|
"loss": 1.4554, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8891752577319587, |
|
"grad_norm": 1.1472367305664413, |
|
"learning_rate": 1.7699437420715838e-07, |
|
"loss": 1.4611, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.8917525773195877, |
|
"grad_norm": 1.2170090657627353, |
|
"learning_rate": 1.768647139778532e-07, |
|
"loss": 1.4619, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.8943298969072165, |
|
"grad_norm": 1.1815824919293882, |
|
"learning_rate": 1.7673473716765553e-07, |
|
"loss": 1.5022, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.8969072164948454, |
|
"grad_norm": 1.1967591939256936, |
|
"learning_rate": 1.766044443118978e-07, |
|
"loss": 1.4812, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8994845360824743, |
|
"grad_norm": 1.228975686058958, |
|
"learning_rate": 1.7647383594721413e-07, |
|
"loss": 1.4943, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9020618556701031, |
|
"grad_norm": 1.2132506060158343, |
|
"learning_rate": 1.7634291261153818e-07, |
|
"loss": 1.4852, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.904639175257732, |
|
"grad_norm": 1.2581183528068558, |
|
"learning_rate": 1.7621167484410076e-07, |
|
"loss": 1.5311, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9072164948453608, |
|
"grad_norm": 1.1976025658343157, |
|
"learning_rate": 1.7608012318542776e-07, |
|
"loss": 1.5623, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9097938144329897, |
|
"grad_norm": 1.2081117148971663, |
|
"learning_rate": 1.7594825817733804e-07, |
|
"loss": 1.4877, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9123711340206185, |
|
"grad_norm": 1.25102310904074, |
|
"learning_rate": 1.7581608036294074e-07, |
|
"loss": 1.5166, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9149484536082474, |
|
"grad_norm": 1.1251058107211171, |
|
"learning_rate": 1.7568359028663362e-07, |
|
"loss": 1.4818, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9175257731958762, |
|
"grad_norm": 1.162404179159399, |
|
"learning_rate": 1.7555078849410042e-07, |
|
"loss": 1.4684, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9201030927835051, |
|
"grad_norm": 1.1939177374027512, |
|
"learning_rate": 1.754176755323088e-07, |
|
"loss": 1.3906, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9226804123711341, |
|
"grad_norm": 1.2277839442625762, |
|
"learning_rate": 1.7528425194950793e-07, |
|
"loss": 1.5206, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9252577319587629, |
|
"grad_norm": 1.1589149786868607, |
|
"learning_rate": 1.7515051829522643e-07, |
|
"loss": 1.5117, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9278350515463918, |
|
"grad_norm": 1.161766915938516, |
|
"learning_rate": 1.7501647512026993e-07, |
|
"loss": 1.5142, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9304123711340206, |
|
"grad_norm": 1.1895671903848675, |
|
"learning_rate": 1.7488212297671897e-07, |
|
"loss": 1.5279, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9329896907216495, |
|
"grad_norm": 1.3331865087236399, |
|
"learning_rate": 1.7474746241792646e-07, |
|
"loss": 1.4476, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9355670103092784, |
|
"grad_norm": 1.1227191881644327, |
|
"learning_rate": 1.746124939985158e-07, |
|
"loss": 1.436, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9381443298969072, |
|
"grad_norm": 1.1453288975869358, |
|
"learning_rate": 1.7447721827437817e-07, |
|
"loss": 1.4721, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9407216494845361, |
|
"grad_norm": 1.1800301680843552, |
|
"learning_rate": 1.7434163580267056e-07, |
|
"loss": 1.4648, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9432989690721649, |
|
"grad_norm": 1.1592086626138536, |
|
"learning_rate": 1.7420574714181327e-07, |
|
"loss": 1.4645, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.9458762886597938, |
|
"grad_norm": 1.1969987793516494, |
|
"learning_rate": 1.7406955285148782e-07, |
|
"loss": 1.4628, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9484536082474226, |
|
"grad_norm": 1.25319893461736, |
|
"learning_rate": 1.7393305349263432e-07, |
|
"loss": 1.5327, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9510309278350515, |
|
"grad_norm": 1.1235076122412295, |
|
"learning_rate": 1.7379624962744954e-07, |
|
"loss": 1.457, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9536082474226805, |
|
"grad_norm": 1.215770975088775, |
|
"learning_rate": 1.7365914181938438e-07, |
|
"loss": 1.4802, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9561855670103093, |
|
"grad_norm": 1.1400445439752551, |
|
"learning_rate": 1.7352173063314147e-07, |
|
"loss": 1.4078, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.9587628865979382, |
|
"grad_norm": 1.219412218457137, |
|
"learning_rate": 1.7338401663467307e-07, |
|
"loss": 1.4863, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.961340206185567, |
|
"grad_norm": 1.2307165231693638, |
|
"learning_rate": 1.732460003911786e-07, |
|
"loss": 1.547, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9639175257731959, |
|
"grad_norm": 1.1928743718959285, |
|
"learning_rate": 1.731076824711023e-07, |
|
"loss": 1.4681, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9664948453608248, |
|
"grad_norm": 1.2210774438706382, |
|
"learning_rate": 1.7296906344413101e-07, |
|
"loss": 1.5359, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9690721649484536, |
|
"grad_norm": 1.1755911854453769, |
|
"learning_rate": 1.7283014388119157e-07, |
|
"loss": 1.5286, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9716494845360825, |
|
"grad_norm": 1.1189926107564905, |
|
"learning_rate": 1.7269092435444878e-07, |
|
"loss": 1.4309, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9742268041237113, |
|
"grad_norm": 1.209816536244005, |
|
"learning_rate": 1.7255140543730282e-07, |
|
"loss": 1.4689, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9768041237113402, |
|
"grad_norm": 1.1866285142861848, |
|
"learning_rate": 1.7241158770438697e-07, |
|
"loss": 1.4972, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.979381443298969, |
|
"grad_norm": 1.1354634757481643, |
|
"learning_rate": 1.722714717315652e-07, |
|
"loss": 1.4873, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9819587628865979, |
|
"grad_norm": 1.2944770552807037, |
|
"learning_rate": 1.7213105809593e-07, |
|
"loss": 1.4974, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9845360824742269, |
|
"grad_norm": 1.103791679895453, |
|
"learning_rate": 1.719903473757996e-07, |
|
"loss": 1.4338, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.9871134020618557, |
|
"grad_norm": 1.1784721051806777, |
|
"learning_rate": 1.7184934015071594e-07, |
|
"loss": 1.4041, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9896907216494846, |
|
"grad_norm": 1.1348338130977504, |
|
"learning_rate": 1.7170803700144225e-07, |
|
"loss": 1.4413, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.9922680412371134, |
|
"grad_norm": 1.2250889412679622, |
|
"learning_rate": 1.7156643850996044e-07, |
|
"loss": 1.4629, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.9948453608247423, |
|
"grad_norm": 1.1045983289273678, |
|
"learning_rate": 1.7142454525946888e-07, |
|
"loss": 1.5546, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.9974226804123711, |
|
"grad_norm": 1.1516418913315656, |
|
"learning_rate": 1.7128235783437998e-07, |
|
"loss": 1.5631, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.252168700059035, |
|
"learning_rate": 1.7113987682031778e-07, |
|
"loss": 1.4422, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0025773195876289, |
|
"grad_norm": 1.189319163542339, |
|
"learning_rate": 1.7099710280411546e-07, |
|
"loss": 1.5383, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0051546391752577, |
|
"grad_norm": 1.2727165097128585, |
|
"learning_rate": 1.70854036373813e-07, |
|
"loss": 1.5408, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0077319587628866, |
|
"grad_norm": 1.1517050348302873, |
|
"learning_rate": 1.7071067811865473e-07, |
|
"loss": 1.5864, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.0103092783505154, |
|
"grad_norm": 1.3325861122052731, |
|
"learning_rate": 1.7056702862908702e-07, |
|
"loss": 1.5524, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0103092783505154, |
|
"eval_loss": 1.4885141849517822, |
|
"eval_runtime": 78.424, |
|
"eval_samples_per_second": 21.205, |
|
"eval_steps_per_second": 1.326, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0128865979381443, |
|
"grad_norm": 1.1135739405983736, |
|
"learning_rate": 1.7042308849675554e-07, |
|
"loss": 1.5054, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0154639175257731, |
|
"grad_norm": 1.1782103759330078, |
|
"learning_rate": 1.7027885831450317e-07, |
|
"loss": 1.4809, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.018041237113402, |
|
"grad_norm": 1.1307316665373648, |
|
"learning_rate": 1.701343386763674e-07, |
|
"loss": 1.4176, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0206185567010309, |
|
"grad_norm": 1.2226276517588748, |
|
"learning_rate": 1.6998953017757785e-07, |
|
"loss": 1.5829, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.0231958762886597, |
|
"grad_norm": 1.2403418129653008, |
|
"learning_rate": 1.698444334145539e-07, |
|
"loss": 1.5954, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.0257731958762886, |
|
"grad_norm": 1.1302836106915826, |
|
"learning_rate": 1.6969904898490212e-07, |
|
"loss": 1.4231, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.0283505154639174, |
|
"grad_norm": 1.141960483416689, |
|
"learning_rate": 1.6955337748741405e-07, |
|
"loss": 1.4287, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 1.196477232474438, |
|
"learning_rate": 1.694074195220634e-07, |
|
"loss": 1.5239, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0335051546391754, |
|
"grad_norm": 1.183187501385808, |
|
"learning_rate": 1.692611756900038e-07, |
|
"loss": 1.497, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.0360824742268042, |
|
"grad_norm": 1.150174147558412, |
|
"learning_rate": 1.691146465935663e-07, |
|
"loss": 1.5532, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.038659793814433, |
|
"grad_norm": 1.2448204002333718, |
|
"learning_rate": 1.689678328362569e-07, |
|
"loss": 1.416, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.041237113402062, |
|
"grad_norm": 1.1109759208202117, |
|
"learning_rate": 1.6882073502275392e-07, |
|
"loss": 1.5012, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.0438144329896908, |
|
"grad_norm": 1.1567096038742686, |
|
"learning_rate": 1.6867335375890566e-07, |
|
"loss": 1.5053, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.0463917525773196, |
|
"grad_norm": 1.1754138924074398, |
|
"learning_rate": 1.6852568965172792e-07, |
|
"loss": 1.5129, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0489690721649485, |
|
"grad_norm": 1.23193132568122, |
|
"learning_rate": 1.6837774330940136e-07, |
|
"loss": 1.5573, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0515463917525774, |
|
"grad_norm": 1.154132682102343, |
|
"learning_rate": 1.6822951534126908e-07, |
|
"loss": 1.4258, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0541237113402062, |
|
"grad_norm": 1.1683702220075676, |
|
"learning_rate": 1.680810063578342e-07, |
|
"loss": 1.493, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.056701030927835, |
|
"grad_norm": 1.1355190434284121, |
|
"learning_rate": 1.6793221697075716e-07, |
|
"loss": 1.5119, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.059278350515464, |
|
"grad_norm": 1.1992497667084585, |
|
"learning_rate": 1.6778314779285324e-07, |
|
"loss": 1.538, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0618556701030928, |
|
"grad_norm": 1.1517964539720562, |
|
"learning_rate": 1.6763379943809027e-07, |
|
"loss": 1.4665, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.0644329896907216, |
|
"grad_norm": 1.0984210499840694, |
|
"learning_rate": 1.6748417252158577e-07, |
|
"loss": 1.4328, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.0670103092783505, |
|
"grad_norm": 1.1299450982658101, |
|
"learning_rate": 1.6733426765960456e-07, |
|
"loss": 1.5028, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.0695876288659794, |
|
"grad_norm": 1.212850591316243, |
|
"learning_rate": 1.6718408546955635e-07, |
|
"loss": 1.5834, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0721649484536082, |
|
"grad_norm": 1.187341231477269, |
|
"learning_rate": 1.6703362656999299e-07, |
|
"loss": 1.5069, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.074742268041237, |
|
"grad_norm": 1.2469684651532016, |
|
"learning_rate": 1.6688289158060593e-07, |
|
"loss": 1.518, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.077319587628866, |
|
"grad_norm": 1.254398054291776, |
|
"learning_rate": 1.6673188112222395e-07, |
|
"loss": 1.578, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0798969072164948, |
|
"grad_norm": 1.1499801218824168, |
|
"learning_rate": 1.665805958168102e-07, |
|
"loss": 1.4979, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.0824742268041236, |
|
"grad_norm": 1.1976396691121443, |
|
"learning_rate": 1.664290362874599e-07, |
|
"loss": 1.4914, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0850515463917525, |
|
"grad_norm": 1.1348401564795523, |
|
"learning_rate": 1.662772031583978e-07, |
|
"loss": 1.3902, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.0876288659793814, |
|
"grad_norm": 1.2267166932133524, |
|
"learning_rate": 1.6612509705497542e-07, |
|
"loss": 1.4352, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.0902061855670102, |
|
"grad_norm": 1.2873463533597629, |
|
"learning_rate": 1.6597271860366856e-07, |
|
"loss": 1.4478, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.092783505154639, |
|
"grad_norm": 1.3679857975054832, |
|
"learning_rate": 1.6582006843207478e-07, |
|
"loss": 1.5168, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.0953608247422681, |
|
"grad_norm": 1.326554289290517, |
|
"learning_rate": 1.6566714716891078e-07, |
|
"loss": 1.5008, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.097938144329897, |
|
"grad_norm": 1.168969016350491, |
|
"learning_rate": 1.6551395544400978e-07, |
|
"loss": 1.4917, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.1005154639175259, |
|
"grad_norm": 1.2413798753485674, |
|
"learning_rate": 1.6536049388831893e-07, |
|
"loss": 1.4502, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1030927835051547, |
|
"grad_norm": 1.1635621820926023, |
|
"learning_rate": 1.652067631338967e-07, |
|
"loss": 1.557, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1056701030927836, |
|
"grad_norm": 1.1573375306268514, |
|
"learning_rate": 1.6505276381391036e-07, |
|
"loss": 1.4244, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1082474226804124, |
|
"grad_norm": 1.2312412177915255, |
|
"learning_rate": 1.6489849656263335e-07, |
|
"loss": 1.5494, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1108247422680413, |
|
"grad_norm": 1.219284880839308, |
|
"learning_rate": 1.647439620154425e-07, |
|
"loss": 1.5306, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.1134020618556701, |
|
"grad_norm": 1.173558682623126, |
|
"learning_rate": 1.6458916080881563e-07, |
|
"loss": 1.4429, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.0025773195876289, |
|
"grad_norm": 1.229487690642213, |
|
"learning_rate": 1.6443409358032887e-07, |
|
"loss": 1.5753, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.0051546391752577, |
|
"grad_norm": 1.2105170741564812, |
|
"learning_rate": 1.6427876096865392e-07, |
|
"loss": 1.5334, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.0077319587628866, |
|
"grad_norm": 1.204008054808549, |
|
"learning_rate": 1.6412316361355562e-07, |
|
"loss": 1.42, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.0103092783505154, |
|
"grad_norm": 1.1326791826110472, |
|
"learning_rate": 1.6396730215588912e-07, |
|
"loss": 1.4714, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.0128865979381443, |
|
"grad_norm": 1.1200550697122906, |
|
"learning_rate": 1.6381117723759734e-07, |
|
"loss": 1.514, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.0154639175257731, |
|
"grad_norm": 1.1890623492712462, |
|
"learning_rate": 1.6365478950170833e-07, |
|
"loss": 1.4181, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.018041237113402, |
|
"grad_norm": 1.1631198253400261, |
|
"learning_rate": 1.6349813959233255e-07, |
|
"loss": 1.4062, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.0206185567010309, |
|
"grad_norm": 1.1360996622048518, |
|
"learning_rate": 1.6334122815466031e-07, |
|
"loss": 1.4486, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0231958762886597, |
|
"grad_norm": 1.1864758464899412, |
|
"learning_rate": 1.6318405583495913e-07, |
|
"loss": 1.5347, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.0231958762886597, |
|
"eval_loss": 1.4830812215805054, |
|
"eval_runtime": 78.5114, |
|
"eval_samples_per_second": 21.182, |
|
"eval_steps_per_second": 1.325, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.0257731958762886, |
|
"grad_norm": 1.1301160006601543, |
|
"learning_rate": 1.6302662328057085e-07, |
|
"loss": 1.4353, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0283505154639174, |
|
"grad_norm": 1.1894059515483042, |
|
"learning_rate": 1.6286893113990932e-07, |
|
"loss": 1.469, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.0309278350515463, |
|
"grad_norm": 1.1496261846772073, |
|
"learning_rate": 1.627109800624574e-07, |
|
"loss": 1.5501, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0335051546391754, |
|
"grad_norm": 1.2088185832357161, |
|
"learning_rate": 1.6255277069876451e-07, |
|
"loss": 1.4899, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.0360824742268042, |
|
"grad_norm": 1.1253812221554047, |
|
"learning_rate": 1.6239430370044387e-07, |
|
"loss": 1.4122, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.038659793814433, |
|
"grad_norm": 1.1716232931347121, |
|
"learning_rate": 1.6223557972016973e-07, |
|
"loss": 1.439, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.041237113402062, |
|
"grad_norm": 1.18342528126353, |
|
"learning_rate": 1.6207659941167485e-07, |
|
"loss": 1.5094, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.0438144329896908, |
|
"grad_norm": 1.2039062898512076, |
|
"learning_rate": 1.6191736342974767e-07, |
|
"loss": 1.4619, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.0463917525773196, |
|
"grad_norm": 1.2183703075903023, |
|
"learning_rate": 1.617578724302297e-07, |
|
"loss": 1.5232, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.0489690721649485, |
|
"grad_norm": 1.1388070881208434, |
|
"learning_rate": 1.615981270700128e-07, |
|
"loss": 1.4638, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.0515463917525774, |
|
"grad_norm": 1.0877380908149572, |
|
"learning_rate": 1.6143812800703642e-07, |
|
"loss": 1.4447, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.0541237113402062, |
|
"grad_norm": 1.1716268781083103, |
|
"learning_rate": 1.6127787590028495e-07, |
|
"loss": 1.5212, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.056701030927835, |
|
"grad_norm": 1.107434556978612, |
|
"learning_rate": 1.6111737140978493e-07, |
|
"loss": 1.4558, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.059278350515464, |
|
"grad_norm": 1.2519354029249565, |
|
"learning_rate": 1.609566151966025e-07, |
|
"loss": 1.4528, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.0618556701030928, |
|
"grad_norm": 1.1919323581174677, |
|
"learning_rate": 1.6079560792284045e-07, |
|
"loss": 1.5621, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.0644329896907216, |
|
"grad_norm": 1.1817947401366415, |
|
"learning_rate": 1.6063435025163568e-07, |
|
"loss": 1.4662, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.0670103092783505, |
|
"grad_norm": 1.2557632574926887, |
|
"learning_rate": 1.6047284284715642e-07, |
|
"loss": 1.4804, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.0695876288659794, |
|
"grad_norm": 1.2611184908202628, |
|
"learning_rate": 1.6031108637459932e-07, |
|
"loss": 1.3898, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.0721649484536082, |
|
"grad_norm": 1.2900278262304008, |
|
"learning_rate": 1.6014908150018703e-07, |
|
"loss": 1.5064, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.074742268041237, |
|
"grad_norm": 1.195779708533936, |
|
"learning_rate": 1.5998682889116524e-07, |
|
"loss": 1.5224, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.077319587628866, |
|
"grad_norm": 1.1566664249843968, |
|
"learning_rate": 1.5982432921579993e-07, |
|
"loss": 1.4517, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0798969072164948, |
|
"grad_norm": 1.2001020296312388, |
|
"learning_rate": 1.596615831433747e-07, |
|
"loss": 1.5602, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.0824742268041236, |
|
"grad_norm": 1.1943899233375934, |
|
"learning_rate": 1.5949859134418796e-07, |
|
"loss": 1.3757, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.0850515463917525, |
|
"grad_norm": 1.231964645169981, |
|
"learning_rate": 1.5933535448955027e-07, |
|
"loss": 1.4859, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.0876288659793814, |
|
"grad_norm": 1.1068734683342414, |
|
"learning_rate": 1.5917187325178137e-07, |
|
"loss": 1.4629, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.0902061855670102, |
|
"grad_norm": 1.1513773116941175, |
|
"learning_rate": 1.590081483042076e-07, |
|
"loss": 1.5125, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.0927835051546393, |
|
"grad_norm": 1.265359820624344, |
|
"learning_rate": 1.5884418032115906e-07, |
|
"loss": 1.5204, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.0953608247422681, |
|
"grad_norm": 1.1596012619544869, |
|
"learning_rate": 1.5867996997796683e-07, |
|
"loss": 1.4528, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.097938144329897, |
|
"grad_norm": 1.1953930948748877, |
|
"learning_rate": 1.5851551795096025e-07, |
|
"loss": 1.404, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.1005154639175259, |
|
"grad_norm": 1.1467999018042732, |
|
"learning_rate": 1.5835082491746393e-07, |
|
"loss": 1.5314, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.1030927835051547, |
|
"grad_norm": 1.208554802219746, |
|
"learning_rate": 1.581858915557953e-07, |
|
"loss": 1.4632, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.1056701030927836, |
|
"grad_norm": 1.210149302840143, |
|
"learning_rate": 1.580207185452614e-07, |
|
"loss": 1.4828, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.1082474226804124, |
|
"grad_norm": 1.0949101750229728, |
|
"learning_rate": 1.5785530656615654e-07, |
|
"loss": 1.4612, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.1108247422680413, |
|
"grad_norm": 1.1550991304470553, |
|
"learning_rate": 1.576896562997591e-07, |
|
"loss": 1.5112, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.1134020618556701, |
|
"grad_norm": 1.267086705459486, |
|
"learning_rate": 1.5752376842832898e-07, |
|
"loss": 1.5086, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.115979381443299, |
|
"grad_norm": 1.157659801945543, |
|
"learning_rate": 1.573576436351046e-07, |
|
"loss": 1.4721, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.1185567010309279, |
|
"grad_norm": 1.1792779255646542, |
|
"learning_rate": 1.571912826043003e-07, |
|
"loss": 1.4216, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.1211340206185567, |
|
"grad_norm": 1.279434721476292, |
|
"learning_rate": 1.5702468602110331e-07, |
|
"loss": 1.4098, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.1237113402061856, |
|
"grad_norm": 1.2412716991217037, |
|
"learning_rate": 1.5685785457167113e-07, |
|
"loss": 1.4855, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.1262886597938144, |
|
"grad_norm": 1.1878566044688987, |
|
"learning_rate": 1.5669078894312847e-07, |
|
"loss": 1.5252, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.1288659793814433, |
|
"grad_norm": 1.2441727908973987, |
|
"learning_rate": 1.565234898235646e-07, |
|
"loss": 1.5462, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.1314432989690721, |
|
"grad_norm": 1.176061624777031, |
|
"learning_rate": 1.5635595790203056e-07, |
|
"loss": 1.5135, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.134020618556701, |
|
"grad_norm": 1.246481799384192, |
|
"learning_rate": 1.5618819386853602e-07, |
|
"loss": 1.5357, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1365979381443299, |
|
"grad_norm": 1.2042279646873306, |
|
"learning_rate": 1.5602019841404688e-07, |
|
"loss": 1.5146, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.1391752577319587, |
|
"grad_norm": 1.1664753868373192, |
|
"learning_rate": 1.5585197223048202e-07, |
|
"loss": 1.5007, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.1417525773195876, |
|
"grad_norm": 1.0786695822166654, |
|
"learning_rate": 1.5568351601071068e-07, |
|
"loss": 1.4637, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.1443298969072164, |
|
"grad_norm": 1.1782507265833873, |
|
"learning_rate": 1.5551483044854952e-07, |
|
"loss": 1.4811, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.1469072164948453, |
|
"grad_norm": 1.2326350516083906, |
|
"learning_rate": 1.5534591623875985e-07, |
|
"loss": 1.5482, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.1494845360824741, |
|
"grad_norm": 1.3932475474558166, |
|
"learning_rate": 1.551767740770446e-07, |
|
"loss": 1.4994, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.1494845360824741, |
|
"eval_loss": 1.4784166812896729, |
|
"eval_runtime": 78.5816, |
|
"eval_samples_per_second": 21.163, |
|
"eval_steps_per_second": 1.323, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.152061855670103, |
|
"grad_norm": 1.2782842614630645, |
|
"learning_rate": 1.5500740466004562e-07, |
|
"loss": 1.4751, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.1546391752577319, |
|
"grad_norm": 1.216799121655535, |
|
"learning_rate": 1.5483780868534083e-07, |
|
"loss": 1.4724, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.1572164948453607, |
|
"grad_norm": 1.1868499010457458, |
|
"learning_rate": 1.546679868514411e-07, |
|
"loss": 1.4335, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.1597938144329896, |
|
"grad_norm": 1.199212625101753, |
|
"learning_rate": 1.544979398577877e-07, |
|
"loss": 1.428, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.1623711340206186, |
|
"grad_norm": 1.1357296953077098, |
|
"learning_rate": 1.543276684047491e-07, |
|
"loss": 1.4542, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.1649484536082475, |
|
"grad_norm": 1.213487385523563, |
|
"learning_rate": 1.5415717319361846e-07, |
|
"loss": 1.4724, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.1675257731958764, |
|
"grad_norm": 1.1099648565570772, |
|
"learning_rate": 1.5398645492661028e-07, |
|
"loss": 1.4254, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.1701030927835052, |
|
"grad_norm": 1.1324706525701729, |
|
"learning_rate": 1.5381551430685795e-07, |
|
"loss": 1.5048, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.172680412371134, |
|
"grad_norm": 1.312867551517799, |
|
"learning_rate": 1.5364435203841056e-07, |
|
"loss": 1.4713, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.175257731958763, |
|
"grad_norm": 1.1933326421003594, |
|
"learning_rate": 1.5347296882623017e-07, |
|
"loss": 1.5138, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1778350515463918, |
|
"grad_norm": 1.0985685695284346, |
|
"learning_rate": 1.533013653761887e-07, |
|
"loss": 1.433, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.1804123711340206, |
|
"grad_norm": 1.1149163975715322, |
|
"learning_rate": 1.5312954239506533e-07, |
|
"loss": 1.3835, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.1829896907216495, |
|
"grad_norm": 1.3227767494195912, |
|
"learning_rate": 1.529575005905433e-07, |
|
"loss": 1.4895, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.1855670103092784, |
|
"grad_norm": 1.214579951187228, |
|
"learning_rate": 1.5278524067120717e-07, |
|
"loss": 1.5998, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.1881443298969072, |
|
"grad_norm": 1.242415460112634, |
|
"learning_rate": 1.5261276334653982e-07, |
|
"loss": 1.419, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.190721649484536, |
|
"grad_norm": 1.2389773021924564, |
|
"learning_rate": 1.5244006932691953e-07, |
|
"loss": 1.4202, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.193298969072165, |
|
"grad_norm": 1.2349193613971634, |
|
"learning_rate": 1.5226715932361716e-07, |
|
"loss": 1.5457, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.1958762886597938, |
|
"grad_norm": 1.1148921709276238, |
|
"learning_rate": 1.5209403404879302e-07, |
|
"loss": 1.3884, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.1984536082474226, |
|
"grad_norm": 1.2416754407978092, |
|
"learning_rate": 1.5192069421549416e-07, |
|
"loss": 1.4643, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.2010309278350515, |
|
"grad_norm": 1.240689395283768, |
|
"learning_rate": 1.5174714053765122e-07, |
|
"loss": 1.572, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.2036082474226804, |
|
"grad_norm": 1.140745518174075, |
|
"learning_rate": 1.5157337373007578e-07, |
|
"loss": 1.3663, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.2061855670103092, |
|
"grad_norm": 1.1312657539313165, |
|
"learning_rate": 1.5139939450845698e-07, |
|
"loss": 1.4681, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.208762886597938, |
|
"grad_norm": 1.1613965817840117, |
|
"learning_rate": 1.51225203589359e-07, |
|
"loss": 1.548, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.211340206185567, |
|
"grad_norm": 1.1253218321610134, |
|
"learning_rate": 1.5105080169021789e-07, |
|
"loss": 1.4644, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.2139175257731958, |
|
"grad_norm": 1.1202729459915262, |
|
"learning_rate": 1.5087618952933866e-07, |
|
"loss": 1.4874, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.2164948453608249, |
|
"grad_norm": 1.1754915638068841, |
|
"learning_rate": 1.5070136782589233e-07, |
|
"loss": 1.4904, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.2190721649484537, |
|
"grad_norm": 1.211459122094429, |
|
"learning_rate": 1.5052633729991294e-07, |
|
"loss": 1.4832, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.2216494845360826, |
|
"grad_norm": 1.2489759850317173, |
|
"learning_rate": 1.5035109867229456e-07, |
|
"loss": 1.4464, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.2242268041237114, |
|
"grad_norm": 1.2194317834170105, |
|
"learning_rate": 1.5017565266478848e-07, |
|
"loss": 1.4897, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.2268041237113403, |
|
"grad_norm": 1.1036732258357687, |
|
"learning_rate": 1.5e-07, |
|
"loss": 1.491, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.2293814432989691, |
|
"grad_norm": 1.1658107658884465, |
|
"learning_rate": 1.4982414140138563e-07, |
|
"loss": 1.4678, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.231958762886598, |
|
"grad_norm": 1.2704801398111358, |
|
"learning_rate": 1.4964807759325008e-07, |
|
"loss": 1.3781, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.2345360824742269, |
|
"grad_norm": 1.1848897409786574, |
|
"learning_rate": 1.4947180930074323e-07, |
|
"loss": 1.4799, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.2371134020618557, |
|
"grad_norm": 1.2016447040520333, |
|
"learning_rate": 1.492953372498571e-07, |
|
"loss": 1.5686, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.2396907216494846, |
|
"grad_norm": 1.2911746325303657, |
|
"learning_rate": 1.4911866216742307e-07, |
|
"loss": 1.5241, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.2422680412371134, |
|
"grad_norm": 1.1990990248512616, |
|
"learning_rate": 1.4894178478110855e-07, |
|
"loss": 1.5357, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.2448453608247423, |
|
"grad_norm": 1.149144012214145, |
|
"learning_rate": 1.4876470581941434e-07, |
|
"loss": 1.4571, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.2474226804123711, |
|
"grad_norm": 1.198321859008649, |
|
"learning_rate": 1.485874260116714e-07, |
|
"loss": 1.5113, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.2113266741136735, |
|
"learning_rate": 1.4840994608803788e-07, |
|
"loss": 1.4782, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.2525773195876289, |
|
"grad_norm": 1.1425317175556289, |
|
"learning_rate": 1.4823226677949622e-07, |
|
"loss": 1.5012, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2551546391752577, |
|
"grad_norm": 1.268980235594048, |
|
"learning_rate": 1.4805438881784995e-07, |
|
"loss": 1.4529, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.2577319587628866, |
|
"grad_norm": 1.151209820959519, |
|
"learning_rate": 1.478763129357209e-07, |
|
"loss": 1.4734, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.2603092783505154, |
|
"grad_norm": 1.2260752095042977, |
|
"learning_rate": 1.4769803986654603e-07, |
|
"loss": 1.4896, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.2628865979381443, |
|
"grad_norm": 1.2017887268263763, |
|
"learning_rate": 1.4751957034457445e-07, |
|
"loss": 1.4667, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.2654639175257731, |
|
"grad_norm": 1.2271959233872554, |
|
"learning_rate": 1.4734090510486432e-07, |
|
"loss": 1.4888, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.268041237113402, |
|
"grad_norm": 1.2197382019523413, |
|
"learning_rate": 1.4716204488328006e-07, |
|
"loss": 1.5358, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.2706185567010309, |
|
"grad_norm": 1.1416105765632265, |
|
"learning_rate": 1.4698299041648902e-07, |
|
"loss": 1.4275, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.2731958762886597, |
|
"grad_norm": 1.2103999127902116, |
|
"learning_rate": 1.468037424419586e-07, |
|
"loss": 1.4822, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.2757731958762886, |
|
"grad_norm": 1.2127169663908728, |
|
"learning_rate": 1.4662430169795328e-07, |
|
"loss": 1.4477, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.2757731958762886, |
|
"eval_loss": 1.474165678024292, |
|
"eval_runtime": 78.6592, |
|
"eval_samples_per_second": 21.142, |
|
"eval_steps_per_second": 1.322, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.2783505154639174, |
|
"grad_norm": 1.2719723678439783, |
|
"learning_rate": 1.464446689235314e-07, |
|
"loss": 1.5694, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2809278350515463, |
|
"grad_norm": 1.094905461428815, |
|
"learning_rate": 1.4626484485854228e-07, |
|
"loss": 1.4405, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.2835051546391751, |
|
"grad_norm": 1.1572746515704029, |
|
"learning_rate": 1.4608483024362303e-07, |
|
"loss": 1.49, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.286082474226804, |
|
"grad_norm": 1.2136708668686302, |
|
"learning_rate": 1.4590462582019566e-07, |
|
"loss": 1.5488, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.2886597938144329, |
|
"grad_norm": 1.1351781538641772, |
|
"learning_rate": 1.4572423233046385e-07, |
|
"loss": 1.4436, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.291237113402062, |
|
"grad_norm": 1.2233902585418839, |
|
"learning_rate": 1.455436505174101e-07, |
|
"loss": 1.4752, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.2938144329896908, |
|
"grad_norm": 1.2111257906769834, |
|
"learning_rate": 1.453628811247924e-07, |
|
"loss": 1.5437, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.2963917525773196, |
|
"grad_norm": 1.214330730454999, |
|
"learning_rate": 1.4518192489714148e-07, |
|
"loss": 1.5874, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.2989690721649485, |
|
"grad_norm": 1.1501171354212085, |
|
"learning_rate": 1.4500078257975746e-07, |
|
"loss": 1.4441, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.3015463917525774, |
|
"grad_norm": 1.256132517451847, |
|
"learning_rate": 1.4481945491870692e-07, |
|
"loss": 1.4869, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.3041237113402062, |
|
"grad_norm": 1.153698353782002, |
|
"learning_rate": 1.4463794266081993e-07, |
|
"loss": 1.4298, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.306701030927835, |
|
"grad_norm": 1.1141900425922164, |
|
"learning_rate": 1.4445624655368672e-07, |
|
"loss": 1.4081, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.309278350515464, |
|
"grad_norm": 1.1727962553732723, |
|
"learning_rate": 1.4427436734565474e-07, |
|
"loss": 1.4843, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.3118556701030928, |
|
"grad_norm": 1.1904748231664284, |
|
"learning_rate": 1.4409230578582564e-07, |
|
"loss": 1.4408, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.3144329896907216, |
|
"grad_norm": 1.1596562097777137, |
|
"learning_rate": 1.4391006262405212e-07, |
|
"loss": 1.5078, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.3170103092783505, |
|
"grad_norm": 1.1362387372168263, |
|
"learning_rate": 1.4372763861093478e-07, |
|
"loss": 1.4596, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.3195876288659794, |
|
"grad_norm": 1.2438435278065572, |
|
"learning_rate": 1.4354503449781913e-07, |
|
"loss": 1.536, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.3221649484536082, |
|
"grad_norm": 1.182522665170931, |
|
"learning_rate": 1.4336225103679243e-07, |
|
"loss": 1.5611, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.324742268041237, |
|
"grad_norm": 1.2822957709992338, |
|
"learning_rate": 1.4317928898068066e-07, |
|
"loss": 1.4826, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.327319587628866, |
|
"grad_norm": 1.2758012985116745, |
|
"learning_rate": 1.4299614908304528e-07, |
|
"loss": 1.4543, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.3298969072164948, |
|
"grad_norm": 1.164766457118801, |
|
"learning_rate": 1.4281283209818038e-07, |
|
"loss": 1.4061, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.3324742268041236, |
|
"grad_norm": 1.1663065580316805, |
|
"learning_rate": 1.4262933878110923e-07, |
|
"loss": 1.5151, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.3350515463917525, |
|
"grad_norm": 1.1525726704239359, |
|
"learning_rate": 1.4244566988758152e-07, |
|
"loss": 1.5209, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.3376288659793816, |
|
"grad_norm": 1.194456252210575, |
|
"learning_rate": 1.4226182617406994e-07, |
|
"loss": 1.5003, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.3402061855670104, |
|
"grad_norm": 1.2788205228042828, |
|
"learning_rate": 1.4207780839776734e-07, |
|
"loss": 1.5807, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.3427835051546393, |
|
"grad_norm": 1.2101911204508933, |
|
"learning_rate": 1.4189361731658336e-07, |
|
"loss": 1.4851, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.3453608247422681, |
|
"grad_norm": 1.143725315674112, |
|
"learning_rate": 1.417092536891415e-07, |
|
"loss": 1.5258, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.347938144329897, |
|
"grad_norm": 1.1692223610404973, |
|
"learning_rate": 1.4152471827477593e-07, |
|
"loss": 1.4843, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.3505154639175259, |
|
"grad_norm": 1.106947712823219, |
|
"learning_rate": 1.413400118335283e-07, |
|
"loss": 1.4339, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.3530927835051547, |
|
"grad_norm": 1.27487397886756, |
|
"learning_rate": 1.4115513512614468e-07, |
|
"loss": 1.4993, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.3556701030927836, |
|
"grad_norm": 1.2236429851509971, |
|
"learning_rate": 1.4097008891407245e-07, |
|
"loss": 1.4858, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.3582474226804124, |
|
"grad_norm": 1.156634200386137, |
|
"learning_rate": 1.407848739594571e-07, |
|
"loss": 1.4973, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.3608247422680413, |
|
"grad_norm": 1.287092803375809, |
|
"learning_rate": 1.4059949102513913e-07, |
|
"loss": 1.476, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.3634020618556701, |
|
"grad_norm": 1.2572273439235049, |
|
"learning_rate": 1.404139408746508e-07, |
|
"loss": 1.4798, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.365979381443299, |
|
"grad_norm": 1.2276167223192924, |
|
"learning_rate": 1.4022822427221322e-07, |
|
"loss": 1.497, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.3685567010309279, |
|
"grad_norm": 1.2392858668139202, |
|
"learning_rate": 1.4004234198273302e-07, |
|
"loss": 1.5471, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.3711340206185567, |
|
"grad_norm": 1.2887104141411092, |
|
"learning_rate": 1.3985629477179915e-07, |
|
"loss": 1.4953, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.3737113402061856, |
|
"grad_norm": 1.2401450542055277, |
|
"learning_rate": 1.3967008340567996e-07, |
|
"loss": 1.5095, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.3762886597938144, |
|
"grad_norm": 1.1989888153377388, |
|
"learning_rate": 1.3948370865131977e-07, |
|
"loss": 1.5633, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.3788659793814433, |
|
"grad_norm": 1.1616958019574242, |
|
"learning_rate": 1.3929717127633597e-07, |
|
"loss": 1.5035, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.3814432989690721, |
|
"grad_norm": 1.1581446950268255, |
|
"learning_rate": 1.3911047204901557e-07, |
|
"loss": 1.5232, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.384020618556701, |
|
"grad_norm": 1.2240328360723358, |
|
"learning_rate": 1.3892361173831243e-07, |
|
"loss": 1.4948, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.3865979381443299, |
|
"grad_norm": 1.2405325514200207, |
|
"learning_rate": 1.3873659111384362e-07, |
|
"loss": 1.4815, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.3891752577319587, |
|
"grad_norm": 1.253563661932654, |
|
"learning_rate": 1.385494109458866e-07, |
|
"loss": 1.4284, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.3917525773195876, |
|
"grad_norm": 1.1541355431922666, |
|
"learning_rate": 1.3836207200537596e-07, |
|
"loss": 1.4213, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.3943298969072164, |
|
"grad_norm": 1.2315631871967962, |
|
"learning_rate": 1.381745750639002e-07, |
|
"loss": 1.5876, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.3969072164948453, |
|
"grad_norm": 1.3294003251532183, |
|
"learning_rate": 1.3798692089369854e-07, |
|
"loss": 1.5821, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.3994845360824741, |
|
"grad_norm": 1.1726722981119444, |
|
"learning_rate": 1.3779911026765784e-07, |
|
"loss": 1.4679, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.402061855670103, |
|
"grad_norm": 1.1782526174868226, |
|
"learning_rate": 1.3761114395930927e-07, |
|
"loss": 1.4851, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.402061855670103, |
|
"eval_loss": 1.4704606533050537, |
|
"eval_runtime": 78.4306, |
|
"eval_samples_per_second": 21.203, |
|
"eval_steps_per_second": 1.326, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.4046391752577319, |
|
"grad_norm": 1.192382882455904, |
|
"learning_rate": 1.3742302274282532e-07, |
|
"loss": 1.4707, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.4072164948453607, |
|
"grad_norm": 1.1364191762169735, |
|
"learning_rate": 1.3723474739301636e-07, |
|
"loss": 1.4066, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.4097938144329896, |
|
"grad_norm": 1.1453269827664123, |
|
"learning_rate": 1.3704631868532767e-07, |
|
"loss": 1.4515, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.4123711340206184, |
|
"grad_norm": 1.1956529180296382, |
|
"learning_rate": 1.3685773739583617e-07, |
|
"loss": 1.5102, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.4149484536082475, |
|
"grad_norm": 1.2356880855065446, |
|
"learning_rate": 1.3666900430124717e-07, |
|
"loss": 1.497, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.4175257731958764, |
|
"grad_norm": 1.1639642247143227, |
|
"learning_rate": 1.3648012017889121e-07, |
|
"loss": 1.485, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.4201030927835052, |
|
"grad_norm": 1.3028192646105916, |
|
"learning_rate": 1.3629108580672093e-07, |
|
"loss": 1.5073, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.422680412371134, |
|
"grad_norm": 1.1389634389377659, |
|
"learning_rate": 1.3610190196330775e-07, |
|
"loss": 1.4455, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.425257731958763, |
|
"grad_norm": 1.2726998162356058, |
|
"learning_rate": 1.3591256942783868e-07, |
|
"loss": 1.6226, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.4278350515463918, |
|
"grad_norm": 1.257001783763068, |
|
"learning_rate": 1.3572308898011326e-07, |
|
"loss": 1.4527, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.4304123711340206, |
|
"grad_norm": 1.1897100853456886, |
|
"learning_rate": 1.3553346140054013e-07, |
|
"loss": 1.4748, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.4329896907216495, |
|
"grad_norm": 1.144640373535268, |
|
"learning_rate": 1.3534368747013394e-07, |
|
"loss": 1.4733, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.4355670103092784, |
|
"grad_norm": 1.2252518120948153, |
|
"learning_rate": 1.351537679705121e-07, |
|
"loss": 1.4539, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.4381443298969072, |
|
"grad_norm": 1.1565118663607803, |
|
"learning_rate": 1.3496370368389165e-07, |
|
"loss": 1.5236, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.440721649484536, |
|
"grad_norm": 1.2594818027515957, |
|
"learning_rate": 1.3477349539308584e-07, |
|
"loss": 1.4856, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.443298969072165, |
|
"grad_norm": 1.1419387268061763, |
|
"learning_rate": 1.3458314388150115e-07, |
|
"loss": 1.4153, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.4458762886597938, |
|
"grad_norm": 1.098148594961463, |
|
"learning_rate": 1.3439264993313385e-07, |
|
"loss": 1.4447, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.4484536082474226, |
|
"grad_norm": 1.2022510861175644, |
|
"learning_rate": 1.342020143325669e-07, |
|
"loss": 1.5516, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.4510309278350515, |
|
"grad_norm": 1.1444341747665796, |
|
"learning_rate": 1.3401123786496663e-07, |
|
"loss": 1.4224, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.4536082474226804, |
|
"grad_norm": 1.1349715757276768, |
|
"learning_rate": 1.3382032131607965e-07, |
|
"loss": 1.3973, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.4561855670103092, |
|
"grad_norm": 1.1228999228709107, |
|
"learning_rate": 1.3362926547222946e-07, |
|
"loss": 1.4149, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.458762886597938, |
|
"grad_norm": 1.2396644989009444, |
|
"learning_rate": 1.3343807112031327e-07, |
|
"loss": 1.4999, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.4613402061855671, |
|
"grad_norm": 1.1458789067959891, |
|
"learning_rate": 1.3324673904779874e-07, |
|
"loss": 1.4606, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.463917525773196, |
|
"grad_norm": 1.1579136550048348, |
|
"learning_rate": 1.3305527004272087e-07, |
|
"loss": 1.5091, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.4664948453608249, |
|
"grad_norm": 1.1065943702186947, |
|
"learning_rate": 1.3286366489367846e-07, |
|
"loss": 1.4981, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.4690721649484537, |
|
"grad_norm": 1.1701708173193963, |
|
"learning_rate": 1.3267192438983117e-07, |
|
"loss": 1.4864, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.4716494845360826, |
|
"grad_norm": 1.1655119326822228, |
|
"learning_rate": 1.324800493208961e-07, |
|
"loss": 1.4609, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.4742268041237114, |
|
"grad_norm": 1.1668952825289185, |
|
"learning_rate": 1.322880404771446e-07, |
|
"loss": 1.5529, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.4768041237113403, |
|
"grad_norm": 1.1827027818749032, |
|
"learning_rate": 1.3209589864939906e-07, |
|
"loss": 1.4712, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.4793814432989691, |
|
"grad_norm": 1.16644526665677, |
|
"learning_rate": 1.3190362462902935e-07, |
|
"loss": 1.4444, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.481958762886598, |
|
"grad_norm": 1.2457925422571992, |
|
"learning_rate": 1.3171121920795012e-07, |
|
"loss": 1.5042, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.4845360824742269, |
|
"grad_norm": 1.1848287601135188, |
|
"learning_rate": 1.3151868317861698e-07, |
|
"loss": 1.5314, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4871134020618557, |
|
"grad_norm": 1.181022425488295, |
|
"learning_rate": 1.3132601733402355e-07, |
|
"loss": 1.5557, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.4896907216494846, |
|
"grad_norm": 1.2220291945868886, |
|
"learning_rate": 1.3113322246769816e-07, |
|
"loss": 1.4743, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.4922680412371134, |
|
"grad_norm": 1.23521757296614, |
|
"learning_rate": 1.3094029937370049e-07, |
|
"loss": 1.4494, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.4948453608247423, |
|
"grad_norm": 1.1540829106187, |
|
"learning_rate": 1.3074724884661832e-07, |
|
"loss": 1.492, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.4974226804123711, |
|
"grad_norm": 1.2734897659131177, |
|
"learning_rate": 1.3055407168156436e-07, |
|
"loss": 1.5114, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.0821110483827021, |
|
"learning_rate": 1.3036076867417286e-07, |
|
"loss": 1.4899, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.5025773195876289, |
|
"grad_norm": 1.1591573630093586, |
|
"learning_rate": 1.3016734062059636e-07, |
|
"loss": 1.4287, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.5051546391752577, |
|
"grad_norm": 1.252040765136315, |
|
"learning_rate": 1.299737883175024e-07, |
|
"loss": 1.4215, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.5077319587628866, |
|
"grad_norm": 1.122072741553452, |
|
"learning_rate": 1.2978011256207041e-07, |
|
"loss": 1.4535, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.5103092783505154, |
|
"grad_norm": 1.1929144211640363, |
|
"learning_rate": 1.2958631415198813e-07, |
|
"loss": 1.4264, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.5128865979381443, |
|
"grad_norm": 1.1904423534607285, |
|
"learning_rate": 1.293923938854485e-07, |
|
"loss": 1.4966, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.5154639175257731, |
|
"grad_norm": 1.2142748405878527, |
|
"learning_rate": 1.2919835256114638e-07, |
|
"loss": 1.4152, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.518041237113402, |
|
"grad_norm": 1.2310572109795892, |
|
"learning_rate": 1.290041909782752e-07, |
|
"loss": 1.3986, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.5206185567010309, |
|
"grad_norm": 1.1532910482056786, |
|
"learning_rate": 1.2880990993652377e-07, |
|
"loss": 1.4606, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.5231958762886597, |
|
"grad_norm": 1.3007475509786544, |
|
"learning_rate": 1.2861551023607276e-07, |
|
"loss": 1.5304, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.5257731958762886, |
|
"grad_norm": 1.15166741332348, |
|
"learning_rate": 1.2842099267759174e-07, |
|
"loss": 1.3824, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.5283505154639174, |
|
"grad_norm": 1.1988826738728366, |
|
"learning_rate": 1.2822635806223556e-07, |
|
"loss": 1.567, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.5283505154639174, |
|
"eval_loss": 1.4671498537063599, |
|
"eval_runtime": 78.5049, |
|
"eval_samples_per_second": 21.183, |
|
"eval_steps_per_second": 1.325, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.5309278350515463, |
|
"grad_norm": 1.2386857438447851, |
|
"learning_rate": 1.2803160719164125e-07, |
|
"loss": 1.5304, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.5335051546391751, |
|
"grad_norm": 1.1597769415791235, |
|
"learning_rate": 1.2783674086792466e-07, |
|
"loss": 1.497, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.536082474226804, |
|
"grad_norm": 1.2924257071547485, |
|
"learning_rate": 1.2764175989367717e-07, |
|
"loss": 1.4877, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.5386597938144329, |
|
"grad_norm": 1.222248016944084, |
|
"learning_rate": 1.2744666507196224e-07, |
|
"loss": 1.5257, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.5412371134020617, |
|
"grad_norm": 1.0852012266696331, |
|
"learning_rate": 1.2725145720631242e-07, |
|
"loss": 1.4657, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.5438144329896906, |
|
"grad_norm": 1.2029751793520205, |
|
"learning_rate": 1.2705613710072573e-07, |
|
"loss": 1.543, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.5463917525773194, |
|
"grad_norm": 1.1786774736346322, |
|
"learning_rate": 1.2686070555966252e-07, |
|
"loss": 1.4163, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.5489690721649485, |
|
"grad_norm": 1.1923466397926792, |
|
"learning_rate": 1.2666516338804208e-07, |
|
"loss": 1.449, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.5515463917525774, |
|
"grad_norm": 1.1491363181852474, |
|
"learning_rate": 1.2646951139123932e-07, |
|
"loss": 1.4773, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.5541237113402062, |
|
"grad_norm": 1.1921001128896263, |
|
"learning_rate": 1.2627375037508162e-07, |
|
"loss": 1.4596, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.556701030927835, |
|
"grad_norm": 1.2215090538297548, |
|
"learning_rate": 1.2607788114584522e-07, |
|
"loss": 1.5697, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.559278350515464, |
|
"grad_norm": 1.1364987023852344, |
|
"learning_rate": 1.2588190451025208e-07, |
|
"loss": 1.4126, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.5618556701030928, |
|
"grad_norm": 1.139874297388743, |
|
"learning_rate": 1.2568582127546662e-07, |
|
"loss": 1.4104, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.5644329896907216, |
|
"grad_norm": 1.1273021800754177, |
|
"learning_rate": 1.2548963224909223e-07, |
|
"loss": 1.4407, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.5670103092783505, |
|
"grad_norm": 1.1999146152571862, |
|
"learning_rate": 1.2529333823916806e-07, |
|
"loss": 1.4779, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.5695876288659794, |
|
"grad_norm": 1.1170496605169837, |
|
"learning_rate": 1.2509694005416563e-07, |
|
"loss": 1.4368, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.5721649484536082, |
|
"grad_norm": 1.099167093974349, |
|
"learning_rate": 1.2490043850298557e-07, |
|
"loss": 1.4932, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.574742268041237, |
|
"grad_norm": 1.219342527534671, |
|
"learning_rate": 1.2470383439495416e-07, |
|
"loss": 1.4633, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.577319587628866, |
|
"grad_norm": 1.2125741355588842, |
|
"learning_rate": 1.2450712853982014e-07, |
|
"loss": 1.5161, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.579896907216495, |
|
"grad_norm": 1.2755825455134522, |
|
"learning_rate": 1.2431032174775127e-07, |
|
"loss": 1.5225, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.5824742268041239, |
|
"grad_norm": 1.1521606084223124, |
|
"learning_rate": 1.2411341482933108e-07, |
|
"loss": 1.4308, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.5850515463917527, |
|
"grad_norm": 1.165275382475451, |
|
"learning_rate": 1.239164085955555e-07, |
|
"loss": 1.5024, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.5876288659793816, |
|
"grad_norm": 1.2609655964912305, |
|
"learning_rate": 1.2371930385782943e-07, |
|
"loss": 1.4669, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5902061855670104, |
|
"grad_norm": 1.1698575645046683, |
|
"learning_rate": 1.2352210142796356e-07, |
|
"loss": 1.4752, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.5927835051546393, |
|
"grad_norm": 1.1966335794904208, |
|
"learning_rate": 1.2332480211817091e-07, |
|
"loss": 1.5478, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.5953608247422681, |
|
"grad_norm": 1.081476396234954, |
|
"learning_rate": 1.2312740674106347e-07, |
|
"loss": 1.451, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.597938144329897, |
|
"grad_norm": 1.2089145441748135, |
|
"learning_rate": 1.22929916109649e-07, |
|
"loss": 1.4975, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.6005154639175259, |
|
"grad_norm": 1.2416284172109027, |
|
"learning_rate": 1.227323310373275e-07, |
|
"loss": 1.43, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.6030927835051547, |
|
"grad_norm": 1.2758382819864167, |
|
"learning_rate": 1.2253465233788794e-07, |
|
"loss": 1.4589, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.6056701030927836, |
|
"grad_norm": 1.1736803322764697, |
|
"learning_rate": 1.22336880825505e-07, |
|
"loss": 1.3896, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.6082474226804124, |
|
"grad_norm": 1.1927775409437176, |
|
"learning_rate": 1.2213901731473551e-07, |
|
"loss": 1.5394, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.6108247422680413, |
|
"grad_norm": 1.2264294531171918, |
|
"learning_rate": 1.219410626205153e-07, |
|
"loss": 1.4543, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.6134020618556701, |
|
"grad_norm": 1.2693861374653377, |
|
"learning_rate": 1.217430175581557e-07, |
|
"loss": 1.484, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.615979381443299, |
|
"grad_norm": 1.2665036241537893, |
|
"learning_rate": 1.2154488294334027e-07, |
|
"loss": 1.5607, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.6185567010309279, |
|
"grad_norm": 1.1703235363860394, |
|
"learning_rate": 1.2134665959212136e-07, |
|
"loss": 1.4644, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.6211340206185567, |
|
"grad_norm": 1.193069004037872, |
|
"learning_rate": 1.211483483209169e-07, |
|
"loss": 1.4888, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.6237113402061856, |
|
"grad_norm": 1.2361705074035756, |
|
"learning_rate": 1.209499499465068e-07, |
|
"loss": 1.4504, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.6262886597938144, |
|
"grad_norm": 1.095084009584948, |
|
"learning_rate": 1.2075146528602983e-07, |
|
"loss": 1.4828, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.6288659793814433, |
|
"grad_norm": 1.1262123200952905, |
|
"learning_rate": 1.2055289515698006e-07, |
|
"loss": 1.5487, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.6314432989690721, |
|
"grad_norm": 1.1378828378426857, |
|
"learning_rate": 1.2035424037720364e-07, |
|
"loss": 1.4921, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.634020618556701, |
|
"grad_norm": 1.1961288239091903, |
|
"learning_rate": 1.2015550176489537e-07, |
|
"loss": 1.4421, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.6365979381443299, |
|
"grad_norm": 1.1366747357584532, |
|
"learning_rate": 1.199566801385953e-07, |
|
"loss": 1.4392, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.6391752577319587, |
|
"grad_norm": 1.1909816425714403, |
|
"learning_rate": 1.1975777631718532e-07, |
|
"loss": 1.5001, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.6417525773195876, |
|
"grad_norm": 1.2963539362844378, |
|
"learning_rate": 1.19558791119886e-07, |
|
"loss": 1.4605, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.6443298969072164, |
|
"grad_norm": 1.1580390642200817, |
|
"learning_rate": 1.19359725366253e-07, |
|
"loss": 1.5063, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.6469072164948453, |
|
"grad_norm": 1.216487820544871, |
|
"learning_rate": 1.1916057987617374e-07, |
|
"loss": 1.4886, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.6494845360824741, |
|
"grad_norm": 1.2218109581350323, |
|
"learning_rate": 1.1896135546986407e-07, |
|
"loss": 1.4608, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.652061855670103, |
|
"grad_norm": 1.2280111906896558, |
|
"learning_rate": 1.1876205296786493e-07, |
|
"loss": 1.5096, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.6546391752577319, |
|
"grad_norm": 1.2166796078055058, |
|
"learning_rate": 1.1856267319103876e-07, |
|
"loss": 1.4692, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.6546391752577319, |
|
"eval_loss": 1.4642903804779053, |
|
"eval_runtime": 78.6766, |
|
"eval_samples_per_second": 21.137, |
|
"eval_steps_per_second": 1.322, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.6572164948453607, |
|
"grad_norm": 1.1939355446450859, |
|
"learning_rate": 1.1836321696056645e-07, |
|
"loss": 1.4137, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.6597938144329896, |
|
"grad_norm": 1.2546613486361071, |
|
"learning_rate": 1.1816368509794363e-07, |
|
"loss": 1.512, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.6623711340206184, |
|
"grad_norm": 1.1366449756739982, |
|
"learning_rate": 1.1796407842497753e-07, |
|
"loss": 1.3836, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.6649484536082473, |
|
"grad_norm": 1.2553355162175337, |
|
"learning_rate": 1.1776439776378351e-07, |
|
"loss": 1.4565, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.6675257731958761, |
|
"grad_norm": 1.20909630643183, |
|
"learning_rate": 1.1756464393678151e-07, |
|
"loss": 1.4481, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.670103092783505, |
|
"grad_norm": 1.2273438479078924, |
|
"learning_rate": 1.1736481776669305e-07, |
|
"loss": 1.4903, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.672680412371134, |
|
"grad_norm": 1.1909626287045671, |
|
"learning_rate": 1.1716492007653737e-07, |
|
"loss": 1.5012, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.675257731958763, |
|
"grad_norm": 1.158968259505721, |
|
"learning_rate": 1.1696495168962845e-07, |
|
"loss": 1.5465, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.6778350515463918, |
|
"grad_norm": 1.1963581026774628, |
|
"learning_rate": 1.1676491342957142e-07, |
|
"loss": 1.4729, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.6804123711340206, |
|
"grad_norm": 1.215536392765087, |
|
"learning_rate": 1.1656480612025911e-07, |
|
"loss": 1.4164, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.6829896907216495, |
|
"grad_norm": 1.0521259077304612, |
|
"learning_rate": 1.163646305858688e-07, |
|
"loss": 1.3678, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.6855670103092784, |
|
"grad_norm": 1.295543359347737, |
|
"learning_rate": 1.1616438765085881e-07, |
|
"loss": 1.57, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.6881443298969072, |
|
"grad_norm": 1.1720574150387943, |
|
"learning_rate": 1.1596407813996498e-07, |
|
"loss": 1.5221, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.690721649484536, |
|
"grad_norm": 1.186785802460397, |
|
"learning_rate": 1.1576370287819735e-07, |
|
"loss": 1.4673, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.693298969072165, |
|
"grad_norm": 1.1589224859683183, |
|
"learning_rate": 1.155632626908369e-07, |
|
"loss": 1.3919, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.6958762886597938, |
|
"grad_norm": 1.3034607577131674, |
|
"learning_rate": 1.1536275840343183e-07, |
|
"loss": 1.43, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.6984536082474226, |
|
"grad_norm": 1.1721298121139936, |
|
"learning_rate": 1.1516219084179448e-07, |
|
"loss": 1.5556, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.7010309278350515, |
|
"grad_norm": 1.164281783704574, |
|
"learning_rate": 1.149615608319978e-07, |
|
"loss": 1.4449, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.7036082474226806, |
|
"grad_norm": 1.1144845067827036, |
|
"learning_rate": 1.1476086920037183e-07, |
|
"loss": 1.5204, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.7061855670103094, |
|
"grad_norm": 1.1470381221039117, |
|
"learning_rate": 1.1456011677350051e-07, |
|
"loss": 1.4096, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.7087628865979383, |
|
"grad_norm": 1.1938066626201722, |
|
"learning_rate": 1.1435930437821812e-07, |
|
"loss": 1.4299, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.7113402061855671, |
|
"grad_norm": 1.389576843014182, |
|
"learning_rate": 1.1415843284160598e-07, |
|
"loss": 1.4736, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.713917525773196, |
|
"grad_norm": 1.1886965701829686, |
|
"learning_rate": 1.1395750299098899e-07, |
|
"loss": 1.4972, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.7164948453608249, |
|
"grad_norm": 1.1389546972088997, |
|
"learning_rate": 1.1375651565393218e-07, |
|
"loss": 1.5518, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.7190721649484537, |
|
"grad_norm": 1.1743796585118387, |
|
"learning_rate": 1.1355547165823738e-07, |
|
"loss": 1.4672, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.7216494845360826, |
|
"grad_norm": 1.1460525519017093, |
|
"learning_rate": 1.1335437183193979e-07, |
|
"loss": 1.5233, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.7242268041237114, |
|
"grad_norm": 1.1586406558048044, |
|
"learning_rate": 1.1315321700330454e-07, |
|
"loss": 1.4686, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.7268041237113403, |
|
"grad_norm": 1.1369470779252082, |
|
"learning_rate": 1.1295200800082326e-07, |
|
"loss": 1.4688, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.7293814432989691, |
|
"grad_norm": 1.1705799315615684, |
|
"learning_rate": 1.1275074565321078e-07, |
|
"loss": 1.3893, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.731958762886598, |
|
"grad_norm": 1.1725120595380418, |
|
"learning_rate": 1.125494307894016e-07, |
|
"loss": 1.537, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.7345360824742269, |
|
"grad_norm": 1.0734797144766555, |
|
"learning_rate": 1.1234806423854653e-07, |
|
"loss": 1.4388, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.7371134020618557, |
|
"grad_norm": 1.170033873518124, |
|
"learning_rate": 1.1214664683000924e-07, |
|
"loss": 1.3753, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.7396907216494846, |
|
"grad_norm": 1.231373540289329, |
|
"learning_rate": 1.1194517939336287e-07, |
|
"loss": 1.5497, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.7422680412371134, |
|
"grad_norm": 1.1946433920559838, |
|
"learning_rate": 1.1174366275838662e-07, |
|
"loss": 1.413, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.7448453608247423, |
|
"grad_norm": 1.1418431201062664, |
|
"learning_rate": 1.115420977550624e-07, |
|
"loss": 1.4914, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.7474226804123711, |
|
"grad_norm": 1.2072128707535221, |
|
"learning_rate": 1.1134048521357115e-07, |
|
"loss": 1.4836, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 1.194692316000769, |
|
"learning_rate": 1.1113882596428976e-07, |
|
"loss": 1.4389, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.7525773195876289, |
|
"grad_norm": 1.181835370102449, |
|
"learning_rate": 1.1093712083778746e-07, |
|
"loss": 1.4542, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.7551546391752577, |
|
"grad_norm": 1.1310122085797796, |
|
"learning_rate": 1.1073537066482235e-07, |
|
"loss": 1.4572, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.7577319587628866, |
|
"grad_norm": 1.1299559219838877, |
|
"learning_rate": 1.1053357627633821e-07, |
|
"loss": 1.5374, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.7603092783505154, |
|
"grad_norm": 1.2302892939334757, |
|
"learning_rate": 1.1033173850346081e-07, |
|
"loss": 1.5156, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.7628865979381443, |
|
"grad_norm": 1.1376050539784393, |
|
"learning_rate": 1.1012985817749462e-07, |
|
"loss": 1.4994, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.7654639175257731, |
|
"grad_norm": 1.1912506938583958, |
|
"learning_rate": 1.0992793612991946e-07, |
|
"loss": 1.5358, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.768041237113402, |
|
"grad_norm": 1.2323374068579527, |
|
"learning_rate": 1.097259731923869e-07, |
|
"loss": 1.5446, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.7706185567010309, |
|
"grad_norm": 1.2255437302126448, |
|
"learning_rate": 1.0952397019671694e-07, |
|
"loss": 1.413, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.7731958762886597, |
|
"grad_norm": 1.2608512214948044, |
|
"learning_rate": 1.0932192797489459e-07, |
|
"loss": 1.5306, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.7757731958762886, |
|
"grad_norm": 1.187848987827898, |
|
"learning_rate": 1.0911984735906635e-07, |
|
"loss": 1.4589, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.7783505154639174, |
|
"grad_norm": 1.1078353763626878, |
|
"learning_rate": 1.0891772918153694e-07, |
|
"loss": 1.5026, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.7809278350515463, |
|
"grad_norm": 1.1847073079284023, |
|
"learning_rate": 1.0871557427476584e-07, |
|
"loss": 1.4819, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.7809278350515463, |
|
"eval_loss": 1.4616869688034058, |
|
"eval_runtime": 78.6285, |
|
"eval_samples_per_second": 21.15, |
|
"eval_steps_per_second": 1.323, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.7835051546391751, |
|
"grad_norm": 1.1649395427594373, |
|
"learning_rate": 1.0851338347136356e-07, |
|
"loss": 1.5143, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.786082474226804, |
|
"grad_norm": 1.284550306447524, |
|
"learning_rate": 1.0831115760408871e-07, |
|
"loss": 1.4542, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.7886597938144329, |
|
"grad_norm": 1.1925120790488934, |
|
"learning_rate": 1.0810889750584424e-07, |
|
"loss": 1.426, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.7912371134020617, |
|
"grad_norm": 1.178551347790486, |
|
"learning_rate": 1.07906604009674e-07, |
|
"loss": 1.4372, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.7938144329896906, |
|
"grad_norm": 1.2458332188073578, |
|
"learning_rate": 1.077042779487595e-07, |
|
"loss": 1.5252, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.7963917525773194, |
|
"grad_norm": 1.2661697455131442, |
|
"learning_rate": 1.0750192015641633e-07, |
|
"loss": 1.5066, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.7989690721649485, |
|
"grad_norm": 1.1069806037454215, |
|
"learning_rate": 1.0729953146609074e-07, |
|
"loss": 1.4264, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.8015463917525774, |
|
"grad_norm": 1.194263854578521, |
|
"learning_rate": 1.0709711271135635e-07, |
|
"loss": 1.4339, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.8041237113402062, |
|
"grad_norm": 1.2068338783635435, |
|
"learning_rate": 1.0689466472591048e-07, |
|
"loss": 1.4341, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.806701030927835, |
|
"grad_norm": 1.1526056815131385, |
|
"learning_rate": 1.066921883435709e-07, |
|
"loss": 1.4382, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.809278350515464, |
|
"grad_norm": 1.1526436748662838, |
|
"learning_rate": 1.0648968439827239e-07, |
|
"loss": 1.4525, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.8118556701030928, |
|
"grad_norm": 1.2587407335769552, |
|
"learning_rate": 1.0628715372406309e-07, |
|
"loss": 1.4995, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.8144329896907216, |
|
"grad_norm": 1.2439345895593688, |
|
"learning_rate": 1.0608459715510139e-07, |
|
"loss": 1.4172, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.8170103092783505, |
|
"grad_norm": 1.2048841761527278, |
|
"learning_rate": 1.058820155256523e-07, |
|
"loss": 1.4536, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.8195876288659794, |
|
"grad_norm": 1.1712052519870668, |
|
"learning_rate": 1.0567940967008396e-07, |
|
"loss": 1.4739, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.8221649484536082, |
|
"grad_norm": 1.1253615480764265, |
|
"learning_rate": 1.0547678042286435e-07, |
|
"loss": 1.4362, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.824742268041237, |
|
"grad_norm": 1.1941314320057088, |
|
"learning_rate": 1.0527412861855789e-07, |
|
"loss": 1.5473, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.827319587628866, |
|
"grad_norm": 1.1515723933518516, |
|
"learning_rate": 1.0507145509182169e-07, |
|
"loss": 1.4095, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.829896907216495, |
|
"grad_norm": 1.1459437804868953, |
|
"learning_rate": 1.0486876067740252e-07, |
|
"loss": 1.4454, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.8324742268041239, |
|
"grad_norm": 1.2555188381647702, |
|
"learning_rate": 1.0466604621013306e-07, |
|
"loss": 1.5032, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.8350515463917527, |
|
"grad_norm": 1.173256763665764, |
|
"learning_rate": 1.0446331252492864e-07, |
|
"loss": 1.542, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.8376288659793816, |
|
"grad_norm": 1.1616854603706852, |
|
"learning_rate": 1.0426056045678375e-07, |
|
"loss": 1.3301, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.8402061855670104, |
|
"grad_norm": 1.1961580743330678, |
|
"learning_rate": 1.0405779084076855e-07, |
|
"loss": 1.5125, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.8427835051546393, |
|
"grad_norm": 1.153920316864521, |
|
"learning_rate": 1.0385500451202549e-07, |
|
"loss": 1.5104, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.8453608247422681, |
|
"grad_norm": 1.2288872831871334, |
|
"learning_rate": 1.036522023057659e-07, |
|
"loss": 1.54, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.847938144329897, |
|
"grad_norm": 1.1774978065006576, |
|
"learning_rate": 1.0344938505726641e-07, |
|
"loss": 1.4226, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.8505154639175259, |
|
"grad_norm": 1.18190720576571, |
|
"learning_rate": 1.0324655360186567e-07, |
|
"loss": 1.4874, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.8530927835051547, |
|
"grad_norm": 1.0881741375618583, |
|
"learning_rate": 1.0304370877496089e-07, |
|
"loss": 1.4196, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.8556701030927836, |
|
"grad_norm": 1.1920925526660484, |
|
"learning_rate": 1.0284085141200423e-07, |
|
"loss": 1.4022, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.8582474226804124, |
|
"grad_norm": 1.2553686949150205, |
|
"learning_rate": 1.0263798234849954e-07, |
|
"loss": 1.5411, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.8608247422680413, |
|
"grad_norm": 1.1849323570576418, |
|
"learning_rate": 1.0243510241999897e-07, |
|
"loss": 1.4376, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.8634020618556701, |
|
"grad_norm": 1.1748076105825112, |
|
"learning_rate": 1.0223221246209918e-07, |
|
"loss": 1.3917, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.865979381443299, |
|
"grad_norm": 1.1437404458677716, |
|
"learning_rate": 1.0202931331043839e-07, |
|
"loss": 1.5412, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.8685567010309279, |
|
"grad_norm": 1.1588752261265902, |
|
"learning_rate": 1.0182640580069248e-07, |
|
"loss": 1.4016, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.8711340206185567, |
|
"grad_norm": 1.244615607327111, |
|
"learning_rate": 1.016234907685719e-07, |
|
"loss": 1.4501, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.8737113402061856, |
|
"grad_norm": 1.1809049167530614, |
|
"learning_rate": 1.0142056904981802e-07, |
|
"loss": 1.4637, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.8762886597938144, |
|
"grad_norm": 1.1101634996349434, |
|
"learning_rate": 1.0121764148019975e-07, |
|
"loss": 1.4228, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.8788659793814433, |
|
"grad_norm": 1.2377079616714697, |
|
"learning_rate": 1.0101470889551012e-07, |
|
"loss": 1.4533, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.8814432989690721, |
|
"grad_norm": 1.160543485045226, |
|
"learning_rate": 1.0081177213156278e-07, |
|
"loss": 1.4735, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.884020618556701, |
|
"grad_norm": 1.115374473748354, |
|
"learning_rate": 1.0060883202418861e-07, |
|
"loss": 1.438, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.8865979381443299, |
|
"grad_norm": 1.1305131743119878, |
|
"learning_rate": 1.004058894092323e-07, |
|
"loss": 1.4186, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.8891752577319587, |
|
"grad_norm": 1.151990553361531, |
|
"learning_rate": 1.0020294512254883e-07, |
|
"loss": 1.5121, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.8917525773195876, |
|
"grad_norm": 1.1278991620860568, |
|
"learning_rate": 1e-07, |
|
"loss": 1.4333, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.8943298969072164, |
|
"grad_norm": 1.281137685220673, |
|
"learning_rate": 9.97970548774512e-08, |
|
"loss": 1.4416, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.8969072164948453, |
|
"grad_norm": 1.1772600120424532, |
|
"learning_rate": 9.959411059076768e-08, |
|
"loss": 1.409, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.8994845360824741, |
|
"grad_norm": 1.16485761208349, |
|
"learning_rate": 9.939116797581138e-08, |
|
"loss": 1.4324, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.902061855670103, |
|
"grad_norm": 1.203443440232203, |
|
"learning_rate": 9.918822786843724e-08, |
|
"loss": 1.4324, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.9046391752577319, |
|
"grad_norm": 1.20376421998538, |
|
"learning_rate": 9.898529110448987e-08, |
|
"loss": 1.501, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.9072164948453607, |
|
"grad_norm": 1.1533270795807118, |
|
"learning_rate": 9.878235851980025e-08, |
|
"loss": 1.404, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.9072164948453607, |
|
"eval_loss": 1.4594255685806274, |
|
"eval_runtime": 78.6148, |
|
"eval_samples_per_second": 21.154, |
|
"eval_steps_per_second": 1.323, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.9097938144329896, |
|
"grad_norm": 1.1889743164637112, |
|
"learning_rate": 9.857943095018198e-08, |
|
"loss": 1.4652, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.9123711340206184, |
|
"grad_norm": 1.162304380840768, |
|
"learning_rate": 9.837650923142809e-08, |
|
"loss": 1.3641, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.9149484536082473, |
|
"grad_norm": 1.260002079711297, |
|
"learning_rate": 9.817359419930751e-08, |
|
"loss": 1.5022, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.9175257731958761, |
|
"grad_norm": 1.1295427248534264, |
|
"learning_rate": 9.797068668956162e-08, |
|
"loss": 1.4553, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.920103092783505, |
|
"grad_norm": 1.1730252131786578, |
|
"learning_rate": 9.77677875379008e-08, |
|
"loss": 1.4748, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.922680412371134, |
|
"grad_norm": 1.2020202803132716, |
|
"learning_rate": 9.756489758000104e-08, |
|
"loss": 1.4479, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.925257731958763, |
|
"grad_norm": 1.180219637473307, |
|
"learning_rate": 9.736201765150045e-08, |
|
"loss": 1.4974, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.9278350515463918, |
|
"grad_norm": 1.2291944688317633, |
|
"learning_rate": 9.715914858799575e-08, |
|
"loss": 1.4228, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.9304123711340206, |
|
"grad_norm": 1.1131303155372065, |
|
"learning_rate": 9.69562912250391e-08, |
|
"loss": 1.4693, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.9329896907216495, |
|
"grad_norm": 1.1994615231875885, |
|
"learning_rate": 9.675344639813433e-08, |
|
"loss": 1.4745, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.9355670103092784, |
|
"grad_norm": 1.115870585349576, |
|
"learning_rate": 9.655061494273362e-08, |
|
"loss": 1.4671, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.9381443298969072, |
|
"grad_norm": 1.2054754001979724, |
|
"learning_rate": 9.63477976942341e-08, |
|
"loss": 1.5195, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.940721649484536, |
|
"grad_norm": 1.1464295691900082, |
|
"learning_rate": 9.614499548797452e-08, |
|
"loss": 1.4402, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.943298969072165, |
|
"grad_norm": 1.1914103186703613, |
|
"learning_rate": 9.594220915923148e-08, |
|
"loss": 1.5797, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.9458762886597938, |
|
"grad_norm": 1.1615295842359556, |
|
"learning_rate": 9.573943954321626e-08, |
|
"loss": 1.4126, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.9484536082474226, |
|
"grad_norm": 1.19026250293737, |
|
"learning_rate": 9.553668747507138e-08, |
|
"loss": 1.4332, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.9510309278350515, |
|
"grad_norm": 1.1351877413773055, |
|
"learning_rate": 9.533395378986697e-08, |
|
"loss": 1.4784, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.9536082474226806, |
|
"grad_norm": 1.227791339106945, |
|
"learning_rate": 9.51312393225975e-08, |
|
"loss": 1.4392, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.9561855670103094, |
|
"grad_norm": 1.2100140189737674, |
|
"learning_rate": 9.492854490817833e-08, |
|
"loss": 1.4693, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.9587628865979383, |
|
"grad_norm": 1.0478682320033872, |
|
"learning_rate": 9.472587138144214e-08, |
|
"loss": 1.4117, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.9613402061855671, |
|
"grad_norm": 1.1920119917461085, |
|
"learning_rate": 9.452321957713563e-08, |
|
"loss": 1.556, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.963917525773196, |
|
"grad_norm": 1.1902655777598523, |
|
"learning_rate": 9.432059032991606e-08, |
|
"loss": 1.5102, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.9664948453608249, |
|
"grad_norm": 1.1511704775031535, |
|
"learning_rate": 9.411798447434773e-08, |
|
"loss": 1.5281, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.9690721649484537, |
|
"grad_norm": 1.1636100359208144, |
|
"learning_rate": 9.39154028448986e-08, |
|
"loss": 1.4024, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.9716494845360826, |
|
"grad_norm": 1.2582478560602157, |
|
"learning_rate": 9.371284627593691e-08, |
|
"loss": 1.4519, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.9742268041237114, |
|
"grad_norm": 1.1608958350691665, |
|
"learning_rate": 9.351031560172764e-08, |
|
"loss": 1.4286, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.9768041237113403, |
|
"grad_norm": 1.1725970187771935, |
|
"learning_rate": 9.330781165642907e-08, |
|
"loss": 1.4858, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.9793814432989691, |
|
"grad_norm": 1.181405747708069, |
|
"learning_rate": 9.310533527408951e-08, |
|
"loss": 1.5193, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.981958762886598, |
|
"grad_norm": 1.1949902203170548, |
|
"learning_rate": 9.290288728864365e-08, |
|
"loss": 1.3768, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.9845360824742269, |
|
"grad_norm": 1.2444243036816676, |
|
"learning_rate": 9.270046853390924e-08, |
|
"loss": 1.4866, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.9871134020618557, |
|
"grad_norm": 1.162040164523566, |
|
"learning_rate": 9.249807984358369e-08, |
|
"loss": 1.4277, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.9896907216494846, |
|
"grad_norm": 1.3041991278727916, |
|
"learning_rate": 9.229572205124051e-08, |
|
"loss": 1.4895, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.9922680412371134, |
|
"grad_norm": 1.1800946591513317, |
|
"learning_rate": 9.2093395990326e-08, |
|
"loss": 1.6118, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.9948453608247423, |
|
"grad_norm": 1.120730199367575, |
|
"learning_rate": 9.189110249415576e-08, |
|
"loss": 1.4777, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.9974226804123711, |
|
"grad_norm": 1.165214854260427, |
|
"learning_rate": 9.168884239591129e-08, |
|
"loss": 1.4491, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1460287106000804, |
|
"learning_rate": 9.148661652863641e-08, |
|
"loss": 1.442, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.002577319587629, |
|
"grad_norm": 1.245092231884586, |
|
"learning_rate": 9.128442572523417e-08, |
|
"loss": 1.4238, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.0051546391752577, |
|
"grad_norm": 1.1566295496507226, |
|
"learning_rate": 9.108227081846305e-08, |
|
"loss": 1.4313, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.0077319587628866, |
|
"grad_norm": 1.2544751166156012, |
|
"learning_rate": 9.088015264093364e-08, |
|
"loss": 1.4879, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.0103092783505154, |
|
"grad_norm": 1.2229877060400391, |
|
"learning_rate": 9.067807202510542e-08, |
|
"loss": 1.4781, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.0128865979381443, |
|
"grad_norm": 1.1382534019879336, |
|
"learning_rate": 9.047602980328308e-08, |
|
"loss": 1.4163, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.015463917525773, |
|
"grad_norm": 1.1936874170381253, |
|
"learning_rate": 9.027402680761309e-08, |
|
"loss": 1.5233, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.018041237113402, |
|
"grad_norm": 1.133631677446316, |
|
"learning_rate": 9.007206387008053e-08, |
|
"loss": 1.397, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.020618556701031, |
|
"grad_norm": 1.2442262218300326, |
|
"learning_rate": 8.987014182250538e-08, |
|
"loss": 1.4734, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.0231958762886597, |
|
"grad_norm": 1.1593473271235548, |
|
"learning_rate": 8.966826149653922e-08, |
|
"loss": 1.5101, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.0257731958762886, |
|
"grad_norm": 1.2054412501356118, |
|
"learning_rate": 8.94664237236618e-08, |
|
"loss": 1.4657, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.0283505154639174, |
|
"grad_norm": 1.1696863220137095, |
|
"learning_rate": 8.926462933517765e-08, |
|
"loss": 1.4385, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.0309278350515463, |
|
"grad_norm": 1.2085685291526942, |
|
"learning_rate": 8.906287916221257e-08, |
|
"loss": 1.4567, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.033505154639175, |
|
"grad_norm": 1.2062684152337084, |
|
"learning_rate": 8.886117403571023e-08, |
|
"loss": 1.4903, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.033505154639175, |
|
"eval_loss": 1.4574321508407593, |
|
"eval_runtime": 78.538, |
|
"eval_samples_per_second": 21.174, |
|
"eval_steps_per_second": 1.324, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.036082474226804, |
|
"grad_norm": 1.2985823482438499, |
|
"learning_rate": 8.865951478642886e-08, |
|
"loss": 1.4945, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.038659793814433, |
|
"grad_norm": 1.2008208109365806, |
|
"learning_rate": 8.845790224493761e-08, |
|
"loss": 1.4053, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.0412371134020617, |
|
"grad_norm": 1.1173370303783305, |
|
"learning_rate": 8.825633724161334e-08, |
|
"loss": 1.437, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.0438144329896906, |
|
"grad_norm": 1.2675969181316824, |
|
"learning_rate": 8.805482060663712e-08, |
|
"loss": 1.4189, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.0463917525773194, |
|
"grad_norm": 1.2147757078811159, |
|
"learning_rate": 8.785335316999077e-08, |
|
"loss": 1.4214, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.0489690721649483, |
|
"grad_norm": 1.09453864552264, |
|
"learning_rate": 8.765193576145346e-08, |
|
"loss": 1.4027, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.051546391752577, |
|
"grad_norm": 1.216226711944593, |
|
"learning_rate": 8.745056921059839e-08, |
|
"loss": 1.5143, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.054123711340206, |
|
"grad_norm": 1.2055008222540708, |
|
"learning_rate": 8.724925434678922e-08, |
|
"loss": 1.4489, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.056701030927835, |
|
"grad_norm": 1.1336500080565066, |
|
"learning_rate": 8.704799199917673e-08, |
|
"loss": 1.4248, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.0592783505154637, |
|
"grad_norm": 1.215103376196868, |
|
"learning_rate": 8.684678299669546e-08, |
|
"loss": 1.4463, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 1.1882950937372736, |
|
"learning_rate": 8.664562816806021e-08, |
|
"loss": 1.4444, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.0644329896907214, |
|
"grad_norm": 1.2047730105242802, |
|
"learning_rate": 8.64445283417626e-08, |
|
"loss": 1.4514, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.0670103092783503, |
|
"grad_norm": 1.1364686666884227, |
|
"learning_rate": 8.624348434606781e-08, |
|
"loss": 1.4285, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.069587628865979, |
|
"grad_norm": 1.2216577804549105, |
|
"learning_rate": 8.6042497009011e-08, |
|
"loss": 1.5001, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.072164948453608, |
|
"grad_norm": 1.167316107588148, |
|
"learning_rate": 8.5841567158394e-08, |
|
"loss": 1.4095, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.074742268041237, |
|
"grad_norm": 1.087136320546188, |
|
"learning_rate": 8.564069562178188e-08, |
|
"loss": 1.4547, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.0773195876288657, |
|
"grad_norm": 1.10777310102604, |
|
"learning_rate": 8.543988322649954e-08, |
|
"loss": 1.4905, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.0798969072164946, |
|
"grad_norm": 1.2198690834759995, |
|
"learning_rate": 8.523913079962816e-08, |
|
"loss": 1.3988, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.0824742268041234, |
|
"grad_norm": 1.2266366218856903, |
|
"learning_rate": 8.50384391680022e-08, |
|
"loss": 1.4972, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.0850515463917523, |
|
"grad_norm": 1.1644015048600025, |
|
"learning_rate": 8.483780915820552e-08, |
|
"loss": 1.4233, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.087628865979381, |
|
"grad_norm": 1.1537200560912633, |
|
"learning_rate": 8.463724159656814e-08, |
|
"loss": 1.5044, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.09020618556701, |
|
"grad_norm": 1.1190956026619867, |
|
"learning_rate": 8.443673730916312e-08, |
|
"loss": 1.4284, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.092783505154639, |
|
"grad_norm": 1.1476534954615265, |
|
"learning_rate": 8.423629712180264e-08, |
|
"loss": 1.4601, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.095360824742268, |
|
"grad_norm": 1.2130889970169285, |
|
"learning_rate": 8.403592186003501e-08, |
|
"loss": 1.3902, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.097938144329897, |
|
"grad_norm": 1.2106313562862567, |
|
"learning_rate": 8.383561234914119e-08, |
|
"loss": 1.5202, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.100515463917526, |
|
"grad_norm": 1.2790874195534712, |
|
"learning_rate": 8.36353694141312e-08, |
|
"loss": 1.5241, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.1030927835051547, |
|
"grad_norm": 1.1984788041581806, |
|
"learning_rate": 8.34351938797409e-08, |
|
"loss": 1.5185, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.1056701030927836, |
|
"grad_norm": 1.1224530119764298, |
|
"learning_rate": 8.323508657042858e-08, |
|
"loss": 1.4387, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.1082474226804124, |
|
"grad_norm": 1.1916193301815299, |
|
"learning_rate": 8.303504831037154e-08, |
|
"loss": 1.433, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.1108247422680413, |
|
"grad_norm": 1.269383237065682, |
|
"learning_rate": 8.283507992346263e-08, |
|
"loss": 1.58, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.002577319587629, |
|
"grad_norm": 1.0439514094170574, |
|
"learning_rate": 8.263518223330696e-08, |
|
"loss": 1.3774, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.0051546391752577, |
|
"grad_norm": 1.1249347513631904, |
|
"learning_rate": 8.243535606321848e-08, |
|
"loss": 1.4098, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.0077319587628866, |
|
"grad_norm": 1.375007615993654, |
|
"learning_rate": 8.22356022362165e-08, |
|
"loss": 1.4725, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.0103092783505154, |
|
"grad_norm": 1.1571951227795978, |
|
"learning_rate": 8.203592157502244e-08, |
|
"loss": 1.4642, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.0128865979381443, |
|
"grad_norm": 1.1725964239389173, |
|
"learning_rate": 8.183631490205636e-08, |
|
"loss": 1.4317, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.015463917525773, |
|
"grad_norm": 1.1131141063076042, |
|
"learning_rate": 8.163678303943356e-08, |
|
"loss": 1.4534, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.018041237113402, |
|
"grad_norm": 1.174599695198473, |
|
"learning_rate": 8.143732680896123e-08, |
|
"loss": 1.4076, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.020618556701031, |
|
"grad_norm": 1.1730868356762598, |
|
"learning_rate": 8.123794703213509e-08, |
|
"loss": 1.457, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.0231958762886597, |
|
"grad_norm": 1.194870586046834, |
|
"learning_rate": 8.103864453013592e-08, |
|
"loss": 1.5082, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.0257731958762886, |
|
"grad_norm": 1.1351876585089653, |
|
"learning_rate": 8.083942012382625e-08, |
|
"loss": 1.4886, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.0283505154639174, |
|
"grad_norm": 1.1792650671176743, |
|
"learning_rate": 8.064027463374701e-08, |
|
"loss": 1.4118, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.0309278350515463, |
|
"grad_norm": 1.153547305161426, |
|
"learning_rate": 8.0441208880114e-08, |
|
"loss": 1.4064, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.033505154639175, |
|
"grad_norm": 1.2783578209502229, |
|
"learning_rate": 8.024222368281469e-08, |
|
"loss": 1.4816, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.036082474226804, |
|
"grad_norm": 1.240844307809194, |
|
"learning_rate": 8.004331986140473e-08, |
|
"loss": 1.4598, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.038659793814433, |
|
"grad_norm": 1.1295638200937268, |
|
"learning_rate": 7.984449823510467e-08, |
|
"loss": 1.4081, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.0412371134020617, |
|
"grad_norm": 1.1888063217054325, |
|
"learning_rate": 7.964575962279634e-08, |
|
"loss": 1.4618, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.0438144329896906, |
|
"grad_norm": 1.2357228980469037, |
|
"learning_rate": 7.944710484301995e-08, |
|
"loss": 1.3963, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.0463917525773194, |
|
"grad_norm": 1.0786846944064847, |
|
"learning_rate": 7.92485347139702e-08, |
|
"loss": 1.4514, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.0489690721649483, |
|
"grad_norm": 1.1666214344742663, |
|
"learning_rate": 7.90500500534932e-08, |
|
"loss": 1.4389, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.0489690721649483, |
|
"eval_loss": 1.4557408094406128, |
|
"eval_runtime": 78.6008, |
|
"eval_samples_per_second": 21.158, |
|
"eval_steps_per_second": 1.323, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.051546391752577, |
|
"grad_norm": 1.1265923768111081, |
|
"learning_rate": 7.88516516790831e-08, |
|
"loss": 1.4401, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.054123711340206, |
|
"grad_norm": 1.2322020489966297, |
|
"learning_rate": 7.865334040787866e-08, |
|
"loss": 1.5326, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.056701030927835, |
|
"grad_norm": 1.1620543990403278, |
|
"learning_rate": 7.845511705665973e-08, |
|
"loss": 1.4151, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.0592783505154637, |
|
"grad_norm": 1.2532645521350043, |
|
"learning_rate": 7.82569824418443e-08, |
|
"loss": 1.485, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.0618556701030926, |
|
"grad_norm": 1.2322746000056972, |
|
"learning_rate": 7.805893737948472e-08, |
|
"loss": 1.439, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.0644329896907214, |
|
"grad_norm": 1.1992705537386268, |
|
"learning_rate": 7.786098268526446e-08, |
|
"loss": 1.4927, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.0670103092783507, |
|
"grad_norm": 1.219061389377471, |
|
"learning_rate": 7.7663119174495e-08, |
|
"loss": 1.5607, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.0695876288659796, |
|
"grad_norm": 1.2161975840628703, |
|
"learning_rate": 7.746534766211206e-08, |
|
"loss": 1.5666, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.0721649484536084, |
|
"grad_norm": 1.296835674200516, |
|
"learning_rate": 7.726766896267253e-08, |
|
"loss": 1.4738, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.0747422680412373, |
|
"grad_norm": 1.1913191108570989, |
|
"learning_rate": 7.7070083890351e-08, |
|
"loss": 1.4345, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.077319587628866, |
|
"grad_norm": 1.1697890394016621, |
|
"learning_rate": 7.687259325893654e-08, |
|
"loss": 1.4431, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.079896907216495, |
|
"grad_norm": 1.2354727439582665, |
|
"learning_rate": 7.667519788182912e-08, |
|
"loss": 1.4302, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.082474226804124, |
|
"grad_norm": 1.1445036968078774, |
|
"learning_rate": 7.647789857203644e-08, |
|
"loss": 1.4532, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.0850515463917527, |
|
"grad_norm": 1.196595545836434, |
|
"learning_rate": 7.628069614217058e-08, |
|
"loss": 1.3915, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.0876288659793816, |
|
"grad_norm": 1.2451954556034555, |
|
"learning_rate": 7.608359140444453e-08, |
|
"loss": 1.502, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.0902061855670104, |
|
"grad_norm": 1.1198448743060805, |
|
"learning_rate": 7.588658517066892e-08, |
|
"loss": 1.4182, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.0927835051546393, |
|
"grad_norm": 1.178128381993088, |
|
"learning_rate": 7.568967825224875e-08, |
|
"loss": 1.5009, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.095360824742268, |
|
"grad_norm": 1.1493716638910112, |
|
"learning_rate": 7.549287146017988e-08, |
|
"loss": 1.4575, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.097938144329897, |
|
"grad_norm": 1.2133662857011498, |
|
"learning_rate": 7.529616560504583e-08, |
|
"loss": 1.5579, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.100515463917526, |
|
"grad_norm": 1.3854933572472905, |
|
"learning_rate": 7.509956149701444e-08, |
|
"loss": 1.4113, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.1030927835051547, |
|
"grad_norm": 1.263798951148438, |
|
"learning_rate": 7.490305994583435e-08, |
|
"loss": 1.4258, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.1056701030927836, |
|
"grad_norm": 1.1393321990385807, |
|
"learning_rate": 7.470666176083191e-08, |
|
"loss": 1.4943, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.1082474226804124, |
|
"grad_norm": 1.1741861811520338, |
|
"learning_rate": 7.451036775090775e-08, |
|
"loss": 1.3918, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.1108247422680413, |
|
"grad_norm": 1.222621280727268, |
|
"learning_rate": 7.431417872453338e-08, |
|
"loss": 1.513, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.11340206185567, |
|
"grad_norm": 1.1452645437770688, |
|
"learning_rate": 7.411809548974791e-08, |
|
"loss": 1.4496, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.115979381443299, |
|
"grad_norm": 1.1804026334318425, |
|
"learning_rate": 7.39221188541548e-08, |
|
"loss": 1.4644, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.118556701030928, |
|
"grad_norm": 1.1527370569507815, |
|
"learning_rate": 7.372624962491841e-08, |
|
"loss": 1.4698, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.1211340206185567, |
|
"grad_norm": 1.211563683201349, |
|
"learning_rate": 7.353048860876063e-08, |
|
"loss": 1.4671, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.1237113402061856, |
|
"grad_norm": 1.1550395362954822, |
|
"learning_rate": 7.333483661195792e-08, |
|
"loss": 1.3627, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.1262886597938144, |
|
"grad_norm": 1.1772438114561363, |
|
"learning_rate": 7.31392944403375e-08, |
|
"loss": 1.4349, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.1288659793814433, |
|
"grad_norm": 1.1316430782314122, |
|
"learning_rate": 7.294386289927424e-08, |
|
"loss": 1.4892, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.131443298969072, |
|
"grad_norm": 1.2166109017309248, |
|
"learning_rate": 7.274854279368758e-08, |
|
"loss": 1.4753, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.134020618556701, |
|
"grad_norm": 1.2508664732495605, |
|
"learning_rate": 7.255333492803777e-08, |
|
"loss": 1.3593, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.13659793814433, |
|
"grad_norm": 1.1270294993138392, |
|
"learning_rate": 7.235824010632283e-08, |
|
"loss": 1.5031, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.1391752577319587, |
|
"grad_norm": 1.142323203849277, |
|
"learning_rate": 7.216325913207534e-08, |
|
"loss": 1.4747, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.1417525773195876, |
|
"grad_norm": 1.198388386752302, |
|
"learning_rate": 7.196839280835875e-08, |
|
"loss": 1.4787, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.1443298969072164, |
|
"grad_norm": 1.288933637399068, |
|
"learning_rate": 7.17736419377644e-08, |
|
"loss": 1.458, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.1469072164948453, |
|
"grad_norm": 1.2342213116469787, |
|
"learning_rate": 7.157900732240826e-08, |
|
"loss": 1.4902, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.149484536082474, |
|
"grad_norm": 1.2300130857871707, |
|
"learning_rate": 7.138448976392724e-08, |
|
"loss": 1.4835, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.152061855670103, |
|
"grad_norm": 1.169125520832618, |
|
"learning_rate": 7.119009006347624e-08, |
|
"loss": 1.413, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.154639175257732, |
|
"grad_norm": 1.1702489758289347, |
|
"learning_rate": 7.09958090217248e-08, |
|
"loss": 1.4857, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.1572164948453607, |
|
"grad_norm": 1.179155067994331, |
|
"learning_rate": 7.080164743885362e-08, |
|
"loss": 1.507, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.1597938144329896, |
|
"grad_norm": 1.149588572227629, |
|
"learning_rate": 7.060760611455151e-08, |
|
"loss": 1.3957, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.1623711340206184, |
|
"grad_norm": 1.1269730428089064, |
|
"learning_rate": 7.041368584801186e-08, |
|
"loss": 1.515, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.1649484536082473, |
|
"grad_norm": 1.2614734844469966, |
|
"learning_rate": 7.021988743792958e-08, |
|
"loss": 1.4752, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.167525773195876, |
|
"grad_norm": 1.26049546725807, |
|
"learning_rate": 7.002621168249758e-08, |
|
"loss": 1.4222, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.170103092783505, |
|
"grad_norm": 1.2122490418432295, |
|
"learning_rate": 6.983265937940365e-08, |
|
"loss": 1.5258, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.172680412371134, |
|
"grad_norm": 1.163933149699957, |
|
"learning_rate": 6.963923132582715e-08, |
|
"loss": 1.4406, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.1752577319587627, |
|
"grad_norm": 1.2117410126905865, |
|
"learning_rate": 6.944592831843566e-08, |
|
"loss": 1.4541, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.1752577319587627, |
|
"eval_loss": 1.4543218612670898, |
|
"eval_runtime": 78.6219, |
|
"eval_samples_per_second": 21.152, |
|
"eval_steps_per_second": 1.323, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.1778350515463916, |
|
"grad_norm": 1.2898700377788812, |
|
"learning_rate": 6.925275115338167e-08, |
|
"loss": 1.458, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.1804123711340204, |
|
"grad_norm": 1.1426836123172524, |
|
"learning_rate": 6.90597006262995e-08, |
|
"loss": 1.3469, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.1829896907216493, |
|
"grad_norm": 1.224441134115869, |
|
"learning_rate": 6.886677753230183e-08, |
|
"loss": 1.4027, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.1855670103092786, |
|
"grad_norm": 1.387271519204012, |
|
"learning_rate": 6.867398266597642e-08, |
|
"loss": 1.4359, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.1881443298969074, |
|
"grad_norm": 1.2243550754367374, |
|
"learning_rate": 6.848131682138303e-08, |
|
"loss": 1.4891, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.1907216494845363, |
|
"grad_norm": 1.2282484095681934, |
|
"learning_rate": 6.82887807920499e-08, |
|
"loss": 1.4571, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.193298969072165, |
|
"grad_norm": 1.252437764569184, |
|
"learning_rate": 6.809637537097061e-08, |
|
"loss": 1.4845, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.195876288659794, |
|
"grad_norm": 1.2033826306564712, |
|
"learning_rate": 6.790410135060096e-08, |
|
"loss": 1.3981, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.198453608247423, |
|
"grad_norm": 1.2730733273660004, |
|
"learning_rate": 6.77119595228554e-08, |
|
"loss": 1.5428, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.2010309278350517, |
|
"grad_norm": 1.1145258448772917, |
|
"learning_rate": 6.751995067910388e-08, |
|
"loss": 1.4391, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.2036082474226806, |
|
"grad_norm": 1.2423736700157595, |
|
"learning_rate": 6.732807561016884e-08, |
|
"loss": 1.3461, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.2061855670103094, |
|
"grad_norm": 1.2567446761007774, |
|
"learning_rate": 6.713633510632157e-08, |
|
"loss": 1.4424, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.2087628865979383, |
|
"grad_norm": 1.1962904231989222, |
|
"learning_rate": 6.694472995727913e-08, |
|
"loss": 1.5211, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.211340206185567, |
|
"grad_norm": 1.2697071279271324, |
|
"learning_rate": 6.675326095220124e-08, |
|
"loss": 1.5138, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.213917525773196, |
|
"grad_norm": 1.1182813975437969, |
|
"learning_rate": 6.656192887968674e-08, |
|
"loss": 1.4643, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.216494845360825, |
|
"grad_norm": 1.2209457066901777, |
|
"learning_rate": 6.637073452777051e-08, |
|
"loss": 1.4646, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.2190721649484537, |
|
"grad_norm": 1.2364207179496447, |
|
"learning_rate": 6.617967868392035e-08, |
|
"loss": 1.4531, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.2216494845360826, |
|
"grad_norm": 1.1596958099892627, |
|
"learning_rate": 6.598876213503339e-08, |
|
"loss": 1.3596, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.2242268041237114, |
|
"grad_norm": 1.1861584749981382, |
|
"learning_rate": 6.579798566743313e-08, |
|
"loss": 1.4605, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.2268041237113403, |
|
"grad_norm": 1.2713750509697457, |
|
"learning_rate": 6.560735006686617e-08, |
|
"loss": 1.5169, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.229381443298969, |
|
"grad_norm": 1.166290536481266, |
|
"learning_rate": 6.541685611849887e-08, |
|
"loss": 1.4436, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.231958762886598, |
|
"grad_norm": 1.1735876550775757, |
|
"learning_rate": 6.522650460691415e-08, |
|
"loss": 1.4548, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.234536082474227, |
|
"grad_norm": 1.2477782864575375, |
|
"learning_rate": 6.503629631610836e-08, |
|
"loss": 1.4534, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.2371134020618557, |
|
"grad_norm": 1.2173622340437633, |
|
"learning_rate": 6.48462320294879e-08, |
|
"loss": 1.4595, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.2396907216494846, |
|
"grad_norm": 1.1869675634283399, |
|
"learning_rate": 6.465631252986608e-08, |
|
"loss": 1.4451, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.2422680412371134, |
|
"grad_norm": 1.1456159400412829, |
|
"learning_rate": 6.446653859945986e-08, |
|
"loss": 1.4064, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.2448453608247423, |
|
"grad_norm": 1.2491020198374654, |
|
"learning_rate": 6.427691101988673e-08, |
|
"loss": 1.4949, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.247422680412371, |
|
"grad_norm": 1.2282744468510673, |
|
"learning_rate": 6.40874305721613e-08, |
|
"loss": 1.4545, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 1.0996865259394428, |
|
"learning_rate": 6.389809803669226e-08, |
|
"loss": 1.3342, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.252577319587629, |
|
"grad_norm": 1.230550939339635, |
|
"learning_rate": 6.370891419327906e-08, |
|
"loss": 1.5121, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.2551546391752577, |
|
"grad_norm": 1.2652568339180974, |
|
"learning_rate": 6.351987982110879e-08, |
|
"loss": 1.5533, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.2577319587628866, |
|
"grad_norm": 1.173180731192026, |
|
"learning_rate": 6.333099569875284e-08, |
|
"loss": 1.4439, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.2603092783505154, |
|
"grad_norm": 1.1001923514400465, |
|
"learning_rate": 6.314226260416382e-08, |
|
"loss": 1.4376, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.2628865979381443, |
|
"grad_norm": 1.1389700541958854, |
|
"learning_rate": 6.295368131467235e-08, |
|
"loss": 1.4357, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.265463917525773, |
|
"grad_norm": 1.1695985290298057, |
|
"learning_rate": 6.276525260698363e-08, |
|
"loss": 1.5309, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.268041237113402, |
|
"grad_norm": 1.2012587244050719, |
|
"learning_rate": 6.257697725717468e-08, |
|
"loss": 1.5271, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.270618556701031, |
|
"grad_norm": 1.2116419761383141, |
|
"learning_rate": 6.238885604069075e-08, |
|
"loss": 1.4536, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.2731958762886597, |
|
"grad_norm": 1.169258658026815, |
|
"learning_rate": 6.220088973234215e-08, |
|
"loss": 1.4662, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.2757731958762886, |
|
"grad_norm": 1.1455385835708687, |
|
"learning_rate": 6.201307910630145e-08, |
|
"loss": 1.4339, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.2783505154639174, |
|
"grad_norm": 1.1833257380384377, |
|
"learning_rate": 6.182542493609984e-08, |
|
"loss": 1.3253, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.2809278350515463, |
|
"grad_norm": 1.28784815413645, |
|
"learning_rate": 6.163792799462403e-08, |
|
"loss": 1.4603, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.283505154639175, |
|
"grad_norm": 1.1970928590978123, |
|
"learning_rate": 6.145058905411342e-08, |
|
"loss": 1.4683, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.286082474226804, |
|
"grad_norm": 1.149098853897877, |
|
"learning_rate": 6.126340888615641e-08, |
|
"loss": 1.4729, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.288659793814433, |
|
"grad_norm": 1.209952156325127, |
|
"learning_rate": 6.107638826168756e-08, |
|
"loss": 1.5063, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.2912371134020617, |
|
"grad_norm": 1.093427620169618, |
|
"learning_rate": 6.088952795098441e-08, |
|
"loss": 1.4402, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.2938144329896906, |
|
"grad_norm": 1.1277798916215127, |
|
"learning_rate": 6.070282872366406e-08, |
|
"loss": 1.5049, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.2963917525773194, |
|
"grad_norm": 1.1497157702484186, |
|
"learning_rate": 6.05162913486802e-08, |
|
"loss": 1.4331, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.2989690721649483, |
|
"grad_norm": 1.2127687421273623, |
|
"learning_rate": 6.032991659432006e-08, |
|
"loss": 1.464, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.301546391752577, |
|
"grad_norm": 1.2091736243527582, |
|
"learning_rate": 6.014370522820084e-08, |
|
"loss": 1.4257, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.301546391752577, |
|
"eval_loss": 1.4530315399169922, |
|
"eval_runtime": 78.4954, |
|
"eval_samples_per_second": 21.186, |
|
"eval_steps_per_second": 1.325, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.304123711340206, |
|
"grad_norm": 1.1621649511934278, |
|
"learning_rate": 5.995765801726698e-08, |
|
"loss": 1.4808, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.306701030927835, |
|
"grad_norm": 1.1581272698070357, |
|
"learning_rate": 5.977177572778678e-08, |
|
"loss": 1.3401, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.3092783505154637, |
|
"grad_norm": 1.1599391051626198, |
|
"learning_rate": 5.958605912534921e-08, |
|
"loss": 1.4917, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.3118556701030926, |
|
"grad_norm": 1.3034698067830743, |
|
"learning_rate": 5.9400508974860885e-08, |
|
"loss": 1.4841, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.3144329896907214, |
|
"grad_norm": 1.2060359148709237, |
|
"learning_rate": 5.9215126040542886e-08, |
|
"loss": 1.4479, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.3170103092783503, |
|
"grad_norm": 1.2258119330781094, |
|
"learning_rate": 5.902991108592754e-08, |
|
"loss": 1.4949, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.319587628865979, |
|
"grad_norm": 1.2150702094703367, |
|
"learning_rate": 5.8844864873855296e-08, |
|
"loss": 1.4329, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.3221649484536084, |
|
"grad_norm": 1.1354804163624515, |
|
"learning_rate": 5.8659988166471706e-08, |
|
"loss": 1.3683, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.3247422680412373, |
|
"grad_norm": 1.1304878710380117, |
|
"learning_rate": 5.847528172522407e-08, |
|
"loss": 1.4345, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.327319587628866, |
|
"grad_norm": 1.2388489587800555, |
|
"learning_rate": 5.829074631085852e-08, |
|
"loss": 1.5177, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.329896907216495, |
|
"grad_norm": 1.2418385155763394, |
|
"learning_rate": 5.8106382683416636e-08, |
|
"loss": 1.5666, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.332474226804124, |
|
"grad_norm": 1.2067656028810445, |
|
"learning_rate": 5.7922191602232675e-08, |
|
"loss": 1.501, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.3350515463917527, |
|
"grad_norm": 1.2443124436097661, |
|
"learning_rate": 5.773817382593007e-08, |
|
"loss": 1.4516, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.3376288659793816, |
|
"grad_norm": 1.2589938629670394, |
|
"learning_rate": 5.7554330112418504e-08, |
|
"loss": 1.4955, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.3402061855670104, |
|
"grad_norm": 1.1979526509329819, |
|
"learning_rate": 5.737066121889078e-08, |
|
"loss": 1.4224, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.3427835051546393, |
|
"grad_norm": 1.1895398966073056, |
|
"learning_rate": 5.718716790181965e-08, |
|
"loss": 1.4243, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.345360824742268, |
|
"grad_norm": 1.1828652518517522, |
|
"learning_rate": 5.70038509169547e-08, |
|
"loss": 1.4559, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.347938144329897, |
|
"grad_norm": 1.2201556733969088, |
|
"learning_rate": 5.682071101931936e-08, |
|
"loss": 1.5799, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.350515463917526, |
|
"grad_norm": 1.2211801179218442, |
|
"learning_rate": 5.6637748963207566e-08, |
|
"loss": 1.4684, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.3530927835051547, |
|
"grad_norm": 1.2453622614111477, |
|
"learning_rate": 5.6454965502180884e-08, |
|
"loss": 1.4854, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.3556701030927836, |
|
"grad_norm": 1.1220592371624576, |
|
"learning_rate": 5.627236138906524e-08, |
|
"loss": 1.5089, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.3582474226804124, |
|
"grad_norm": 1.1369675384518176, |
|
"learning_rate": 5.60899373759479e-08, |
|
"loss": 1.4088, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.3608247422680413, |
|
"grad_norm": 1.1583531710119257, |
|
"learning_rate": 5.590769421417434e-08, |
|
"loss": 1.4299, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.36340206185567, |
|
"grad_norm": 1.2204630482972216, |
|
"learning_rate": 5.572563265434527e-08, |
|
"loss": 1.421, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.365979381443299, |
|
"grad_norm": 1.1654233558024554, |
|
"learning_rate": 5.55437534463133e-08, |
|
"loss": 1.4153, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.368556701030928, |
|
"grad_norm": 1.1255124035829496, |
|
"learning_rate": 5.536205733918007e-08, |
|
"loss": 1.4196, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.3711340206185567, |
|
"grad_norm": 1.1998683282168985, |
|
"learning_rate": 5.5180545081293074e-08, |
|
"loss": 1.4067, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.3737113402061856, |
|
"grad_norm": 1.2097328179188533, |
|
"learning_rate": 5.4999217420242574e-08, |
|
"loss": 1.4221, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.3762886597938144, |
|
"grad_norm": 1.2465777328454615, |
|
"learning_rate": 5.481807510285852e-08, |
|
"loss": 1.5432, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.3788659793814433, |
|
"grad_norm": 1.1017326736009339, |
|
"learning_rate": 5.4637118875207585e-08, |
|
"loss": 1.4498, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.381443298969072, |
|
"grad_norm": 1.1894534742510336, |
|
"learning_rate": 5.445634948258991e-08, |
|
"loss": 1.4779, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.384020618556701, |
|
"grad_norm": 1.2240426429209377, |
|
"learning_rate": 5.4275767669536145e-08, |
|
"loss": 1.4643, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.38659793814433, |
|
"grad_norm": 1.1865338401108185, |
|
"learning_rate": 5.4095374179804365e-08, |
|
"loss": 1.4218, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.3891752577319587, |
|
"grad_norm": 1.1332962977107732, |
|
"learning_rate": 5.391516975637699e-08, |
|
"loss": 1.4893, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.3917525773195876, |
|
"grad_norm": 1.1749099925869624, |
|
"learning_rate": 5.373515514145771e-08, |
|
"loss": 1.4223, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.3943298969072164, |
|
"grad_norm": 1.2704273457918143, |
|
"learning_rate": 5.355533107646858e-08, |
|
"loss": 1.4625, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.3969072164948453, |
|
"grad_norm": 1.2661897531951014, |
|
"learning_rate": 5.3375698302046745e-08, |
|
"loss": 1.4886, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.399484536082474, |
|
"grad_norm": 1.1604729483093374, |
|
"learning_rate": 5.319625755804138e-08, |
|
"loss": 1.433, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.402061855670103, |
|
"grad_norm": 1.1177913422918446, |
|
"learning_rate": 5.301700958351098e-08, |
|
"loss": 1.3745, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.404639175257732, |
|
"grad_norm": 1.350758760981664, |
|
"learning_rate": 5.283795511671994e-08, |
|
"loss": 1.5148, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.4072164948453607, |
|
"grad_norm": 1.1721177815291475, |
|
"learning_rate": 5.265909489513567e-08, |
|
"loss": 1.4789, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.4097938144329896, |
|
"grad_norm": 1.1121369880829992, |
|
"learning_rate": 5.248042965542558e-08, |
|
"loss": 1.4492, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.4123711340206184, |
|
"grad_norm": 1.172764927678444, |
|
"learning_rate": 5.230196013345398e-08, |
|
"loss": 1.495, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.4149484536082473, |
|
"grad_norm": 1.2211219953558563, |
|
"learning_rate": 5.212368706427912e-08, |
|
"loss": 1.4839, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.417525773195876, |
|
"grad_norm": 1.2134922811527864, |
|
"learning_rate": 5.194561118215004e-08, |
|
"loss": 1.4247, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.420103092783505, |
|
"grad_norm": 1.1269911256995855, |
|
"learning_rate": 5.176773322050381e-08, |
|
"loss": 1.4484, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.422680412371134, |
|
"grad_norm": 1.119051207691081, |
|
"learning_rate": 5.1590053911962127e-08, |
|
"loss": 1.3717, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.4252577319587627, |
|
"grad_norm": 1.1877122575741303, |
|
"learning_rate": 5.141257398832862e-08, |
|
"loss": 1.416, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.4278350515463916, |
|
"grad_norm": 1.1267435950520672, |
|
"learning_rate": 5.1235294180585674e-08, |
|
"loss": 1.4357, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.4278350515463916, |
|
"eval_loss": 1.4520158767700195, |
|
"eval_runtime": 78.5953, |
|
"eval_samples_per_second": 21.159, |
|
"eval_steps_per_second": 1.323, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.430412371134021, |
|
"grad_norm": 1.0857318983382882, |
|
"learning_rate": 5.1058215218891464e-08, |
|
"loss": 1.4512, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.4329896907216497, |
|
"grad_norm": 1.155498319174195, |
|
"learning_rate": 5.088133783257693e-08, |
|
"loss": 1.5014, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.4355670103092786, |
|
"grad_norm": 1.2379699109090305, |
|
"learning_rate": 5.070466275014287e-08, |
|
"loss": 1.5288, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.4381443298969074, |
|
"grad_norm": 1.3260836529994613, |
|
"learning_rate": 5.0528190699256756e-08, |
|
"loss": 1.456, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.4407216494845363, |
|
"grad_norm": 1.1737794063785383, |
|
"learning_rate": 5.03519224067499e-08, |
|
"loss": 1.4514, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.443298969072165, |
|
"grad_norm": 1.183113595964214, |
|
"learning_rate": 5.0175858598614363e-08, |
|
"loss": 1.4507, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.445876288659794, |
|
"grad_norm": 1.1143164931619889, |
|
"learning_rate": 5.000000000000002e-08, |
|
"loss": 1.3849, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.448453608247423, |
|
"grad_norm": 1.1724349277334387, |
|
"learning_rate": 4.9824347335211514e-08, |
|
"loss": 1.4424, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.4510309278350517, |
|
"grad_norm": 1.1212216527840104, |
|
"learning_rate": 4.964890132770543e-08, |
|
"loss": 1.4082, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.4536082474226806, |
|
"grad_norm": 1.1522290603715333, |
|
"learning_rate": 4.947366270008707e-08, |
|
"loss": 1.4314, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.4561855670103094, |
|
"grad_norm": 1.1633774724561892, |
|
"learning_rate": 4.929863217410767e-08, |
|
"loss": 1.4865, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.4587628865979383, |
|
"grad_norm": 1.1406335428126368, |
|
"learning_rate": 4.912381047066133e-08, |
|
"loss": 1.4458, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.461340206185567, |
|
"grad_norm": 1.1104681920852408, |
|
"learning_rate": 4.894919830978211e-08, |
|
"loss": 1.397, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.463917525773196, |
|
"grad_norm": 1.2181204959510732, |
|
"learning_rate": 4.8774796410640983e-08, |
|
"loss": 1.4955, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.466494845360825, |
|
"grad_norm": 1.15471572592744, |
|
"learning_rate": 4.860060549154301e-08, |
|
"loss": 1.3996, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.4690721649484537, |
|
"grad_norm": 1.19065290512176, |
|
"learning_rate": 4.842662626992426e-08, |
|
"loss": 1.4755, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.4716494845360826, |
|
"grad_norm": 1.351223096851913, |
|
"learning_rate": 4.825285946234874e-08, |
|
"loss": 1.4747, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.4742268041237114, |
|
"grad_norm": 1.141166837825934, |
|
"learning_rate": 4.807930578450584e-08, |
|
"loss": 1.4063, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.4768041237113403, |
|
"grad_norm": 1.1861721992764764, |
|
"learning_rate": 4.7905965951206986e-08, |
|
"loss": 1.4967, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.479381443298969, |
|
"grad_norm": 1.2595851597755765, |
|
"learning_rate": 4.773284067638281e-08, |
|
"loss": 1.4877, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.481958762886598, |
|
"grad_norm": 1.1088230107238257, |
|
"learning_rate": 4.755993067308047e-08, |
|
"loss": 1.4385, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.484536082474227, |
|
"grad_norm": 1.2852932163080484, |
|
"learning_rate": 4.7387236653460205e-08, |
|
"loss": 1.4141, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.4871134020618557, |
|
"grad_norm": 1.244645084527039, |
|
"learning_rate": 4.721475932879282e-08, |
|
"loss": 1.482, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.4896907216494846, |
|
"grad_norm": 1.2466688875419663, |
|
"learning_rate": 4.7042499409456695e-08, |
|
"loss": 1.4382, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.4922680412371134, |
|
"grad_norm": 1.2462831105011571, |
|
"learning_rate": 4.687045760493468e-08, |
|
"loss": 1.536, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.4948453608247423, |
|
"grad_norm": 1.1482492444378036, |
|
"learning_rate": 4.6698634623811307e-08, |
|
"loss": 1.4406, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.497422680412371, |
|
"grad_norm": 1.1978027196822072, |
|
"learning_rate": 4.652703117376986e-08, |
|
"loss": 1.4288, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.205112527214404, |
|
"learning_rate": 4.635564796158945e-08, |
|
"loss": 1.4066, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.502577319587629, |
|
"grad_norm": 1.1958287831198664, |
|
"learning_rate": 4.618448569314206e-08, |
|
"loss": 1.4194, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.5051546391752577, |
|
"grad_norm": 1.0972900424361671, |
|
"learning_rate": 4.60135450733897e-08, |
|
"loss": 1.4838, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.5077319587628866, |
|
"grad_norm": 1.2508036239600449, |
|
"learning_rate": 4.584282680638154e-08, |
|
"loss": 1.4443, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.5103092783505154, |
|
"grad_norm": 1.1703232750822017, |
|
"learning_rate": 4.567233159525088e-08, |
|
"loss": 1.434, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.5128865979381443, |
|
"grad_norm": 1.1666987794362405, |
|
"learning_rate": 4.550206014221232e-08, |
|
"loss": 1.4857, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.515463917525773, |
|
"grad_norm": 1.118899379693407, |
|
"learning_rate": 4.53320131485589e-08, |
|
"loss": 1.4753, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.518041237113402, |
|
"grad_norm": 1.2072619010906969, |
|
"learning_rate": 4.516219131465919e-08, |
|
"loss": 1.461, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.520618556701031, |
|
"grad_norm": 1.1330825353202136, |
|
"learning_rate": 4.499259533995434e-08, |
|
"loss": 1.3632, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.5231958762886597, |
|
"grad_norm": 1.087244159516567, |
|
"learning_rate": 4.48232259229554e-08, |
|
"loss": 1.4907, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.5257731958762886, |
|
"grad_norm": 1.113783698087956, |
|
"learning_rate": 4.465408376124016e-08, |
|
"loss": 1.425, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.5283505154639174, |
|
"grad_norm": 1.2174392360989843, |
|
"learning_rate": 4.448516955145047e-08, |
|
"loss": 1.5075, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.5309278350515463, |
|
"grad_norm": 1.2580642720936182, |
|
"learning_rate": 4.431648398928932e-08, |
|
"loss": 1.4312, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.533505154639175, |
|
"grad_norm": 1.2608189792754003, |
|
"learning_rate": 4.414802776951798e-08, |
|
"loss": 1.4614, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.536082474226804, |
|
"grad_norm": 1.1608489532256927, |
|
"learning_rate": 4.3979801585953094e-08, |
|
"loss": 1.4286, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.538659793814433, |
|
"grad_norm": 1.241756886612098, |
|
"learning_rate": 4.381180613146395e-08, |
|
"loss": 1.4545, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.5412371134020617, |
|
"grad_norm": 1.1267401284402057, |
|
"learning_rate": 4.364404209796948e-08, |
|
"loss": 1.4289, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.5438144329896906, |
|
"grad_norm": 1.1675743288280764, |
|
"learning_rate": 4.347651017643539e-08, |
|
"loss": 1.4545, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.5463917525773194, |
|
"grad_norm": 1.1014672234344964, |
|
"learning_rate": 4.3309211056871544e-08, |
|
"loss": 1.4588, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.5489690721649483, |
|
"grad_norm": 1.1537126237371678, |
|
"learning_rate": 4.314214542832888e-08, |
|
"loss": 1.4922, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.551546391752577, |
|
"grad_norm": 1.0803879548258355, |
|
"learning_rate": 4.2975313978896644e-08, |
|
"loss": 1.4505, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.554123711340206, |
|
"grad_norm": 1.1135211277789598, |
|
"learning_rate": 4.280871739569971e-08, |
|
"loss": 1.4256, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.554123711340206, |
|
"eval_loss": 1.4510596990585327, |
|
"eval_runtime": 78.5321, |
|
"eval_samples_per_second": 21.176, |
|
"eval_steps_per_second": 1.324, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.556701030927835, |
|
"grad_norm": 1.1587956973540048, |
|
"learning_rate": 4.2642356364895414e-08, |
|
"loss": 1.3874, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.5592783505154637, |
|
"grad_norm": 1.2208237784983438, |
|
"learning_rate": 4.247623157167102e-08, |
|
"loss": 1.4828, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.5618556701030926, |
|
"grad_norm": 1.1970857349297972, |
|
"learning_rate": 4.231034370024088e-08, |
|
"loss": 1.4412, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.5644329896907214, |
|
"grad_norm": 1.1543756364647166, |
|
"learning_rate": 4.214469343384346e-08, |
|
"loss": 1.4448, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.5670103092783503, |
|
"grad_norm": 1.125316478876826, |
|
"learning_rate": 4.197928145473856e-08, |
|
"loss": 1.3943, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.569587628865979, |
|
"grad_norm": 1.1220973164280506, |
|
"learning_rate": 4.181410844420473e-08, |
|
"loss": 1.4221, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.572164948453608, |
|
"grad_norm": 1.1654590544487953, |
|
"learning_rate": 4.164917508253607e-08, |
|
"loss": 1.433, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.574742268041237, |
|
"grad_norm": 1.1709294745599472, |
|
"learning_rate": 4.148448204903977e-08, |
|
"loss": 1.3952, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.5773195876288657, |
|
"grad_norm": 1.1679647806294131, |
|
"learning_rate": 4.132003002203314e-08, |
|
"loss": 1.4641, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.579896907216495, |
|
"grad_norm": 1.3695549935841669, |
|
"learning_rate": 4.115581967884093e-08, |
|
"loss": 1.5259, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.582474226804124, |
|
"grad_norm": 1.1307837909317393, |
|
"learning_rate": 4.099185169579241e-08, |
|
"loss": 1.4012, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.5850515463917527, |
|
"grad_norm": 1.1501589873026261, |
|
"learning_rate": 4.0828126748218647e-08, |
|
"loss": 1.4582, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.5876288659793816, |
|
"grad_norm": 1.1069474546473044, |
|
"learning_rate": 4.0664645510449745e-08, |
|
"loss": 1.4335, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.5902061855670104, |
|
"grad_norm": 1.1910808093335385, |
|
"learning_rate": 4.050140865581204e-08, |
|
"loss": 1.458, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.5927835051546393, |
|
"grad_norm": 1.1210216135242885, |
|
"learning_rate": 4.033841685662529e-08, |
|
"loss": 1.4671, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.595360824742268, |
|
"grad_norm": 1.1392325814801574, |
|
"learning_rate": 4.0175670784200066e-08, |
|
"loss": 1.4687, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.597938144329897, |
|
"grad_norm": 1.2066331988995807, |
|
"learning_rate": 4.001317110883477e-08, |
|
"loss": 1.6142, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.600515463917526, |
|
"grad_norm": 1.120036816028406, |
|
"learning_rate": 3.985091849981297e-08, |
|
"loss": 1.4617, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.6030927835051547, |
|
"grad_norm": 1.1171460565708284, |
|
"learning_rate": 3.96889136254007e-08, |
|
"loss": 1.459, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.6056701030927836, |
|
"grad_norm": 1.2472238722902789, |
|
"learning_rate": 3.952715715284363e-08, |
|
"loss": 1.5456, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.6082474226804124, |
|
"grad_norm": 1.2133346933773341, |
|
"learning_rate": 3.93656497483643e-08, |
|
"loss": 1.5134, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.6108247422680413, |
|
"grad_norm": 1.1470733566590117, |
|
"learning_rate": 3.9204392077159544e-08, |
|
"loss": 1.4653, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.61340206185567, |
|
"grad_norm": 1.1608282166724524, |
|
"learning_rate": 3.904338480339755e-08, |
|
"loss": 1.479, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.615979381443299, |
|
"grad_norm": 1.1508782189162872, |
|
"learning_rate": 3.888262859021507e-08, |
|
"loss": 1.4025, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.618556701030928, |
|
"grad_norm": 1.178209399181694, |
|
"learning_rate": 3.872212409971507e-08, |
|
"loss": 1.2948, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.6211340206185567, |
|
"grad_norm": 1.32807190899102, |
|
"learning_rate": 3.856187199296358e-08, |
|
"loss": 1.5456, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.6237113402061856, |
|
"grad_norm": 1.2185169437161736, |
|
"learning_rate": 3.8401872929987166e-08, |
|
"loss": 1.429, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.6262886597938144, |
|
"grad_norm": 1.2304397213352538, |
|
"learning_rate": 3.824212756977027e-08, |
|
"loss": 1.4558, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.6288659793814433, |
|
"grad_norm": 1.1724306586240414, |
|
"learning_rate": 3.8082636570252346e-08, |
|
"loss": 1.4984, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.631443298969072, |
|
"grad_norm": 1.1298977167004856, |
|
"learning_rate": 3.7923400588325147e-08, |
|
"loss": 1.4417, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.634020618556701, |
|
"grad_norm": 1.1784947581476026, |
|
"learning_rate": 3.7764420279830266e-08, |
|
"loss": 1.4164, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.63659793814433, |
|
"grad_norm": 1.155170570736418, |
|
"learning_rate": 3.7605696299556135e-08, |
|
"loss": 1.4371, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.6391752577319587, |
|
"grad_norm": 1.1663523776289366, |
|
"learning_rate": 3.744722930123544e-08, |
|
"loss": 1.4747, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.6417525773195876, |
|
"grad_norm": 1.2126168901096435, |
|
"learning_rate": 3.72890199375426e-08, |
|
"loss": 1.5058, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.6443298969072164, |
|
"grad_norm": 1.2017176914352923, |
|
"learning_rate": 3.71310688600907e-08, |
|
"loss": 1.4733, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.6469072164948453, |
|
"grad_norm": 1.1119469160793427, |
|
"learning_rate": 3.6973376719429125e-08, |
|
"loss": 1.476, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.649484536082474, |
|
"grad_norm": 1.130792424586462, |
|
"learning_rate": 3.681594416504088e-08, |
|
"loss": 1.4494, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.652061855670103, |
|
"grad_norm": 1.222509795849272, |
|
"learning_rate": 3.6658771845339676e-08, |
|
"loss": 1.4999, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.654639175257732, |
|
"grad_norm": 1.1385228914334713, |
|
"learning_rate": 3.650186040766746e-08, |
|
"loss": 1.4402, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.6572164948453607, |
|
"grad_norm": 1.1448576075492045, |
|
"learning_rate": 3.634521049829169e-08, |
|
"loss": 1.4132, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.6597938144329896, |
|
"grad_norm": 1.139064959062427, |
|
"learning_rate": 3.618882276240267e-08, |
|
"loss": 1.3994, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.6623711340206184, |
|
"grad_norm": 1.161606746690635, |
|
"learning_rate": 3.603269784411089e-08, |
|
"loss": 1.4385, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.6649484536082473, |
|
"grad_norm": 1.1300734708150515, |
|
"learning_rate": 3.587683638644437e-08, |
|
"loss": 1.4228, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.667525773195876, |
|
"grad_norm": 1.1979334493577922, |
|
"learning_rate": 3.572123903134606e-08, |
|
"loss": 1.3946, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.670103092783505, |
|
"grad_norm": 1.2108873546484593, |
|
"learning_rate": 3.556590641967114e-08, |
|
"loss": 1.4019, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.6726804123711343, |
|
"grad_norm": 1.252184087003669, |
|
"learning_rate": 3.5410839191184386e-08, |
|
"loss": 1.4863, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.675257731958763, |
|
"grad_norm": 1.1268238345165822, |
|
"learning_rate": 3.525603798455753e-08, |
|
"loss": 1.4624, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.677835051546392, |
|
"grad_norm": 1.2410354943951132, |
|
"learning_rate": 3.5101503437366676e-08, |
|
"loss": 1.5426, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.680412371134021, |
|
"grad_norm": 1.2054964281688654, |
|
"learning_rate": 3.49472361860896e-08, |
|
"loss": 1.4182, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.680412371134021, |
|
"eval_loss": 1.4503966569900513, |
|
"eval_runtime": 78.5776, |
|
"eval_samples_per_second": 21.164, |
|
"eval_steps_per_second": 1.324, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.6829896907216497, |
|
"grad_norm": 1.18692856703466, |
|
"learning_rate": 3.4793236866103294e-08, |
|
"loss": 1.5021, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.6855670103092786, |
|
"grad_norm": 1.099606075968585, |
|
"learning_rate": 3.463950611168111e-08, |
|
"loss": 1.4051, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.6881443298969074, |
|
"grad_norm": 1.1712675559534376, |
|
"learning_rate": 3.448604455599021e-08, |
|
"loss": 1.4565, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.6907216494845363, |
|
"grad_norm": 1.2365327819201322, |
|
"learning_rate": 3.43328528310892e-08, |
|
"loss": 1.4418, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.693298969072165, |
|
"grad_norm": 1.1186618547215839, |
|
"learning_rate": 3.4179931567925215e-08, |
|
"loss": 1.4987, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.695876288659794, |
|
"grad_norm": 1.2081208242761923, |
|
"learning_rate": 3.402728139633142e-08, |
|
"loss": 1.441, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.698453608247423, |
|
"grad_norm": 1.218636962355054, |
|
"learning_rate": 3.387490294502457e-08, |
|
"loss": 1.4067, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.7010309278350517, |
|
"grad_norm": 1.1637394002772754, |
|
"learning_rate": 3.372279684160221e-08, |
|
"loss": 1.5326, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.7036082474226806, |
|
"grad_norm": 1.2353156557559488, |
|
"learning_rate": 3.357096371254008e-08, |
|
"loss": 1.472, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.7061855670103094, |
|
"grad_norm": 1.19587166321243, |
|
"learning_rate": 3.3419404183189813e-08, |
|
"loss": 1.4886, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.7087628865979383, |
|
"grad_norm": 1.1730315855085072, |
|
"learning_rate": 3.326811887777606e-08, |
|
"loss": 1.3887, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.711340206185567, |
|
"grad_norm": 1.2017905489788439, |
|
"learning_rate": 3.3117108419394036e-08, |
|
"loss": 1.4376, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.713917525773196, |
|
"grad_norm": 1.223875153650053, |
|
"learning_rate": 3.2966373430007044e-08, |
|
"loss": 1.4841, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.716494845360825, |
|
"grad_norm": 1.163982928943064, |
|
"learning_rate": 3.2815914530443656e-08, |
|
"loss": 1.5057, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.7190721649484537, |
|
"grad_norm": 1.1065194981403395, |
|
"learning_rate": 3.2665732340395413e-08, |
|
"loss": 1.5145, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.7216494845360826, |
|
"grad_norm": 1.1802479694554426, |
|
"learning_rate": 3.2515827478414227e-08, |
|
"loss": 1.4639, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.7242268041237114, |
|
"grad_norm": 1.1042272626565486, |
|
"learning_rate": 3.236620056190972e-08, |
|
"loss": 1.3944, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.7268041237113403, |
|
"grad_norm": 1.2114102979959467, |
|
"learning_rate": 3.221685220714674e-08, |
|
"loss": 1.4298, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.729381443298969, |
|
"grad_norm": 1.1393577034048052, |
|
"learning_rate": 3.2067783029242866e-08, |
|
"loss": 1.3856, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.731958762886598, |
|
"grad_norm": 1.1037036354008587, |
|
"learning_rate": 3.1918993642165804e-08, |
|
"loss": 1.3889, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.734536082474227, |
|
"grad_norm": 1.2272871402765764, |
|
"learning_rate": 3.177048465873089e-08, |
|
"loss": 1.4043, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.7371134020618557, |
|
"grad_norm": 1.210586273197648, |
|
"learning_rate": 3.1622256690598633e-08, |
|
"loss": 1.4999, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.7396907216494846, |
|
"grad_norm": 1.1746574581016895, |
|
"learning_rate": 3.147431034827208e-08, |
|
"loss": 1.4216, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.7422680412371134, |
|
"grad_norm": 1.1586070909228363, |
|
"learning_rate": 3.1326646241094336e-08, |
|
"loss": 1.4696, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.7448453608247423, |
|
"grad_norm": 1.1312629920265729, |
|
"learning_rate": 3.11792649772461e-08, |
|
"loss": 1.5172, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.747422680412371, |
|
"grad_norm": 1.181603470826963, |
|
"learning_rate": 3.1032167163743115e-08, |
|
"loss": 1.4453, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 1.1958639955584416, |
|
"learning_rate": 3.0885353406433703e-08, |
|
"loss": 1.5075, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.752577319587629, |
|
"grad_norm": 1.200258914978432, |
|
"learning_rate": 3.073882430999619e-08, |
|
"loss": 1.409, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.7551546391752577, |
|
"grad_norm": 1.1425311029684388, |
|
"learning_rate": 3.05925804779366e-08, |
|
"loss": 1.4537, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.7577319587628866, |
|
"grad_norm": 1.1441189180372324, |
|
"learning_rate": 3.044662251258595e-08, |
|
"loss": 1.567, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.7603092783505154, |
|
"grad_norm": 1.1519696479164119, |
|
"learning_rate": 3.030095101509786e-08, |
|
"loss": 1.4678, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.7628865979381443, |
|
"grad_norm": 1.2588291000562302, |
|
"learning_rate": 3.0155566585446114e-08, |
|
"loss": 1.5141, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.765463917525773, |
|
"grad_norm": 1.1712961770904633, |
|
"learning_rate": 3.0010469822422156e-08, |
|
"loss": 1.4298, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.768041237113402, |
|
"grad_norm": 1.2155090578526457, |
|
"learning_rate": 2.986566132363259e-08, |
|
"loss": 1.5341, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.770618556701031, |
|
"grad_norm": 1.1558741286842076, |
|
"learning_rate": 2.972114168549682e-08, |
|
"loss": 1.4089, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.7731958762886597, |
|
"grad_norm": 1.281655267971227, |
|
"learning_rate": 2.9576911503244494e-08, |
|
"loss": 1.3596, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.7757731958762886, |
|
"grad_norm": 1.1885614767244468, |
|
"learning_rate": 2.9432971370912995e-08, |
|
"loss": 1.4181, |
|
"step": 1164 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1552, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 388, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 305116087320576.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|