|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 11895, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008406893652795292, |
|
"grad_norm": 0.14043055474758148, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.4104, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.016813787305590584, |
|
"grad_norm": 0.2322196364402771, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.4058, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.025220680958385876, |
|
"grad_norm": 0.330422580242157, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3956, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03362757461118117, |
|
"grad_norm": 0.37281373143196106, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.3817, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.042034468263976464, |
|
"grad_norm": 0.4371340274810791, |
|
"learning_rate": 1e-05, |
|
"loss": 2.2819, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05044136191677175, |
|
"grad_norm": 0.5313137173652649, |
|
"learning_rate": 1.2e-05, |
|
"loss": 2.2971, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05884825556956705, |
|
"grad_norm": 0.5993666648864746, |
|
"learning_rate": 1.4e-05, |
|
"loss": 2.2486, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.06725514922236234, |
|
"grad_norm": 0.885300874710083, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 2.2134, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07566204287515763, |
|
"grad_norm": 0.7315155267715454, |
|
"learning_rate": 1.8e-05, |
|
"loss": 2.1677, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.08406893652795293, |
|
"grad_norm": 0.6705694794654846, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1677, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.09247583018074822, |
|
"grad_norm": 0.6811093688011169, |
|
"learning_rate": 1.999584295057038e-05, |
|
"loss": 2.1539, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.1008827238335435, |
|
"grad_norm": 0.8088228702545166, |
|
"learning_rate": 1.9983375258493504e-05, |
|
"loss": 2.1099, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.1092896174863388, |
|
"grad_norm": 0.6950477361679077, |
|
"learning_rate": 1.996260728953182e-05, |
|
"loss": 2.0899, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.1176965111391341, |
|
"grad_norm": 0.746992290019989, |
|
"learning_rate": 1.9933556310380036e-05, |
|
"loss": 2.1047, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.12610340479192939, |
|
"grad_norm": 0.9399320483207703, |
|
"learning_rate": 1.9896246474309414e-05, |
|
"loss": 2.0344, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.13451029844472467, |
|
"grad_norm": 0.8537988066673279, |
|
"learning_rate": 1.9850708801086507e-05, |
|
"loss": 2.0576, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.14291719209751996, |
|
"grad_norm": 0.8270643949508667, |
|
"learning_rate": 1.9796981151183013e-05, |
|
"loss": 2.019, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.15132408575031525, |
|
"grad_norm": 1.1590845584869385, |
|
"learning_rate": 1.9735108194298206e-05, |
|
"loss": 2.0348, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.15973097940311054, |
|
"grad_norm": 0.8914215564727783, |
|
"learning_rate": 1.9665141372220112e-05, |
|
"loss": 2.0314, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.16813787305590586, |
|
"grad_norm": 0.7579370141029358, |
|
"learning_rate": 1.9587138856056303e-05, |
|
"loss": 2.0636, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.17654476670870115, |
|
"grad_norm": 0.9895981550216675, |
|
"learning_rate": 1.9501165497869832e-05, |
|
"loss": 1.9961, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.18495166036149643, |
|
"grad_norm": 0.9374507069587708, |
|
"learning_rate": 1.940729277676063e-05, |
|
"loss": 1.9923, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.19335855401429172, |
|
"grad_norm": 0.9945927858352661, |
|
"learning_rate": 1.930559873943704e-05, |
|
"loss": 1.9944, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.201765447667087, |
|
"grad_norm": 0.9538114070892334, |
|
"learning_rate": 1.9196167935327036e-05, |
|
"loss": 2.036, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.2101723413198823, |
|
"grad_norm": 0.780885636806488, |
|
"learning_rate": 1.9079091346282977e-05, |
|
"loss": 1.9894, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.2185792349726776, |
|
"grad_norm": 1.2008519172668457, |
|
"learning_rate": 1.8954466310938405e-05, |
|
"loss": 1.9837, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.22698612862547288, |
|
"grad_norm": 1.089125394821167, |
|
"learning_rate": 1.8822396443779745e-05, |
|
"loss": 2.016, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2353930222782682, |
|
"grad_norm": 1.087217092514038, |
|
"learning_rate": 1.868299154900018e-05, |
|
"loss": 2.0407, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.24379991593106348, |
|
"grad_norm": 0.9000464677810669, |
|
"learning_rate": 1.8536367529207375e-05, |
|
"loss": 1.9954, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.25220680958385877, |
|
"grad_norm": 0.7694302201271057, |
|
"learning_rate": 1.83826462890609e-05, |
|
"loss": 1.9973, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.26061370323665406, |
|
"grad_norm": 0.8629293441772461, |
|
"learning_rate": 1.8221955633919495e-05, |
|
"loss": 1.9994, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.26902059688944935, |
|
"grad_norm": 0.839648962020874, |
|
"learning_rate": 1.8054429163582415e-05, |
|
"loss": 2.003, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.27742749054224464, |
|
"grad_norm": 0.8285431861877441, |
|
"learning_rate": 1.7880206161213255e-05, |
|
"loss": 1.9925, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.2858343841950399, |
|
"grad_norm": 0.837740421295166, |
|
"learning_rate": 1.7699431477538534e-05, |
|
"loss": 1.9801, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.2942412778478352, |
|
"grad_norm": 0.8980201482772827, |
|
"learning_rate": 1.7512255410417392e-05, |
|
"loss": 1.9764, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.3026481715006305, |
|
"grad_norm": 1.3097134828567505, |
|
"learning_rate": 1.731883357988244e-05, |
|
"loss": 1.9771, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.3110550651534258, |
|
"grad_norm": 0.7765768766403198, |
|
"learning_rate": 1.7119326798755734e-05, |
|
"loss": 1.9589, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.3194619588062211, |
|
"grad_norm": 1.0500297546386719, |
|
"learning_rate": 1.6913900938947417e-05, |
|
"loss": 1.9478, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 0.9133660793304443, |
|
"learning_rate": 1.6702726793548153e-05, |
|
"loss": 1.9782, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.3362757461118117, |
|
"grad_norm": 0.9790305495262146, |
|
"learning_rate": 1.6485979934830084e-05, |
|
"loss": 1.9712, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.344682639764607, |
|
"grad_norm": 1.1819267272949219, |
|
"learning_rate": 1.626384056827429e-05, |
|
"loss": 1.9597, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3530895334174023, |
|
"grad_norm": 1.196151852607727, |
|
"learning_rate": 1.6036493382746178e-05, |
|
"loss": 1.9571, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3614964270701976, |
|
"grad_norm": 0.8918384909629822, |
|
"learning_rate": 1.580412739694333e-05, |
|
"loss": 1.9769, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.36990332072299287, |
|
"grad_norm": 2.0516951084136963, |
|
"learning_rate": 1.5566935802243496e-05, |
|
"loss": 1.9757, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.37831021437578816, |
|
"grad_norm": 0.9879031777381897, |
|
"learning_rate": 1.5325115802083373e-05, |
|
"loss": 1.9893, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.38671710802858345, |
|
"grad_norm": 0.6235149502754211, |
|
"learning_rate": 1.5078868448001704e-05, |
|
"loss": 2.008, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.39512400168137873, |
|
"grad_norm": 0.9613019824028015, |
|
"learning_rate": 1.4828398472483057e-05, |
|
"loss": 1.9669, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.403530895334174, |
|
"grad_norm": 1.0296316146850586, |
|
"learning_rate": 1.4573914118741201e-05, |
|
"loss": 1.8964, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.4119377889869693, |
|
"grad_norm": 0.8810610771179199, |
|
"learning_rate": 1.4315626967583657e-05, |
|
"loss": 1.9226, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.4203446826397646, |
|
"grad_norm": 0.9355632066726685, |
|
"learning_rate": 1.40537517615013e-05, |
|
"loss": 1.9678, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.4287515762925599, |
|
"grad_norm": 0.7143288850784302, |
|
"learning_rate": 1.3788506226129348e-05, |
|
"loss": 1.9328, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.4371584699453552, |
|
"grad_norm": 0.987920880317688, |
|
"learning_rate": 1.3520110889228104e-05, |
|
"loss": 1.8968, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.44556536359815047, |
|
"grad_norm": 0.8306659460067749, |
|
"learning_rate": 1.3248788897334005e-05, |
|
"loss": 1.9294, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.45397225725094575, |
|
"grad_norm": 1.0377694368362427, |
|
"learning_rate": 1.2974765830233383e-05, |
|
"loss": 1.9835, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.46237915090374104, |
|
"grad_norm": 0.9360339641571045, |
|
"learning_rate": 1.269826951341319e-05, |
|
"loss": 1.9121, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4707860445565364, |
|
"grad_norm": 1.0214323997497559, |
|
"learning_rate": 1.2419529828644661e-05, |
|
"loss": 1.9133, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.4791929382093317, |
|
"grad_norm": 1.1800683736801147, |
|
"learning_rate": 1.2138778522857307e-05, |
|
"loss": 1.934, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.48759983186212696, |
|
"grad_norm": 1.0064023733139038, |
|
"learning_rate": 1.1856249015462242e-05, |
|
"loss": 1.9665, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.49600672551492225, |
|
"grad_norm": 1.3952449560165405, |
|
"learning_rate": 1.1572176204284986e-05, |
|
"loss": 1.9426, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.5044136191677175, |
|
"grad_norm": 1.0473861694335938, |
|
"learning_rate": 1.1286796270269076e-05, |
|
"loss": 1.9559, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 1.101931095123291, |
|
"learning_rate": 1.1000346481112887e-05, |
|
"loss": 1.9103, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.5212274064733081, |
|
"grad_norm": 1.0466363430023193, |
|
"learning_rate": 1.0713064994002956e-05, |
|
"loss": 1.9395, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.5296343001261034, |
|
"grad_norm": 0.9257377982139587, |
|
"learning_rate": 1.0425190657607702e-05, |
|
"loss": 1.9141, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.5380411937788987, |
|
"grad_norm": 0.779354453086853, |
|
"learning_rate": 1.0136962813496306e-05, |
|
"loss": 1.9647, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.546448087431694, |
|
"grad_norm": 0.8354730606079102, |
|
"learning_rate": 9.848621097147772e-06, |
|
"loss": 2.0283, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5548549810844893, |
|
"grad_norm": 1.0222142934799194, |
|
"learning_rate": 9.560405238715588e-06, |
|
"loss": 1.9444, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.5632618747372846, |
|
"grad_norm": 1.3046057224273682, |
|
"learning_rate": 9.272554863713739e-06, |
|
"loss": 1.9555, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.5716687683900799, |
|
"grad_norm": 0.8095905184745789, |
|
"learning_rate": 8.985309293789662e-06, |
|
"loss": 1.9761, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5800756620428752, |
|
"grad_norm": 1.0805299282073975, |
|
"learning_rate": 8.698907347749885e-06, |
|
"loss": 1.9043, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5884825556956704, |
|
"grad_norm": 1.3454480171203613, |
|
"learning_rate": 8.413587143003691e-06, |
|
"loss": 1.902, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5968894493484658, |
|
"grad_norm": 1.1316404342651367, |
|
"learning_rate": 8.129585897589964e-06, |
|
"loss": 1.942, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.605296343001261, |
|
"grad_norm": 0.9173849821090698, |
|
"learning_rate": 7.847139732951751e-06, |
|
"loss": 1.9336, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.6137032366540563, |
|
"grad_norm": 0.8359085321426392, |
|
"learning_rate": 7.566483477622577e-06, |
|
"loss": 1.9584, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.6221101303068516, |
|
"grad_norm": 1.6915035247802734, |
|
"learning_rate": 7.28785047198767e-06, |
|
"loss": 1.9113, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.6305170239596469, |
|
"grad_norm": 0.8478715419769287, |
|
"learning_rate": 7.011472374282457e-06, |
|
"loss": 1.9362, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.6389239176124422, |
|
"grad_norm": 1.5024291276931763, |
|
"learning_rate": 6.737578967989626e-06, |
|
"loss": 1.9636, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.6473308112652375, |
|
"grad_norm": 0.7754480242729187, |
|
"learning_rate": 6.466397970794855e-06, |
|
"loss": 1.9107, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 0.7962978482246399, |
|
"learning_rate": 6.198154845260089e-06, |
|
"loss": 1.8819, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.6641445985708281, |
|
"grad_norm": 0.9278832077980042, |
|
"learning_rate": 5.933072611371725e-06, |
|
"loss": 1.9741, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.6725514922236234, |
|
"grad_norm": 1.244887351989746, |
|
"learning_rate": 5.671371661119609e-06, |
|
"loss": 2.0055, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.6809583858764187, |
|
"grad_norm": 1.1086173057556152, |
|
"learning_rate": 5.415832074569403e-06, |
|
"loss": 1.8962, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.689365279529214, |
|
"grad_norm": 1.0748802423477173, |
|
"learning_rate": 5.161504255249876e-06, |
|
"loss": 1.9434, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6977721731820092, |
|
"grad_norm": 0.7687519788742065, |
|
"learning_rate": 4.911199209125536e-06, |
|
"loss": 1.95, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.7061790668348046, |
|
"grad_norm": 0.9328936338424683, |
|
"learning_rate": 4.665125042286226e-06, |
|
"loss": 1.9271, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.7145859604875998, |
|
"grad_norm": 1.0116279125213623, |
|
"learning_rate": 4.423486343226934e-06, |
|
"loss": 1.8916, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.7229928541403952, |
|
"grad_norm": 0.9574350714683533, |
|
"learning_rate": 4.186484012750876e-06, |
|
"loss": 2.0135, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.7313997477931904, |
|
"grad_norm": 1.5315290689468384, |
|
"learning_rate": 3.954315096938598e-06, |
|
"loss": 1.9159, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.7398066414459857, |
|
"grad_norm": 1.2323825359344482, |
|
"learning_rate": 3.7271726233219098e-06, |
|
"loss": 1.9175, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.748213535098781, |
|
"grad_norm": 0.7751179933547974, |
|
"learning_rate": 3.50524544039889e-06, |
|
"loss": 1.9278, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.7566204287515763, |
|
"grad_norm": 1.096082329750061, |
|
"learning_rate": 3.288718060623376e-06, |
|
"loss": 1.9253, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.7650273224043715, |
|
"grad_norm": 0.9929354786872864, |
|
"learning_rate": 3.077770506999487e-06, |
|
"loss": 1.9252, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.7734342160571669, |
|
"grad_norm": 0.8709311485290527, |
|
"learning_rate": 2.872578163408717e-06, |
|
"loss": 1.9219, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.7818411097099621, |
|
"grad_norm": 1.3237998485565186, |
|
"learning_rate": 2.673311628794043e-06, |
|
"loss": 1.9318, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.7902480033627575, |
|
"grad_norm": 1.3490018844604492, |
|
"learning_rate": 2.4801365753222795e-06, |
|
"loss": 1.9017, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.7986548970155527, |
|
"grad_norm": 1.5231995582580566, |
|
"learning_rate": 2.293213610642594e-06, |
|
"loss": 1.8539, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.807061790668348, |
|
"grad_norm": 0.9112081527709961, |
|
"learning_rate": 2.1126981443557303e-06, |
|
"loss": 1.9095, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.8154686843211434, |
|
"grad_norm": 1.3989226818084717, |
|
"learning_rate": 1.9387402588049167e-06, |
|
"loss": 1.9411, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.8238755779739386, |
|
"grad_norm": 0.9119231104850769, |
|
"learning_rate": 1.7714845842959415e-06, |
|
"loss": 1.8862, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.832282471626734, |
|
"grad_norm": 1.1409211158752441, |
|
"learning_rate": 1.6110701788500682e-06, |
|
"loss": 1.897, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.8406893652795292, |
|
"grad_norm": 1.3277921676635742, |
|
"learning_rate": 1.4576304125898234e-06, |
|
"loss": 1.9049, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.8490962589323245, |
|
"grad_norm": 1.3236161470413208, |
|
"learning_rate": 1.311292856853772e-06, |
|
"loss": 1.8771, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.8575031525851198, |
|
"grad_norm": 1.0454002618789673, |
|
"learning_rate": 1.1721791781324343e-06, |
|
"loss": 1.9074, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.8659100462379151, |
|
"grad_norm": 1.196260929107666, |
|
"learning_rate": 1.0404050369135698e-06, |
|
"loss": 1.9241, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.8743169398907104, |
|
"grad_norm": 1.2088180780410767, |
|
"learning_rate": 9.160799915208962e-07, |
|
"loss": 1.9161, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.8827238335435057, |
|
"grad_norm": 1.2079304456710815, |
|
"learning_rate": 7.993074070262185e-07, |
|
"loss": 1.9722, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.8911307271963009, |
|
"grad_norm": 0.9400559067726135, |
|
"learning_rate": 6.901843693106935e-07, |
|
"loss": 1.972, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.8995376208490963, |
|
"grad_norm": 1.1368870735168457, |
|
"learning_rate": 5.888016043466583e-07, |
|
"loss": 1.9051, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.9079445145018915, |
|
"grad_norm": 1.0495091676712036, |
|
"learning_rate": 4.952434027671659e-07, |
|
"loss": 1.8549, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.9163514081546869, |
|
"grad_norm": 0.832502007484436, |
|
"learning_rate": 4.095875497859192e-07, |
|
"loss": 1.9349, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.9247583018074821, |
|
"grad_norm": 1.295791745185852, |
|
"learning_rate": 3.3190526052587545e-07, |
|
"loss": 1.9073, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.9331651954602774, |
|
"grad_norm": 0.9404931664466858, |
|
"learning_rate": 2.6226112081028544e-07, |
|
"loss": 1.9349, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.9415720891130728, |
|
"grad_norm": 0.8282467722892761, |
|
"learning_rate": 2.0071303346540505e-07, |
|
"loss": 1.9868, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.949978982765868, |
|
"grad_norm": 1.1069015264511108, |
|
"learning_rate": 1.473121701795144e-07, |
|
"loss": 1.8612, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.9583858764186634, |
|
"grad_norm": 1.072883129119873, |
|
"learning_rate": 1.0210292895826423e-07, |
|
"loss": 1.8956, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.9667927700714586, |
|
"grad_norm": 1.38330078125, |
|
"learning_rate": 6.512289721174126e-08, |
|
"loss": 1.9686, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.9751996637242539, |
|
"grad_norm": 1.1821762323379517, |
|
"learning_rate": 3.640282050392241e-08, |
|
"loss": 1.9082, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 1.2466627359390259, |
|
"learning_rate": 1.5966576990507078e-08, |
|
"loss": 1.896, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.9920134510298445, |
|
"grad_norm": 1.2905254364013672, |
|
"learning_rate": 3.83115756638297e-09, |
|
"loss": 1.9837, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 11895, |
|
"total_flos": 2.17890078425088e+17, |
|
"train_loss": 1.9851969016204616, |
|
"train_runtime": 3196.2319, |
|
"train_samples_per_second": 7.443, |
|
"train_steps_per_second": 3.722 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 11895, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.17890078425088e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|