|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.4825796886582654, |
|
"eval_steps": 500, |
|
"global_step": 1500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.2311009168624878, |
|
"learning_rate": 5.6012058970266934e-05, |
|
"loss": 1.6789, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.4872009754180908, |
|
"learning_rate": 7.287336883921704e-05, |
|
"loss": 1.3884, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.7868104577064514, |
|
"learning_rate": 8.273660282559241e-05, |
|
"loss": 1.2404, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.35713163018226624, |
|
"learning_rate": 8.973467870816715e-05, |
|
"loss": 1.2036, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.3057297468185425, |
|
"learning_rate": 9.516280807158375e-05, |
|
"loss": 1.1788, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.3608625531196594, |
|
"learning_rate": 9.959791269454252e-05, |
|
"loss": 1.1525, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.3684042990207672, |
|
"learning_rate": 9.959204487506375e-05, |
|
"loss": 1.1261, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4175470471382141, |
|
"learning_rate": 9.908210096889343e-05, |
|
"loss": 1.1214, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4869045317173004, |
|
"learning_rate": 9.85721570627231e-05, |
|
"loss": 1.1043, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.6399329900741577, |
|
"learning_rate": 9.806221315655279e-05, |
|
"loss": 1.1059, |
|
"max_memory_allocated (GB)": 91.88, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5639649033546448, |
|
"learning_rate": 9.755226925038246e-05, |
|
"loss": 1.0711, |
|
"max_memory_allocated (GB)": 91.91, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.44946518540382385, |
|
"learning_rate": 9.704232534421214e-05, |
|
"loss": 1.0644, |
|
"max_memory_allocated (GB)": 91.91, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5573060512542725, |
|
"learning_rate": 9.653238143804181e-05, |
|
"loss": 1.0634, |
|
"max_memory_allocated (GB)": 91.92, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.6105266213417053, |
|
"learning_rate": 9.60224375318715e-05, |
|
"loss": 1.0597, |
|
"max_memory_allocated (GB)": 91.92, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.6286391615867615, |
|
"learning_rate": 9.551249362570118e-05, |
|
"loss": 1.0528, |
|
"max_memory_allocated (GB)": 91.92, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.8733624815940857, |
|
"learning_rate": 9.500254971953085e-05, |
|
"loss": 1.0524, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.6268635392189026, |
|
"learning_rate": 9.449260581336054e-05, |
|
"loss": 1.0345, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.5832647681236267, |
|
"learning_rate": 9.398266190719021e-05, |
|
"loss": 1.0262, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.6518144011497498, |
|
"learning_rate": 9.347271800101989e-05, |
|
"loss": 1.0318, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.49274373054504395, |
|
"learning_rate": 9.296277409484956e-05, |
|
"loss": 1.0298, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.5237769484519958, |
|
"learning_rate": 9.245283018867925e-05, |
|
"loss": 1.0176, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.564319372177124, |
|
"learning_rate": 9.194288628250894e-05, |
|
"loss": 1.0172, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.4697343111038208, |
|
"learning_rate": 9.14329423763386e-05, |
|
"loss": 1.0262, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.5207454562187195, |
|
"learning_rate": 9.092299847016829e-05, |
|
"loss": 1.024, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4637609124183655, |
|
"learning_rate": 9.041305456399796e-05, |
|
"loss": 1.0069, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.47436627745628357, |
|
"learning_rate": 8.990311065782764e-05, |
|
"loss": 1.0119, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.5096576809883118, |
|
"learning_rate": 8.939316675165733e-05, |
|
"loss": 1.0092, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.5780492424964905, |
|
"learning_rate": 8.8883222845487e-05, |
|
"loss": 1.0082, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.4528846740722656, |
|
"learning_rate": 8.837327893931669e-05, |
|
"loss": 1.0057, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.5259899497032166, |
|
"learning_rate": 8.786333503314635e-05, |
|
"loss": 0.9989, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.5007658004760742, |
|
"learning_rate": 8.735339112697604e-05, |
|
"loss": 0.9997, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.4527484178543091, |
|
"learning_rate": 8.684344722080571e-05, |
|
"loss": 0.9949, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.4365575611591339, |
|
"learning_rate": 8.633350331463539e-05, |
|
"loss": 0.9943, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.49428853392601013, |
|
"learning_rate": 8.582355940846507e-05, |
|
"loss": 1.0005, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.4611368775367737, |
|
"learning_rate": 8.531361550229475e-05, |
|
"loss": 0.9972, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.43548157811164856, |
|
"learning_rate": 8.480367159612444e-05, |
|
"loss": 0.9833, |
|
"max_memory_allocated (GB)": 91.93, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.4797479808330536, |
|
"learning_rate": 8.42937276899541e-05, |
|
"loss": 0.981, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.44958415627479553, |
|
"learning_rate": 8.378378378378379e-05, |
|
"loss": 0.9969, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.4499351680278778, |
|
"learning_rate": 8.327383987761347e-05, |
|
"loss": 0.9847, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.45021358132362366, |
|
"learning_rate": 8.276389597144315e-05, |
|
"loss": 0.9874, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.4754478335380554, |
|
"learning_rate": 8.225395206527282e-05, |
|
"loss": 0.9955, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.44393980503082275, |
|
"learning_rate": 8.17440081591025e-05, |
|
"loss": 0.9898, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.43429532647132874, |
|
"learning_rate": 8.123406425293219e-05, |
|
"loss": 0.9905, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.4695710837841034, |
|
"learning_rate": 8.072412034676186e-05, |
|
"loss": 0.9702, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.40997833013534546, |
|
"learning_rate": 8.021417644059154e-05, |
|
"loss": 0.9825, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.4330343008041382, |
|
"learning_rate": 7.970423253442122e-05, |
|
"loss": 0.9777, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.42674386501312256, |
|
"learning_rate": 7.91942886282509e-05, |
|
"loss": 0.9794, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.4461188316345215, |
|
"learning_rate": 7.868434472208057e-05, |
|
"loss": 0.979, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.4532679617404938, |
|
"learning_rate": 7.817440081591025e-05, |
|
"loss": 0.9764, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.42160096764564514, |
|
"learning_rate": 7.766445690973994e-05, |
|
"loss": 0.967, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.45428088307380676, |
|
"learning_rate": 7.715451300356961e-05, |
|
"loss": 0.975, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3874836564064026, |
|
"learning_rate": 7.664456909739929e-05, |
|
"loss": 0.9707, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.4256057143211365, |
|
"learning_rate": 7.613462519122897e-05, |
|
"loss": 0.9775, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.3986164331436157, |
|
"learning_rate": 7.562468128505865e-05, |
|
"loss": 0.972, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.4044497311115265, |
|
"learning_rate": 7.511473737888832e-05, |
|
"loss": 0.9725, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.439773291349411, |
|
"learning_rate": 7.460479347271801e-05, |
|
"loss": 0.9667, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.42315754294395447, |
|
"learning_rate": 7.409484956654769e-05, |
|
"loss": 0.9714, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.38059332966804504, |
|
"learning_rate": 7.358490566037736e-05, |
|
"loss": 0.979, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.4393787086009979, |
|
"learning_rate": 7.307496175420703e-05, |
|
"loss": 0.9663, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.4336337447166443, |
|
"learning_rate": 7.256501784803672e-05, |
|
"loss": 0.9661, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.41273126006126404, |
|
"learning_rate": 7.20550739418664e-05, |
|
"loss": 0.9672, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.45420095324516296, |
|
"learning_rate": 7.154513003569607e-05, |
|
"loss": 0.9668, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.4193101227283478, |
|
"learning_rate": 7.103518612952576e-05, |
|
"loss": 0.9684, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.4247801601886749, |
|
"learning_rate": 7.052524222335543e-05, |
|
"loss": 0.9652, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.5118327140808105, |
|
"learning_rate": 7.001529831718512e-05, |
|
"loss": 0.9777, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.42659929394721985, |
|
"learning_rate": 6.950535441101478e-05, |
|
"loss": 0.9624, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.45405977964401245, |
|
"learning_rate": 6.899541050484447e-05, |
|
"loss": 0.9585, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.40387892723083496, |
|
"learning_rate": 6.848546659867415e-05, |
|
"loss": 0.9633, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.39345934987068176, |
|
"learning_rate": 6.797552269250382e-05, |
|
"loss": 0.9788, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.4187304675579071, |
|
"learning_rate": 6.746557878633351e-05, |
|
"loss": 0.9651, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.4518466293811798, |
|
"learning_rate": 6.695563488016318e-05, |
|
"loss": 0.965, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.4474211037158966, |
|
"learning_rate": 6.644569097399287e-05, |
|
"loss": 0.9714, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.38997548818588257, |
|
"learning_rate": 6.593574706782255e-05, |
|
"loss": 0.9528, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.42444851994514465, |
|
"learning_rate": 6.542580316165222e-05, |
|
"loss": 0.9638, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.4098523259162903, |
|
"learning_rate": 6.491585925548191e-05, |
|
"loss": 0.9534, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.42875897884368896, |
|
"learning_rate": 6.440591534931157e-05, |
|
"loss": 0.9631, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.4514971077442169, |
|
"learning_rate": 6.389597144314126e-05, |
|
"loss": 0.9623, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.43437400460243225, |
|
"learning_rate": 6.338602753697093e-05, |
|
"loss": 0.9657, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.40089166164398193, |
|
"learning_rate": 6.287608363080062e-05, |
|
"loss": 0.9607, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3970320224761963, |
|
"learning_rate": 6.23661397246303e-05, |
|
"loss": 0.9623, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.5439819693565369, |
|
"learning_rate": 6.185619581845997e-05, |
|
"loss": 0.9585, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.37487202882766724, |
|
"learning_rate": 6.134625191228966e-05, |
|
"loss": 0.9509, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.42697498202323914, |
|
"learning_rate": 6.0836308006119326e-05, |
|
"loss": 0.9525, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.38735342025756836, |
|
"learning_rate": 6.032636409994901e-05, |
|
"loss": 0.954, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.4077759087085724, |
|
"learning_rate": 5.981642019377869e-05, |
|
"loss": 0.9528, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.38065391778945923, |
|
"learning_rate": 5.930647628760837e-05, |
|
"loss": 0.9589, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3892410695552826, |
|
"learning_rate": 5.879653238143804e-05, |
|
"loss": 0.9538, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.3951966166496277, |
|
"learning_rate": 5.8286588475267726e-05, |
|
"loss": 0.9518, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.5943160057067871, |
|
"learning_rate": 5.777664456909741e-05, |
|
"loss": 0.9437, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.440020889043808, |
|
"learning_rate": 5.7266700662927075e-05, |
|
"loss": 0.9635, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.44286438822746277, |
|
"learning_rate": 5.6756756756756757e-05, |
|
"loss": 0.9511, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.4247153699398041, |
|
"learning_rate": 5.624681285058644e-05, |
|
"loss": 0.9475, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.42017248272895813, |
|
"learning_rate": 5.573686894441612e-05, |
|
"loss": 0.9436, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.4182833731174469, |
|
"learning_rate": 5.5226925038245794e-05, |
|
"loss": 0.9557, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.41079315543174744, |
|
"learning_rate": 5.4716981132075475e-05, |
|
"loss": 0.9585, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3954004943370819, |
|
"learning_rate": 5.4207037225905157e-05, |
|
"loss": 0.9478, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.39996138215065, |
|
"learning_rate": 5.369709331973484e-05, |
|
"loss": 0.947, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.39325571060180664, |
|
"learning_rate": 5.3187149413564506e-05, |
|
"loss": 0.9473, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.43744412064552307, |
|
"learning_rate": 5.267720550739419e-05, |
|
"loss": 0.9437, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.36840781569480896, |
|
"learning_rate": 5.216726160122387e-05, |
|
"loss": 0.9441, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.4539542496204376, |
|
"learning_rate": 5.165731769505354e-05, |
|
"loss": 0.9518, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 0.4364802837371826, |
|
"learning_rate": 5.1147373788883224e-05, |
|
"loss": 0.9423, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 0.42135128378868103, |
|
"learning_rate": 5.0637429882712906e-05, |
|
"loss": 0.9519, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 0.3595232367515564, |
|
"learning_rate": 5.012748597654259e-05, |
|
"loss": 0.9494, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 0.38723716139793396, |
|
"learning_rate": 4.961754207037226e-05, |
|
"loss": 0.9459, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.39567115902900696, |
|
"learning_rate": 4.910759816420194e-05, |
|
"loss": 0.9457, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 0.3695621192455292, |
|
"learning_rate": 4.859765425803162e-05, |
|
"loss": 0.9423, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.37855803966522217, |
|
"learning_rate": 4.80877103518613e-05, |
|
"loss": 0.9484, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 0.3890206515789032, |
|
"learning_rate": 4.7577766445690974e-05, |
|
"loss": 0.9452, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 0.38612017035484314, |
|
"learning_rate": 4.7067822539520655e-05, |
|
"loss": 0.9351, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 0.36654138565063477, |
|
"learning_rate": 4.655787863335033e-05, |
|
"loss": 0.9513, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 0.41428259015083313, |
|
"learning_rate": 4.604793472718002e-05, |
|
"loss": 0.9469, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 0.42189493775367737, |
|
"learning_rate": 4.553799082100969e-05, |
|
"loss": 0.9367, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 0.4327576160430908, |
|
"learning_rate": 4.5028046914839374e-05, |
|
"loss": 0.9333, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 0.4243718981742859, |
|
"learning_rate": 4.451810300866905e-05, |
|
"loss": 0.9354, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.3888280391693115, |
|
"learning_rate": 4.400815910249872e-05, |
|
"loss": 0.9381, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 0.36528488993644714, |
|
"learning_rate": 4.3498215196328404e-05, |
|
"loss": 0.953, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 0.410566121339798, |
|
"learning_rate": 4.2988271290158086e-05, |
|
"loss": 0.9491, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 0.39953145384788513, |
|
"learning_rate": 4.247832738398777e-05, |
|
"loss": 0.9342, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 0.37041422724723816, |
|
"learning_rate": 4.196838347781744e-05, |
|
"loss": 0.9485, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 0.41900932788848877, |
|
"learning_rate": 4.145843957164712e-05, |
|
"loss": 0.9463, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 0.39692798256874084, |
|
"learning_rate": 4.09484956654768e-05, |
|
"loss": 0.9425, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 0.4093817472457886, |
|
"learning_rate": 4.043855175930648e-05, |
|
"loss": 0.9439, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 0.3851373493671417, |
|
"learning_rate": 3.992860785313616e-05, |
|
"loss": 0.9478, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 0.39068475365638733, |
|
"learning_rate": 3.9418663946965835e-05, |
|
"loss": 0.9425, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.4238007068634033, |
|
"learning_rate": 3.8908720040795516e-05, |
|
"loss": 0.9387, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 0.3815898299217224, |
|
"learning_rate": 3.839877613462519e-05, |
|
"loss": 0.9367, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 0.3882830739021301, |
|
"learning_rate": 3.788883222845487e-05, |
|
"loss": 0.9529, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.5217743515968323, |
|
"learning_rate": 3.737888832228455e-05, |
|
"loss": 0.9372, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 0.381600022315979, |
|
"learning_rate": 3.6868944416114235e-05, |
|
"loss": 0.9344, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 0.3727063238620758, |
|
"learning_rate": 3.635900050994391e-05, |
|
"loss": 0.9505, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.35615187883377075, |
|
"learning_rate": 3.5849056603773584e-05, |
|
"loss": 0.9329, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 0.3927929103374481, |
|
"learning_rate": 3.5339112697603265e-05, |
|
"loss": 0.9425, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 0.42496153712272644, |
|
"learning_rate": 3.482916879143294e-05, |
|
"loss": 0.9354, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 0.38419976830482483, |
|
"learning_rate": 3.431922488526262e-05, |
|
"loss": 0.9402, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"grad_norm": 0.4346012771129608, |
|
"learning_rate": 3.38092809790923e-05, |
|
"loss": 0.9444, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.4262070655822754, |
|
"learning_rate": 3.3299337072921984e-05, |
|
"loss": 0.9456, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"grad_norm": 0.3871968984603882, |
|
"learning_rate": 3.278939316675166e-05, |
|
"loss": 0.946, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 0.36344829201698303, |
|
"learning_rate": 3.227944926058134e-05, |
|
"loss": 0.9354, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 0.38965150713920593, |
|
"learning_rate": 3.1769505354411015e-05, |
|
"loss": 0.9268, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"grad_norm": 0.39498674869537354, |
|
"learning_rate": 3.1259561448240696e-05, |
|
"loss": 0.9187, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 0.3658037781715393, |
|
"learning_rate": 3.074961754207038e-05, |
|
"loss": 0.9335, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 0.37881678342819214, |
|
"learning_rate": 3.0239673635900052e-05, |
|
"loss": 0.942, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"grad_norm": 0.37767714262008667, |
|
"learning_rate": 2.9729729729729733e-05, |
|
"loss": 0.9308, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 0.3596402108669281, |
|
"learning_rate": 2.9219785823559408e-05, |
|
"loss": 0.9217, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 0.3982533812522888, |
|
"learning_rate": 2.8709841917389093e-05, |
|
"loss": 0.9321, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.37246957421302795, |
|
"learning_rate": 2.8199898011218767e-05, |
|
"loss": 0.9333, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 0.3782934844493866, |
|
"learning_rate": 2.7689954105048445e-05, |
|
"loss": 0.9413, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"grad_norm": 0.3564135432243347, |
|
"learning_rate": 2.7180010198878126e-05, |
|
"loss": 0.9224, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 0.3907375633716583, |
|
"learning_rate": 2.66700662927078e-05, |
|
"loss": 0.9316, |
|
"max_memory_allocated (GB)": 91.97, |
|
"memory_allocated (GB)": 14.99, |
|
"step": 1500, |
|
"total_memory_available (GB)": 94.62 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2022, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1.6792232923653734e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|