{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4825796886582654, "eval_steps": 500, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 1.2311009168624878, "learning_rate": 5.6012058970266934e-05, "loss": 1.6789, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 1.4872009754180908, "learning_rate": 7.287336883921704e-05, "loss": 1.3884, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.7868104577064514, "learning_rate": 8.273660282559241e-05, "loss": 1.2404, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.35713163018226624, "learning_rate": 8.973467870816715e-05, "loss": 1.2036, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.3057297468185425, "learning_rate": 9.516280807158375e-05, "loss": 1.1788, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.3608625531196594, "learning_rate": 9.959791269454252e-05, "loss": 1.1525, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.3684042990207672, "learning_rate": 9.959204487506375e-05, "loss": 1.1261, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.4175470471382141, "learning_rate": 9.908210096889343e-05, "loss": 1.1214, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.4869045317173004, "learning_rate": 9.85721570627231e-05, "loss": 1.1043, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.6399329900741577, "learning_rate": 9.806221315655279e-05, "loss": 1.1059, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 14.99, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.5639649033546448, "learning_rate": 9.755226925038246e-05, "loss": 1.0711, "max_memory_allocated (GB)": 91.91, "memory_allocated (GB)": 14.99, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.44946518540382385, "learning_rate": 9.704232534421214e-05, "loss": 1.0644, "max_memory_allocated (GB)": 91.91, "memory_allocated (GB)": 14.99, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.5573060512542725, "learning_rate": 9.653238143804181e-05, "loss": 1.0634, "max_memory_allocated (GB)": 91.92, "memory_allocated (GB)": 14.99, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.6105266213417053, "learning_rate": 9.60224375318715e-05, "loss": 1.0597, "max_memory_allocated (GB)": 91.92, "memory_allocated (GB)": 14.99, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.6286391615867615, "learning_rate": 9.551249362570118e-05, "loss": 1.0528, "max_memory_allocated (GB)": 91.92, "memory_allocated (GB)": 14.99, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.8733624815940857, "learning_rate": 9.500254971953085e-05, "loss": 1.0524, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.6268635392189026, "learning_rate": 9.449260581336054e-05, "loss": 1.0345, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.5832647681236267, "learning_rate": 9.398266190719021e-05, "loss": 1.0262, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.6518144011497498, "learning_rate": 9.347271800101989e-05, "loss": 1.0318, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.49274373054504395, "learning_rate": 9.296277409484956e-05, "loss": 1.0298, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.5237769484519958, "learning_rate": 9.245283018867925e-05, "loss": 1.0176, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.564319372177124, "learning_rate": 9.194288628250894e-05, "loss": 1.0172, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.4697343111038208, "learning_rate": 9.14329423763386e-05, "loss": 1.0262, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.5207454562187195, "learning_rate": 9.092299847016829e-05, "loss": 1.024, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.4637609124183655, "learning_rate": 9.041305456399796e-05, "loss": 1.0069, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.47436627745628357, "learning_rate": 8.990311065782764e-05, "loss": 1.0119, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.5096576809883118, "learning_rate": 8.939316675165733e-05, "loss": 1.0092, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.5780492424964905, "learning_rate": 8.8883222845487e-05, "loss": 1.0082, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.4528846740722656, "learning_rate": 8.837327893931669e-05, "loss": 1.0057, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.5259899497032166, "learning_rate": 8.786333503314635e-05, "loss": 0.9989, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.5007658004760742, "learning_rate": 8.735339112697604e-05, "loss": 0.9997, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.4527484178543091, "learning_rate": 8.684344722080571e-05, "loss": 0.9949, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.4365575611591339, "learning_rate": 8.633350331463539e-05, "loss": 0.9943, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.49428853392601013, "learning_rate": 8.582355940846507e-05, "loss": 1.0005, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.4611368775367737, "learning_rate": 8.531361550229475e-05, "loss": 0.9972, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.43548157811164856, "learning_rate": 8.480367159612444e-05, "loss": 0.9833, "max_memory_allocated (GB)": 91.93, "memory_allocated (GB)": 14.99, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.4797479808330536, "learning_rate": 8.42937276899541e-05, "loss": 0.981, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.44958415627479553, "learning_rate": 8.378378378378379e-05, "loss": 0.9969, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.4499351680278778, "learning_rate": 8.327383987761347e-05, "loss": 0.9847, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.45021358132362366, "learning_rate": 8.276389597144315e-05, "loss": 0.9874, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.4754478335380554, "learning_rate": 8.225395206527282e-05, "loss": 0.9955, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.44393980503082275, "learning_rate": 8.17440081591025e-05, "loss": 0.9898, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.43429532647132874, "learning_rate": 8.123406425293219e-05, "loss": 0.9905, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.4695710837841034, "learning_rate": 8.072412034676186e-05, "loss": 0.9702, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.40997833013534546, "learning_rate": 8.021417644059154e-05, "loss": 0.9825, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.4330343008041382, "learning_rate": 7.970423253442122e-05, "loss": 0.9777, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.42674386501312256, "learning_rate": 7.91942886282509e-05, "loss": 0.9794, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.4461188316345215, "learning_rate": 7.868434472208057e-05, "loss": 0.979, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.4532679617404938, "learning_rate": 7.817440081591025e-05, "loss": 0.9764, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.42160096764564514, "learning_rate": 7.766445690973994e-05, "loss": 0.967, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.45428088307380676, "learning_rate": 7.715451300356961e-05, "loss": 0.975, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.3874836564064026, "learning_rate": 7.664456909739929e-05, "loss": 0.9707, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.4256057143211365, "learning_rate": 7.613462519122897e-05, "loss": 0.9775, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.3986164331436157, "learning_rate": 7.562468128505865e-05, "loss": 0.972, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.4044497311115265, "learning_rate": 7.511473737888832e-05, "loss": 0.9725, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.439773291349411, "learning_rate": 7.460479347271801e-05, "loss": 0.9667, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.42315754294395447, "learning_rate": 7.409484956654769e-05, "loss": 0.9714, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.38059332966804504, "learning_rate": 7.358490566037736e-05, "loss": 0.979, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.4393787086009979, "learning_rate": 7.307496175420703e-05, "loss": 0.9663, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.4336337447166443, "learning_rate": 7.256501784803672e-05, "loss": 0.9661, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.41273126006126404, "learning_rate": 7.20550739418664e-05, "loss": 0.9672, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.45420095324516296, "learning_rate": 7.154513003569607e-05, "loss": 0.9668, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.4193101227283478, "learning_rate": 7.103518612952576e-05, "loss": 0.9684, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.4247801601886749, "learning_rate": 7.052524222335543e-05, "loss": 0.9652, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.5118327140808105, "learning_rate": 7.001529831718512e-05, "loss": 0.9777, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.42659929394721985, "learning_rate": 6.950535441101478e-05, "loss": 0.9624, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.45405977964401245, "learning_rate": 6.899541050484447e-05, "loss": 0.9585, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.40387892723083496, "learning_rate": 6.848546659867415e-05, "loss": 0.9633, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.39345934987068176, "learning_rate": 6.797552269250382e-05, "loss": 0.9788, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.4187304675579071, "learning_rate": 6.746557878633351e-05, "loss": 0.9651, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.4518466293811798, "learning_rate": 6.695563488016318e-05, "loss": 0.965, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.4474211037158966, "learning_rate": 6.644569097399287e-05, "loss": 0.9714, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.38997548818588257, "learning_rate": 6.593574706782255e-05, "loss": 0.9528, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.42444851994514465, "learning_rate": 6.542580316165222e-05, "loss": 0.9638, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.4098523259162903, "learning_rate": 6.491585925548191e-05, "loss": 0.9534, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.42875897884368896, "learning_rate": 6.440591534931157e-05, "loss": 0.9631, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.4514971077442169, "learning_rate": 6.389597144314126e-05, "loss": 0.9623, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.43437400460243225, "learning_rate": 6.338602753697093e-05, "loss": 0.9657, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.40089166164398193, "learning_rate": 6.287608363080062e-05, "loss": 0.9607, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.3970320224761963, "learning_rate": 6.23661397246303e-05, "loss": 0.9623, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.5439819693565369, "learning_rate": 6.185619581845997e-05, "loss": 0.9585, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.37487202882766724, "learning_rate": 6.134625191228966e-05, "loss": 0.9509, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.42697498202323914, "learning_rate": 6.0836308006119326e-05, "loss": 0.9525, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.38735342025756836, "learning_rate": 6.032636409994901e-05, "loss": 0.954, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.4077759087085724, "learning_rate": 5.981642019377869e-05, "loss": 0.9528, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.38065391778945923, "learning_rate": 5.930647628760837e-05, "loss": 0.9589, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.3892410695552826, "learning_rate": 5.879653238143804e-05, "loss": 0.9538, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.3951966166496277, "learning_rate": 5.8286588475267726e-05, "loss": 0.9518, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.5943160057067871, "learning_rate": 5.777664456909741e-05, "loss": 0.9437, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.440020889043808, "learning_rate": 5.7266700662927075e-05, "loss": 0.9635, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.44286438822746277, "learning_rate": 5.6756756756756757e-05, "loss": 0.9511, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.4247153699398041, "learning_rate": 5.624681285058644e-05, "loss": 0.9475, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.42017248272895813, "learning_rate": 5.573686894441612e-05, "loss": 0.9436, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.4182833731174469, "learning_rate": 5.5226925038245794e-05, "loss": 0.9557, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.41079315543174744, "learning_rate": 5.4716981132075475e-05, "loss": 0.9585, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.3954004943370819, "learning_rate": 5.4207037225905157e-05, "loss": 0.9478, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.39996138215065, "learning_rate": 5.369709331973484e-05, "loss": 0.947, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.39325571060180664, "learning_rate": 5.3187149413564506e-05, "loss": 0.9473, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.43744412064552307, "learning_rate": 5.267720550739419e-05, "loss": 0.9437, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.36840781569480896, "learning_rate": 5.216726160122387e-05, "loss": 0.9441, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0, "grad_norm": 0.4539542496204376, "learning_rate": 5.165731769505354e-05, "loss": 0.9518, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 1.01, "grad_norm": 0.4364802837371826, "learning_rate": 5.1147373788883224e-05, "loss": 0.9423, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 1.02, "grad_norm": 0.42135128378868103, "learning_rate": 5.0637429882712906e-05, "loss": 0.9519, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 1.03, "grad_norm": 0.3595232367515564, "learning_rate": 5.012748597654259e-05, "loss": 0.9494, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 1.04, "grad_norm": 0.38723716139793396, "learning_rate": 4.961754207037226e-05, "loss": 0.9459, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 1.05, "grad_norm": 0.39567115902900696, "learning_rate": 4.910759816420194e-05, "loss": 0.9457, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 1.06, "grad_norm": 0.3695621192455292, "learning_rate": 4.859765425803162e-05, "loss": 0.9423, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 1.07, "grad_norm": 0.37855803966522217, "learning_rate": 4.80877103518613e-05, "loss": 0.9484, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 1.08, "grad_norm": 0.3890206515789032, "learning_rate": 4.7577766445690974e-05, "loss": 0.9452, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 1.09, "grad_norm": 0.38612017035484314, "learning_rate": 4.7067822539520655e-05, "loss": 0.9351, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1, "grad_norm": 0.36654138565063477, "learning_rate": 4.655787863335033e-05, "loss": 0.9513, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 1.11, "grad_norm": 0.41428259015083313, "learning_rate": 4.604793472718002e-05, "loss": 0.9469, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 1.12, "grad_norm": 0.42189493775367737, "learning_rate": 4.553799082100969e-05, "loss": 0.9367, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 1.13, "grad_norm": 0.4327576160430908, "learning_rate": 4.5028046914839374e-05, "loss": 0.9333, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 1.14, "grad_norm": 0.4243718981742859, "learning_rate": 4.451810300866905e-05, "loss": 0.9354, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 1.15, "grad_norm": 0.3888280391693115, "learning_rate": 4.400815910249872e-05, "loss": 0.9381, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 1.16, "grad_norm": 0.36528488993644714, "learning_rate": 4.3498215196328404e-05, "loss": 0.953, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 1.17, "grad_norm": 0.410566121339798, "learning_rate": 4.2988271290158086e-05, "loss": 0.9491, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 1.18, "grad_norm": 0.39953145384788513, "learning_rate": 4.247832738398777e-05, "loss": 0.9342, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 1.19, "grad_norm": 0.37041422724723816, "learning_rate": 4.196838347781744e-05, "loss": 0.9485, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2, "grad_norm": 0.41900932788848877, "learning_rate": 4.145843957164712e-05, "loss": 0.9463, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 1.21, "grad_norm": 0.39692798256874084, "learning_rate": 4.09484956654768e-05, "loss": 0.9425, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 1.22, "grad_norm": 0.4093817472457886, "learning_rate": 4.043855175930648e-05, "loss": 0.9439, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1230, "total_memory_available (GB)": 94.62 }, { "epoch": 1.23, "grad_norm": 0.3851373493671417, "learning_rate": 3.992860785313616e-05, "loss": 0.9478, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.24, "grad_norm": 0.39068475365638733, "learning_rate": 3.9418663946965835e-05, "loss": 0.9425, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.25, "grad_norm": 0.4238007068634033, "learning_rate": 3.8908720040795516e-05, "loss": 0.9387, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.26, "grad_norm": 0.3815898299217224, "learning_rate": 3.839877613462519e-05, "loss": 0.9367, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.27, "grad_norm": 0.3882830739021301, "learning_rate": 3.788883222845487e-05, "loss": 0.9529, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.5217743515968323, "learning_rate": 3.737888832228455e-05, "loss": 0.9372, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.28, "grad_norm": 0.381600022315979, "learning_rate": 3.6868944416114235e-05, "loss": 0.9344, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.29, "grad_norm": 0.3727063238620758, "learning_rate": 3.635900050994391e-05, "loss": 0.9505, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3, "grad_norm": 0.35615187883377075, "learning_rate": 3.5849056603773584e-05, "loss": 0.9329, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.31, "grad_norm": 0.3927929103374481, "learning_rate": 3.5339112697603265e-05, "loss": 0.9425, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.32, "grad_norm": 0.42496153712272644, "learning_rate": 3.482916879143294e-05, "loss": 0.9354, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.33, "grad_norm": 0.38419976830482483, "learning_rate": 3.431922488526262e-05, "loss": 0.9402, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.34, "grad_norm": 0.4346012771129608, "learning_rate": 3.38092809790923e-05, "loss": 0.9444, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.35, "grad_norm": 0.4262070655822754, "learning_rate": 3.3299337072921984e-05, "loss": 0.9456, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.36, "grad_norm": 0.3871968984603882, "learning_rate": 3.278939316675166e-05, "loss": 0.946, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.37, "grad_norm": 0.36344829201698303, "learning_rate": 3.227944926058134e-05, "loss": 0.9354, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.38, "grad_norm": 0.38965150713920593, "learning_rate": 3.1769505354411015e-05, "loss": 0.9268, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.39, "grad_norm": 0.39498674869537354, "learning_rate": 3.1259561448240696e-05, "loss": 0.9187, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4, "grad_norm": 0.3658037781715393, "learning_rate": 3.074961754207038e-05, "loss": 0.9335, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.41, "grad_norm": 0.37881678342819214, "learning_rate": 3.0239673635900052e-05, "loss": 0.942, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.42, "grad_norm": 0.37767714262008667, "learning_rate": 2.9729729729729733e-05, "loss": 0.9308, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.43, "grad_norm": 0.3596402108669281, "learning_rate": 2.9219785823559408e-05, "loss": 0.9217, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.44, "grad_norm": 0.3982533812522888, "learning_rate": 2.8709841917389093e-05, "loss": 0.9321, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.45, "grad_norm": 0.37246957421302795, "learning_rate": 2.8199898011218767e-05, "loss": 0.9333, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.46, "grad_norm": 0.3782934844493866, "learning_rate": 2.7689954105048445e-05, "loss": 0.9413, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1480, "total_memory_available (GB)": 94.62 }, { "epoch": 1.47, "grad_norm": 0.3564135432243347, "learning_rate": 2.7180010198878126e-05, "loss": 0.9224, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1490, "total_memory_available (GB)": 94.62 }, { "epoch": 1.48, "grad_norm": 0.3907375633716583, "learning_rate": 2.66700662927078e-05, "loss": 0.9316, "max_memory_allocated (GB)": 91.97, "memory_allocated (GB)": 14.99, "step": 1500, "total_memory_available (GB)": 94.62 } ], "logging_steps": 10, "max_steps": 2022, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 1.6792232923653734e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }