{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9883864591055103, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 0.8518019318580627, "learning_rate": 5.6012058970266934e-05, "loss": 1.7421, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.02, "grad_norm": 0.8390570878982544, "learning_rate": 7.287336883921704e-05, "loss": 1.5281, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.03, "grad_norm": 0.3700675666332245, "learning_rate": 8.273660282559241e-05, "loss": 1.3485, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.04, "grad_norm": 0.3168916404247284, "learning_rate": 8.973467870816715e-05, "loss": 1.2968, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.05, "grad_norm": 0.24861344695091248, "learning_rate": 9.516280807158375e-05, "loss": 1.2689, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.06, "grad_norm": 0.22251686453819275, "learning_rate": 9.959791269454252e-05, "loss": 1.2434, "max_memory_allocated (GB)": 91.86, "memory_allocated (GB)": 24.39, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.07, "grad_norm": 0.23426611721515656, "learning_rate": 9.959204487506375e-05, "loss": 1.2152, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08, "grad_norm": 0.45850667357444763, "learning_rate": 9.908210096889343e-05, "loss": 1.2108, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.09, "grad_norm": 0.4196653366088867, "learning_rate": 9.85721570627231e-05, "loss": 1.1913, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.1, "grad_norm": 0.5248636603355408, "learning_rate": 9.806221315655279e-05, "loss": 1.1924, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.11, "grad_norm": 0.3434283137321472, "learning_rate": 9.755226925038246e-05, "loss": 1.1558, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12, "grad_norm": 0.47737815976142883, "learning_rate": 9.704232534421214e-05, "loss": 1.1492, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.13, "grad_norm": 0.47788286209106445, "learning_rate": 9.653238143804181e-05, "loss": 1.1486, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.14, "grad_norm": 0.45408132672309875, "learning_rate": 9.60224375318715e-05, "loss": 1.1456, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.15, "grad_norm": 0.4091607630252838, "learning_rate": 9.551249362570118e-05, "loss": 1.1365, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16, "grad_norm": 0.5064594745635986, "learning_rate": 9.500254971953085e-05, "loss": 1.137, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.17, "grad_norm": 0.4288266897201538, "learning_rate": 9.449260581336054e-05, "loss": 1.1181, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.18, "grad_norm": 0.3854447901248932, "learning_rate": 9.398266190719021e-05, "loss": 1.1091, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.19, "grad_norm": 0.4143249988555908, "learning_rate": 9.347271800101989e-05, "loss": 1.1156, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2, "grad_norm": 0.521230161190033, "learning_rate": 9.296277409484956e-05, "loss": 1.1117, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.21, "grad_norm": 0.487106055021286, "learning_rate": 9.245283018867925e-05, "loss": 1.1003, "max_memory_allocated (GB)": 91.87, "memory_allocated (GB)": 24.39, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.22, "grad_norm": 0.4616335928440094, "learning_rate": 9.194288628250894e-05, "loss": 1.0992, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 24.39, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.23, "grad_norm": 0.3908211290836334, "learning_rate": 9.14329423763386e-05, "loss": 1.1074, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 24.39, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24, "grad_norm": 0.4411673843860626, "learning_rate": 9.092299847016829e-05, "loss": 1.1055, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 24.39, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 0.25, "grad_norm": 0.4123621881008148, "learning_rate": 9.041305456399796e-05, "loss": 1.0883, "max_memory_allocated (GB)": 91.88, "memory_allocated (GB)": 24.39, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 0.26, "grad_norm": 0.5461438298225403, "learning_rate": 8.990311065782764e-05, "loss": 1.0928, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 0.27, "grad_norm": 0.4907448887825012, "learning_rate": 8.939316675165733e-05, "loss": 1.0912, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 0.28, "grad_norm": 0.45152169466018677, "learning_rate": 8.8883222845487e-05, "loss": 1.0891, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 0.29, "grad_norm": 0.41472557187080383, "learning_rate": 8.837327893931669e-05, "loss": 1.0864, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3, "grad_norm": 0.45566004514694214, "learning_rate": 8.786333503314635e-05, "loss": 1.0776, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 0.31, "grad_norm": 0.3909231424331665, "learning_rate": 8.735339112697604e-05, "loss": 1.0801, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32, "grad_norm": 0.39705774188041687, "learning_rate": 8.684344722080571e-05, "loss": 1.0746, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 0.33, "grad_norm": 0.4257935881614685, "learning_rate": 8.633350331463539e-05, "loss": 1.0738, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 0.34, "grad_norm": 0.41336777806282043, "learning_rate": 8.582355940846507e-05, "loss": 1.0811, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 0.35, "grad_norm": 0.3621828854084015, "learning_rate": 8.531361550229475e-05, "loss": 1.0762, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 0.36, "grad_norm": 0.398189902305603, "learning_rate": 8.480367159612444e-05, "loss": 1.0622, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 0.37, "grad_norm": 0.37738627195358276, "learning_rate": 8.42937276899541e-05, "loss": 1.06, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 0.38, "grad_norm": 0.40790703892707825, "learning_rate": 8.378378378378379e-05, "loss": 1.0768, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 0.39, "grad_norm": 0.35230451822280884, "learning_rate": 8.327383987761347e-05, "loss": 1.0631, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4, "grad_norm": 0.37737661600112915, "learning_rate": 8.276389597144315e-05, "loss": 1.0665, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 0.41, "grad_norm": 0.39823117852211, "learning_rate": 8.225395206527282e-05, "loss": 1.0739, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 0.42, "grad_norm": 0.38277310132980347, "learning_rate": 8.17440081591025e-05, "loss": 1.07, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.34220802783966064, "learning_rate": 8.123406425293219e-05, "loss": 1.0698, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 0.43, "grad_norm": 0.3858403265476227, "learning_rate": 8.072412034676186e-05, "loss": 1.0488, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 0.44, "grad_norm": 0.36855727434158325, "learning_rate": 8.021417644059154e-05, "loss": 1.0612, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 0.45, "grad_norm": 0.4122312664985657, "learning_rate": 7.970423253442122e-05, "loss": 1.0566, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 0.46, "grad_norm": 0.38682645559310913, "learning_rate": 7.91942886282509e-05, "loss": 1.0575, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 0.47, "grad_norm": 0.38858598470687866, "learning_rate": 7.868434472208057e-05, "loss": 1.0579, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 0.48, "grad_norm": 0.3749813139438629, "learning_rate": 7.817440081591025e-05, "loss": 1.0531, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 0.49, "grad_norm": 0.36404120922088623, "learning_rate": 7.766445690973994e-05, "loss": 1.0447, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5, "grad_norm": 0.4445250332355499, "learning_rate": 7.715451300356961e-05, "loss": 1.0526, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 0.51, "grad_norm": 0.3644183278083801, "learning_rate": 7.664456909739929e-05, "loss": 1.0494, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 0.52, "grad_norm": 0.34624868631362915, "learning_rate": 7.613462519122897e-05, "loss": 1.0572, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 0.53, "grad_norm": 0.3788256347179413, "learning_rate": 7.562468128505865e-05, "loss": 1.0502, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 0.54, "grad_norm": 0.3667903542518616, "learning_rate": 7.511473737888832e-05, "loss": 1.0505, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 0.55, "grad_norm": 0.37510526180267334, "learning_rate": 7.460479347271801e-05, "loss": 1.045, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 0.56, "grad_norm": 0.3509054183959961, "learning_rate": 7.409484956654769e-05, "loss": 1.0504, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 0.57, "grad_norm": 0.3294220268726349, "learning_rate": 7.358490566037736e-05, "loss": 1.0573, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 0.58, "grad_norm": 0.34325262904167175, "learning_rate": 7.307496175420703e-05, "loss": 1.0445, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 0.59, "grad_norm": 0.3679581880569458, "learning_rate": 7.256501784803672e-05, "loss": 1.0445, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6, "grad_norm": 0.3220757246017456, "learning_rate": 7.20550739418664e-05, "loss": 1.0458, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 0.61, "grad_norm": 0.3660426139831543, "learning_rate": 7.154513003569607e-05, "loss": 1.0447, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 0.62, "grad_norm": 0.32533150911331177, "learning_rate": 7.103518612952576e-05, "loss": 1.0471, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 0.63, "grad_norm": 0.33115923404693604, "learning_rate": 7.052524222335543e-05, "loss": 1.0431, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 0.64, "grad_norm": 0.337576299905777, "learning_rate": 7.001529831718512e-05, "loss": 1.057, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 0.65, "grad_norm": 0.3337574303150177, "learning_rate": 6.950535441101478e-05, "loss": 1.0408, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 0.66, "grad_norm": 0.35560840368270874, "learning_rate": 6.899541050484447e-05, "loss": 1.0363, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 0.67, "grad_norm": 0.3454528748989105, "learning_rate": 6.848546659867415e-05, "loss": 1.0412, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 0.68, "grad_norm": 0.3608352541923523, "learning_rate": 6.797552269250382e-05, "loss": 1.0578, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 0.69, "grad_norm": 0.332792729139328, "learning_rate": 6.746557878633351e-05, "loss": 1.0426, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7, "grad_norm": 0.4059067666530609, "learning_rate": 6.695563488016318e-05, "loss": 1.0441, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 0.71, "grad_norm": 0.3741580843925476, "learning_rate": 6.644569097399287e-05, "loss": 1.0525, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 0.72, "grad_norm": 0.3646301329135895, "learning_rate": 6.593574706782255e-05, "loss": 1.0302, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 0.73, "grad_norm": 0.35956060886383057, "learning_rate": 6.542580316165222e-05, "loss": 1.0439, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 0.74, "grad_norm": 0.3517419099807739, "learning_rate": 6.491585925548191e-05, "loss": 1.0314, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 0.75, "grad_norm": 0.33927640318870544, "learning_rate": 6.440591534931157e-05, "loss": 1.042, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 0.76, "grad_norm": 0.3502146005630493, "learning_rate": 6.389597144314126e-05, "loss": 1.0416, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 0.77, "grad_norm": 0.37221387028694153, "learning_rate": 6.338602753697093e-05, "loss": 1.0453, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 0.78, "grad_norm": 0.3718739449977875, "learning_rate": 6.287608363080062e-05, "loss": 1.0392, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 0.79, "grad_norm": 0.35249418020248413, "learning_rate": 6.23661397246303e-05, "loss": 1.0413, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8, "grad_norm": 0.35906028747558594, "learning_rate": 6.185619581845997e-05, "loss": 1.0375, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 0.81, "grad_norm": 0.33932170271873474, "learning_rate": 6.134625191228966e-05, "loss": 1.0286, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 0.82, "grad_norm": 0.33107632398605347, "learning_rate": 6.0836308006119326e-05, "loss": 1.0319, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 0.83, "grad_norm": 0.32848185300827026, "learning_rate": 6.032636409994901e-05, "loss": 1.0329, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 0.84, "grad_norm": 0.33085334300994873, "learning_rate": 5.981642019377869e-05, "loss": 1.0326, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 0.85, "grad_norm": 0.3043057322502136, "learning_rate": 5.930647628760837e-05, "loss": 1.0379, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 0.86, "grad_norm": 0.3407464027404785, "learning_rate": 5.879653238143804e-05, "loss": 1.0336, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 0.87, "grad_norm": 0.34069886803627014, "learning_rate": 5.8286588475267726e-05, "loss": 1.0294, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 0.88, "grad_norm": 0.4303439259529114, "learning_rate": 5.777664456909741e-05, "loss": 1.0223, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 0.89, "grad_norm": 0.3378705382347107, "learning_rate": 5.7266700662927075e-05, "loss": 1.042, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9, "grad_norm": 0.4227118194103241, "learning_rate": 5.6756756756756757e-05, "loss": 1.0301, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 0.91, "grad_norm": 0.36343687772750854, "learning_rate": 5.624681285058644e-05, "loss": 1.0249, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 0.92, "grad_norm": 0.37661993503570557, "learning_rate": 5.573686894441612e-05, "loss": 1.0201, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 0.93, "grad_norm": 0.38610127568244934, "learning_rate": 5.5226925038245794e-05, "loss": 1.0351, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 0.94, "grad_norm": 0.32767486572265625, "learning_rate": 5.4716981132075475e-05, "loss": 1.0364, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 0.95, "grad_norm": 0.36714789271354675, "learning_rate": 5.4207037225905157e-05, "loss": 1.0252, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 0.96, "grad_norm": 0.34143558144569397, "learning_rate": 5.369709331973484e-05, "loss": 1.0266, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 0.97, "grad_norm": 0.3287556767463684, "learning_rate": 5.3187149413564506e-05, "loss": 1.0265, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 0.98, "grad_norm": 0.33613139390945435, "learning_rate": 5.267720550739419e-05, "loss": 1.0215, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 0.99, "grad_norm": 0.32623520493507385, "learning_rate": 5.216726160122387e-05, "loss": 1.0213, "max_memory_allocated (GB)": 91.9, "memory_allocated (GB)": 24.39, "step": 1000, "total_memory_available (GB)": 94.62 } ], "logging_steps": 10, "max_steps": 2022, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "total_flos": 1.119482194910249e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }