{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 137, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0072992700729927005, "grad_norm": 0.43487387895584106, "learning_rate": 1.4285714285714285e-05, "loss": 1.7554, "step": 1 }, { "epoch": 0.0364963503649635, "grad_norm": 0.4084155857563019, "learning_rate": 7.142857142857143e-05, "loss": 1.7248, "step": 5 }, { "epoch": 0.072992700729927, "grad_norm": 0.4045219123363495, "learning_rate": 0.00014285714285714287, "loss": 1.6877, "step": 10 }, { "epoch": 0.10948905109489052, "grad_norm": 0.5852366089820862, "learning_rate": 0.00019996738360808565, "loss": 1.5983, "step": 15 }, { "epoch": 0.145985401459854, "grad_norm": 0.5415384769439697, "learning_rate": 0.00019882804237803488, "loss": 1.4422, "step": 20 }, { "epoch": 0.18248175182481752, "grad_norm": 0.49430617690086365, "learning_rate": 0.00019607909582962477, "loss": 1.3509, "step": 25 }, { "epoch": 0.21897810218978103, "grad_norm": 0.3028700649738312, "learning_rate": 0.0001917653158603628, "loss": 1.2905, "step": 30 }, { "epoch": 0.25547445255474455, "grad_norm": 0.21788553893566132, "learning_rate": 0.00018595696069872013, "loss": 1.2481, "step": 35 }, { "epoch": 0.291970802919708, "grad_norm": 0.21770533919334412, "learning_rate": 0.00017874863061334657, "loss": 1.2255, "step": 40 }, { "epoch": 0.3284671532846715, "grad_norm": 0.19103731215000153, "learning_rate": 0.00017025772716520323, "loss": 1.2213, "step": 45 }, { "epoch": 0.36496350364963503, "grad_norm": 0.18975041806697845, "learning_rate": 0.0001606225410966638, "loss": 1.2069, "step": 50 }, { "epoch": 0.40145985401459855, "grad_norm": 0.21415986120700836, "learning_rate": 0.00015000000000000001, "loss": 1.1938, "step": 55 }, { "epoch": 0.43795620437956206, "grad_norm": 0.20551565289497375, "learning_rate": 0.0001385631124488136, "loss": 1.1837, "step": 60 }, { "epoch": 0.4744525547445255, "grad_norm": 0.20549848675727844, "learning_rate": 0.0001264981502196662, "loss": 1.1724, "step": 65 }, { "epoch": 0.5109489051094891, "grad_norm": 0.19810882210731506, "learning_rate": 0.00011400161449686293, "loss": 1.1713, "step": 70 }, { "epoch": 0.5474452554744526, "grad_norm": 0.18478353321552277, "learning_rate": 0.00010127703547159739, "loss": 1.1571, "step": 75 }, { "epoch": 0.583941605839416, "grad_norm": 0.19806450605392456, "learning_rate": 8.853165746015997e-05, "loss": 1.1539, "step": 80 }, { "epoch": 0.6204379562043796, "grad_norm": 0.20397868752479553, "learning_rate": 7.597306353045393e-05, "loss": 1.1457, "step": 85 }, { "epoch": 0.656934306569343, "grad_norm": 0.18514488637447357, "learning_rate": 6.380579461128819e-05, "loss": 1.1613, "step": 90 }, { "epoch": 0.6934306569343066, "grad_norm": 0.18412043154239655, "learning_rate": 5.222801814877369e-05, "loss": 1.1449, "step": 95 }, { "epoch": 0.7299270072992701, "grad_norm": 0.19028052687644958, "learning_rate": 4.142830056718052e-05, "loss": 1.1511, "step": 100 }, { "epoch": 0.7664233576642335, "grad_norm": 0.1979902684688568, "learning_rate": 3.158253610095697e-05, "loss": 1.1457, "step": 105 }, { "epoch": 0.8029197080291971, "grad_norm": 0.19014957547187805, "learning_rate": 2.2851082017805703e-05, "loss": 1.1423, "step": 110 }, { "epoch": 0.8394160583941606, "grad_norm": 0.186836376786232, "learning_rate": 1.5376146891235598e-05, "loss": 1.1477, "step": 115 }, { "epoch": 0.8759124087591241, "grad_norm": 0.20176666975021362, "learning_rate": 9.279474459608805e-06, "loss": 1.153, "step": 120 }, { "epoch": 0.9124087591240876, "grad_norm": 0.19549883902072906, "learning_rate": 4.660360794506946e-06, "loss": 1.1444, "step": 125 }, { "epoch": 0.948905109489051, "grad_norm": 0.17397421598434448, "learning_rate": 1.5940370726542863e-06, "loss": 1.1508, "step": 130 }, { "epoch": 0.9854014598540146, "grad_norm": 0.1897565871477127, "learning_rate": 1.3044429107700318e-07, "loss": 1.1497, "step": 135 }, { "epoch": 1.0, "eval_loss": 1.6479942798614502, "eval_runtime": 1.1173, "eval_samples_per_second": 8.055, "eval_steps_per_second": 0.895, "step": 137 }, { "epoch": 1.0, "step": 137, "total_flos": 8.08957492854784e+17, "train_loss": 1.2461711733880705, "train_runtime": 2338.1563, "train_samples_per_second": 14.952, "train_steps_per_second": 0.059 } ], "logging_steps": 5, "max_steps": 137, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.08957492854784e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }