{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.1678190239645566, "eval_steps": 500, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006712760958582265, "grad_norm": 1.0528477430343628, "learning_rate": 0.00014985229447149842, "loss": 7.6572, "step": 100 }, { "epoch": 0.01342552191716453, "grad_norm": 1.1515228748321533, "learning_rate": 0.00014940916946874937, "loss": 6.1239, "step": 200 }, { "epoch": 0.020138282875746795, "grad_norm": 1.2438573837280273, "learning_rate": 0.00014867237372557577, "loss": 5.4067, "step": 300 }, { "epoch": 0.02685104383432906, "grad_norm": 1.3082321882247925, "learning_rate": 0.00014764481515444297, "loss": 5.0934, "step": 400 }, { "epoch": 0.03356380479291132, "grad_norm": 1.4851253032684326, "learning_rate": 0.00014633054922174807, "loss": 4.8669, "step": 500 }, { "epoch": 0.04027656575149359, "grad_norm": 1.3247835636138916, "learning_rate": 0.00014473476294210664, "loss": 4.7151, "step": 600 }, { "epoch": 0.04698932671007586, "grad_norm": 1.5466852188110352, "learning_rate": 0.0001428637544067573, "loss": 4.5684, "step": 700 }, { "epoch": 0.05370208766865812, "grad_norm": 1.3418868780136108, "learning_rate": 0.0001407249079268789, "loss": 4.4861, "step": 800 }, { "epoch": 0.060414848627240385, "grad_norm": 1.4495049715042114, "learning_rate": 0.0001383266648899225, "loss": 4.3896, "step": 900 }, { "epoch": 0.06712760958582265, "grad_norm": 1.2629677057266235, "learning_rate": 0.0001356784904439796, "loss": 4.3076, "step": 1000 }, { "epoch": 0.07384037054440491, "grad_norm": 1.382216215133667, "learning_rate": 0.00013279083614167278, "loss": 4.2179, "step": 1100 }, { "epoch": 0.08055313150298718, "grad_norm": 1.2883789539337158, "learning_rate": 0.00012967509869100336, "loss": 4.1599, "step": 1200 }, { "epoch": 0.08726589246156945, "grad_norm": 1.3527660369873047, "learning_rate": 0.00012634357497595263, "loss": 4.0976, "step": 1300 }, { "epoch": 0.09397865342015171, "grad_norm": 1.3394412994384766, "learning_rate": 0.00012280941352435837, "loss": 4.0805, "step": 1400 }, { "epoch": 0.10069141437873397, "grad_norm": 1.4646199941635132, "learning_rate": 0.00011908656261460721, "loss": 4.0032, "step": 1500 }, { "epoch": 0.10740417533731624, "grad_norm": 1.2548878192901611, "learning_rate": 0.00011518971522595105, "loss": 3.9702, "step": 1600 }, { "epoch": 0.1141169362958985, "grad_norm": 1.363207221031189, "learning_rate": 0.00011113425104971176, "loss": 3.9321, "step": 1700 }, { "epoch": 0.12082969725448077, "grad_norm": 1.3911628723144531, "learning_rate": 0.00010693617579023885, "loss": 3.8974, "step": 1800 }, { "epoch": 0.12754245821306304, "grad_norm": 1.3630716800689697, "learning_rate": 0.00010261205799518043, "loss": 3.8514, "step": 1900 }, { "epoch": 0.1342552191716453, "grad_norm": 1.2687169313430786, "learning_rate": 9.817896366438074e-05, "loss": 3.818, "step": 2000 }, { "epoch": 0.14096798013022757, "grad_norm": 1.3437057733535767, "learning_rate": 9.36543888954819e-05, "loss": 3.8071, "step": 2100 }, { "epoch": 0.14768074108880982, "grad_norm": 1.3673392534255981, "learning_rate": 8.905619083205881e-05, "loss": 3.7842, "step": 2200 }, { "epoch": 0.1543935020473921, "grad_norm": 1.2775851488113403, "learning_rate": 8.440251718681331e-05, "loss": 3.7666, "step": 2300 }, { "epoch": 0.16110626300597436, "grad_norm": 1.382295846939087, "learning_rate": 7.971173461797922e-05, "loss": 3.679, "step": 2400 }, { "epoch": 0.1678190239645566, "grad_norm": 1.269216775894165, "learning_rate": 7.500235624161463e-05, "loss": 3.7059, "step": 2500 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6499577364480000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }