{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04576659038901602, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019292929292929293, "loss": 2.0259, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00018282828282828283, "loss": 1.4961, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00017272727272727275, "loss": 1.3953, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00016262626262626264, "loss": 1.3234, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00015252525252525253, "loss": 1.4601, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00014242424242424243, "loss": 1.3362, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.00013232323232323235, "loss": 1.3905, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.00012222222222222224, "loss": 1.3132, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.00011212121212121212, "loss": 1.2767, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.00010202020202020202, "loss": 1.3303, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.191919191919192e-05, "loss": 1.3685, "step": 110 }, { "epoch": 0.03, "learning_rate": 8.181818181818183e-05, "loss": 1.335, "step": 120 }, { "epoch": 0.03, "learning_rate": 7.171717171717171e-05, "loss": 1.3122, "step": 130 }, { "epoch": 0.03, "learning_rate": 6.161616161616162e-05, "loss": 1.2693, "step": 140 }, { "epoch": 0.03, "learning_rate": 5.151515151515152e-05, "loss": 1.2069, "step": 150 }, { "epoch": 0.04, "learning_rate": 4.141414141414142e-05, "loss": 1.2302, "step": 160 }, { "epoch": 0.04, "learning_rate": 3.131313131313132e-05, "loss": 1.2451, "step": 170 }, { "epoch": 0.04, "learning_rate": 2.1212121212121215e-05, "loss": 1.2993, "step": 180 }, { "epoch": 0.04, "learning_rate": 1.1111111111111112e-05, "loss": 1.2968, "step": 190 }, { "epoch": 0.05, "learning_rate": 1.0101010101010103e-06, "loss": 1.2567, "step": 200 }, { "epoch": 0.05, "step": 200, "total_flos": 1428558744158208.0, "train_loss": 1.3583835124969483, "train_runtime": 1540.4432, "train_samples_per_second": 0.519, "train_steps_per_second": 0.13 } ], "logging_steps": 10, "max_steps": 200, "num_train_epochs": 1, "save_steps": 500, "total_flos": 1428558744158208.0, "trial_name": null, "trial_params": null }