{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.584507042253521, "eval_steps": 500, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0880281690140845, "grad_norm": 1221.827392578125, "learning_rate": 2.7777777777777783e-06, "loss": 297.1973, "step": 100 }, { "epoch": 0.176056338028169, "grad_norm": 1063.7159423828125, "learning_rate": 4.998119881260576e-06, "loss": 161.9498, "step": 200 }, { "epoch": 0.2640845070422535, "grad_norm": 1415.1790771484375, "learning_rate": 4.93261217644956e-06, "loss": 149.2985, "step": 300 }, { "epoch": 0.352112676056338, "grad_norm": 1143.650634765625, "learning_rate": 4.775907352415367e-06, "loss": 146.5141, "step": 400 }, { "epoch": 0.44014084507042256, "grad_norm": 1112.0836181640625, "learning_rate": 4.533880175657419e-06, "loss": 125.0505, "step": 500 }, { "epoch": 0.528169014084507, "grad_norm": 2647.099365234375, "learning_rate": 4.215604094671835e-06, "loss": 136.0549, "step": 600 }, { "epoch": 0.6161971830985915, "grad_norm": 914.672119140625, "learning_rate": 3.833011082004229e-06, "loss": 152.9934, "step": 700 }, { "epoch": 0.704225352112676, "grad_norm": 941.3635864257812, "learning_rate": 3.400444312011776e-06, "loss": 142.6481, "step": 800 }, { "epoch": 0.7922535211267606, "grad_norm": 2750.364990234375, "learning_rate": 2.9341204441673267e-06, "loss": 180.6238, "step": 900 }, { "epoch": 0.8802816901408451, "grad_norm": 1102.6456298828125, "learning_rate": 2.4515216705704396e-06, "loss": 160.7811, "step": 1000 }, { "epoch": 0.9683098591549296, "grad_norm": 2127.451416015625, "learning_rate": 1.970740319426474e-06, "loss": 136.9254, "step": 1100 }, { "epoch": 1.0, "eval_loss": 415.93927001953125, "eval_runtime": 8.7817, "eval_samples_per_second": 115.012, "eval_steps_per_second": 14.462, "step": 1136 }, { "epoch": 1.056338028169014, "grad_norm": 2116.73193359375, "learning_rate": 1.509800584902108e-06, "loss": 142.92, "step": 1200 }, { "epoch": 1.1443661971830985, "grad_norm": 1667.754638671875, "learning_rate": 1.085982811283654e-06, "loss": 141.8554, "step": 1300 }, { "epoch": 1.232394366197183, "grad_norm": 1366.0006103515625, "learning_rate": 7.151756636052529e-07, "loss": 149.8122, "step": 1400 }, { "epoch": 1.3204225352112675, "grad_norm": 1805.1434326171875, "learning_rate": 4.1128047146765936e-07, "loss": 148.2215, "step": 1500 }, { "epoch": 1.408450704225352, "grad_norm": 1603.68212890625, "learning_rate": 1.8569007682777417e-07, "loss": 142.1948, "step": 1600 }, { "epoch": 1.4964788732394365, "grad_norm": 1250.8704833984375, "learning_rate": 4.6861723431538273e-08, "loss": 123.6116, "step": 1700 }, { "epoch": 1.584507042253521, "grad_norm": 3193.298828125, "learning_rate": 0.0, "loss": 142.5536, "step": 1800 } ], "logging_steps": 100, "max_steps": 1800, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }