{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997490589711417, "eval_steps": 500, "global_step": 996, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.050188205771643665, "grad_norm": 2.3202156020973708, "learning_rate": 9.989427142584392e-06, "loss": 1.5003, "step": 50 }, { "epoch": 0.10037641154328733, "grad_norm": 2.314291595101664, "learning_rate": 9.870995413367397e-06, "loss": 1.3883, "step": 100 }, { "epoch": 0.15056461731493098, "grad_norm": 2.314931158603531, "learning_rate": 9.624050979896533e-06, "loss": 1.3754, "step": 150 }, { "epoch": 0.20075282308657466, "grad_norm": 2.152003211136021, "learning_rate": 9.255109039631998e-06, "loss": 1.3628, "step": 200 }, { "epoch": 0.25094102885821834, "grad_norm": 2.1047973694759157, "learning_rate": 8.773903481118611e-06, "loss": 1.3543, "step": 250 }, { "epoch": 0.30112923462986196, "grad_norm": 2.1025984809300318, "learning_rate": 8.193130072341872e-06, "loss": 1.3515, "step": 300 }, { "epoch": 0.35131744040150564, "grad_norm": 2.2036136599303076, "learning_rate": 7.528111505069428e-06, "loss": 1.3419, "step": 350 }, { "epoch": 0.4015056461731493, "grad_norm": 2.1361038466937563, "learning_rate": 6.796393132397829e-06, "loss": 1.3361, "step": 400 }, { "epoch": 0.451693851944793, "grad_norm": 2.1253080705715175, "learning_rate": 6.0172800652631706e-06, "loss": 1.3336, "step": 450 }, { "epoch": 0.5018820577164367, "grad_norm": 2.094902290851612, "learning_rate": 5.211327840815459e-06, "loss": 1.321, "step": 500 }, { "epoch": 0.5520702634880803, "grad_norm": 2.086008706466924, "learning_rate": 4.399800100481858e-06, "loss": 1.3173, "step": 550 }, { "epoch": 0.6022584692597239, "grad_norm": 2.084376290248031, "learning_rate": 3.6041075859356383e-06, "loss": 1.3044, "step": 600 }, { "epoch": 0.6524466750313677, "grad_norm": 2.1873617946017525, "learning_rate": 2.845243254082134e-06, "loss": 1.3029, "step": 650 }, { "epoch": 0.7026348808030113, "grad_norm": 2.1539300794808733, "learning_rate": 2.1432284145659104e-06, "loss": 1.2977, "step": 700 }, { "epoch": 0.7528230865746549, "grad_norm": 2.100942527337154, "learning_rate": 1.5165845024934366e-06, "loss": 1.3072, "step": 750 }, { "epoch": 0.8030112923462986, "grad_norm": 2.2004509246977477, "learning_rate": 9.81844422725109e-07, "loss": 1.3014, "step": 800 }, { "epoch": 0.8531994981179423, "grad_norm": 2.148051612184209, "learning_rate": 5.531163580638483e-07, "loss": 1.2948, "step": 850 }, { "epoch": 0.903387703889586, "grad_norm": 2.0857628452747248, "learning_rate": 2.417115494991107e-07, "loss": 1.2925, "step": 900 }, { "epoch": 0.9535759096612296, "grad_norm": 2.1477939821041936, "learning_rate": 5.584586887435739e-08, "loss": 1.2951, "step": 950 }, { "epoch": 0.9997490589711417, "step": 996, "total_flos": 3245135674474496.0, "train_loss": 1.3331998112690018, "train_runtime": 34717.8895, "train_samples_per_second": 7.345, "train_steps_per_second": 0.029 } ], "logging_steps": 50, "max_steps": 996, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 420, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3245135674474496.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }