{ "best_metric": 7.9168, "best_model_checkpoint": "/content/tst-translation/checkpoint-1600", "epoch": 20.0, "eval_steps": 200, "global_step": 2540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.574803149606299, "grad_norm": 2.453336238861084, "learning_rate": 0.00046062992125984255, "loss": 4.5126, "step": 200 }, { "epoch": 1.574803149606299, "eval_bleu": 1.0891, "eval_gen_len": 120.8315, "eval_loss": 2.747382402420044, "eval_runtime": 505.8332, "eval_samples_per_second": 1.971, "eval_steps_per_second": 0.125, "step": 200 }, { "epoch": 3.1496062992125986, "grad_norm": 2.0367627143859863, "learning_rate": 0.00042125984251968504, "loss": 2.4414, "step": 400 }, { "epoch": 3.1496062992125986, "eval_bleu": 5.0172, "eval_gen_len": 54.7623, "eval_loss": 2.5120186805725098, "eval_runtime": 340.8894, "eval_samples_per_second": 2.925, "eval_steps_per_second": 0.185, "step": 400 }, { "epoch": 4.724409448818898, "grad_norm": 2.0367233753204346, "learning_rate": 0.00038188976377952753, "loss": 1.724, "step": 600 }, { "epoch": 4.724409448818898, "eval_bleu": 5.2115, "eval_gen_len": 61.985, "eval_loss": 2.4089620113372803, "eval_runtime": 368.5588, "eval_samples_per_second": 2.705, "eval_steps_per_second": 0.171, "step": 600 }, { "epoch": 6.299212598425197, "grad_norm": 1.8066316843032837, "learning_rate": 0.00034251968503937007, "loss": 1.2755, "step": 800 }, { "epoch": 6.299212598425197, "eval_bleu": 7.0203, "eval_gen_len": 46.1414, "eval_loss": 2.551859140396118, "eval_runtime": 215.1683, "eval_samples_per_second": 4.634, "eval_steps_per_second": 0.293, "step": 800 }, { "epoch": 7.874015748031496, "grad_norm": 1.8381917476654053, "learning_rate": 0.0003031496062992126, "loss": 0.97, "step": 1000 }, { "epoch": 7.874015748031496, "eval_bleu": 7.005, "eval_gen_len": 56.5266, "eval_loss": 2.5974559783935547, "eval_runtime": 308.9884, "eval_samples_per_second": 3.227, "eval_steps_per_second": 0.204, "step": 1000 }, { "epoch": 9.448818897637794, "grad_norm": 1.8475762605667114, "learning_rate": 0.0002637795275590551, "loss": 0.7251, "step": 1200 }, { "epoch": 9.448818897637794, "eval_bleu": 7.6235, "eval_gen_len": 52.6841, "eval_loss": 2.791808605194092, "eval_runtime": 280.2999, "eval_samples_per_second": 3.557, "eval_steps_per_second": 0.225, "step": 1200 }, { "epoch": 11.023622047244094, "grad_norm": 1.7481825351715088, "learning_rate": 0.00022440944881889764, "loss": 0.584, "step": 1400 }, { "epoch": 11.023622047244094, "eval_bleu": 7.3273, "eval_gen_len": 49.9659, "eval_loss": 2.8952395915985107, "eval_runtime": 205.6302, "eval_samples_per_second": 4.849, "eval_steps_per_second": 0.306, "step": 1400 }, { "epoch": 12.598425196850394, "grad_norm": 1.5020047426223755, "learning_rate": 0.00018503937007874016, "loss": 0.4358, "step": 1600 }, { "epoch": 12.598425196850394, "eval_bleu": 7.9168, "eval_gen_len": 51.4945, "eval_loss": 3.120616912841797, "eval_runtime": 243.389, "eval_samples_per_second": 4.096, "eval_steps_per_second": 0.259, "step": 1600 }, { "epoch": 14.173228346456693, "grad_norm": 1.6936888694763184, "learning_rate": 0.00014566929133858267, "loss": 0.3619, "step": 1800 }, { "epoch": 14.173228346456693, "eval_bleu": 7.9096, "eval_gen_len": 50.5517, "eval_loss": 3.264512538909912, "eval_runtime": 220.31, "eval_samples_per_second": 4.525, "eval_steps_per_second": 0.286, "step": 1800 }, { "epoch": 15.748031496062993, "grad_norm": 1.4287927150726318, "learning_rate": 0.0001062992125984252, "loss": 0.2933, "step": 2000 }, { "epoch": 15.748031496062993, "eval_bleu": 7.9015, "eval_gen_len": 49.6169, "eval_loss": 3.3970730304718018, "eval_runtime": 195.9873, "eval_samples_per_second": 5.087, "eval_steps_per_second": 0.321, "step": 2000 }, { "epoch": 17.322834645669293, "grad_norm": 1.6358137130737305, "learning_rate": 6.692913385826773e-05, "loss": 0.2447, "step": 2200 }, { "epoch": 17.322834645669293, "eval_bleu": 7.8441, "eval_gen_len": 49.6911, "eval_loss": 3.5039305686950684, "eval_runtime": 201.4971, "eval_samples_per_second": 4.948, "eval_steps_per_second": 0.313, "step": 2200 }, { "epoch": 18.89763779527559, "grad_norm": 1.3624520301818848, "learning_rate": 2.7559055118110236e-05, "loss": 0.2151, "step": 2400 }, { "epoch": 18.89763779527559, "eval_bleu": 7.8198, "eval_gen_len": 50.1153, "eval_loss": 3.5556399822235107, "eval_runtime": 195.3596, "eval_samples_per_second": 5.103, "eval_steps_per_second": 0.322, "step": 2400 }, { "epoch": 20.0, "step": 2540, "total_flos": 2811538357714944.0, "train_loss": 1.096170819650485, "train_runtime": 5288.9286, "train_samples_per_second": 3.827, "train_steps_per_second": 0.48 } ], "logging_steps": 200, "max_steps": 2540, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2811538357714944.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }