{ "best_metric": 2.8250861167907715, "best_model_checkpoint": "en-to-lg-ufal-al/checkpoint-160", "epoch": 10.0, "eval_steps": 500, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 12.3779, "eval_gen_len": 42.6083, "eval_loss": 2.9468307495117188, "eval_runtime": 1501.8811, "eval_samples_per_second": 10.334, "eval_steps_per_second": 0.323, "step": 16 }, { "epoch": 2.0, "eval_bleu": 12.5162, "eval_gen_len": 42.7061, "eval_loss": 2.9221065044403076, "eval_runtime": 1525.1235, "eval_samples_per_second": 10.176, "eval_steps_per_second": 0.318, "step": 32 }, { "epoch": 3.0, "eval_bleu": 12.6486, "eval_gen_len": 42.7809, "eval_loss": 2.8996763229370117, "eval_runtime": 1520.5333, "eval_samples_per_second": 10.207, "eval_steps_per_second": 0.319, "step": 48 }, { "epoch": 3.125, "grad_norm": 1.846739411354065, "learning_rate": 1.3875e-05, "loss": 3.2685, "step": 50 }, { "epoch": 4.0, "eval_bleu": 12.7887, "eval_gen_len": 42.9278, "eval_loss": 2.8806185722351074, "eval_runtime": 1541.7574, "eval_samples_per_second": 10.066, "eval_steps_per_second": 0.315, "step": 64 }, { "epoch": 5.0, "eval_bleu": 12.8971, "eval_gen_len": 42.9779, "eval_loss": 2.863759994506836, "eval_runtime": 1538.3478, "eval_samples_per_second": 10.089, "eval_steps_per_second": 0.315, "step": 80 }, { "epoch": 6.0, "eval_bleu": 12.9175, "eval_gen_len": 42.9273, "eval_loss": 2.8501696586608887, "eval_runtime": 1540.1728, "eval_samples_per_second": 10.077, "eval_steps_per_second": 0.315, "step": 96 }, { "epoch": 6.25, "grad_norm": 0.854383111000061, "learning_rate": 7.625e-06, "loss": 3.1513, "step": 100 }, { "epoch": 7.0, "eval_bleu": 12.9938, "eval_gen_len": 43.0523, "eval_loss": 2.839545249938965, "eval_runtime": 1546.6529, "eval_samples_per_second": 10.035, "eval_steps_per_second": 0.314, "step": 112 }, { "epoch": 8.0, "eval_bleu": 13.0635, "eval_gen_len": 43.0526, "eval_loss": 2.831665277481079, "eval_runtime": 1543.1426, "eval_samples_per_second": 10.057, "eval_steps_per_second": 0.314, "step": 128 }, { "epoch": 9.0, "eval_bleu": 13.1002, "eval_gen_len": 43.099, "eval_loss": 2.8269288539886475, "eval_runtime": 1549.5221, "eval_samples_per_second": 10.016, "eval_steps_per_second": 0.313, "step": 144 }, { "epoch": 9.375, "grad_norm": 0.8836955428123474, "learning_rate": 1.3750000000000002e-06, "loss": 3.1409, "step": 150 }, { "epoch": 10.0, "eval_bleu": 13.0773, "eval_gen_len": 43.1512, "eval_loss": 2.8250861167907715, "eval_runtime": 1562.623, "eval_samples_per_second": 9.932, "eval_steps_per_second": 0.31, "step": 160 } ], "logging_steps": 50, "max_steps": 160, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 327971658792960.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }