{ "best_metric": 0.13481491804122925, "best_model_checkpoint": "./xls-r-1b-bem-msv/checkpoint-2000", "epoch": 4.994797086368366, "eval_steps": 500, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4162330905306972, "grad_norm": 11.17747974395752, "learning_rate": 1.97e-05, "loss": 4.9143, "step": 200 }, { "epoch": 0.8324661810613944, "grad_norm": 4.283135414123535, "learning_rate": 3.960000000000001e-05, "loss": 2.4467, "step": 400 }, { "epoch": 1.0405827263267429, "eval_loss": 0.24771228432655334, "eval_runtime": 86.9901, "eval_samples_per_second": 12.07, "eval_steps_per_second": 1.517, "eval_wer": 0.8371428571428572, "step": 500 }, { "epoch": 1.2486992715920915, "grad_norm": 1.9659106731414795, "learning_rate": 4.747368421052632e-05, "loss": 0.583, "step": 600 }, { "epoch": 1.6649323621227887, "grad_norm": 1.3077179193496704, "learning_rate": 4.221052631578948e-05, "loss": 0.4601, "step": 800 }, { "epoch": 2.0811654526534857, "grad_norm": 1.9944076538085938, "learning_rate": 3.6947368421052635e-05, "loss": 0.4021, "step": 1000 }, { "epoch": 2.0811654526534857, "eval_loss": 0.1869359165430069, "eval_runtime": 86.2672, "eval_samples_per_second": 12.171, "eval_steps_per_second": 1.53, "eval_wer": 0.7790476190476191, "step": 1000 }, { "epoch": 2.497398543184183, "grad_norm": 2.027698516845703, "learning_rate": 3.168421052631579e-05, "loss": 0.3278, "step": 1200 }, { "epoch": 2.91363163371488, "grad_norm": 1.2876348495483398, "learning_rate": 2.6421052631578945e-05, "loss": 0.3192, "step": 1400 }, { "epoch": 3.121748178980229, "eval_loss": 0.14687703549861908, "eval_runtime": 87.0632, "eval_samples_per_second": 12.06, "eval_steps_per_second": 1.516, "eval_wer": 0.6580952380952381, "step": 1500 }, { "epoch": 3.3298647242455774, "grad_norm": 0.6025579571723938, "learning_rate": 2.1157894736842106e-05, "loss": 0.2686, "step": 1600 }, { "epoch": 3.7460978147762747, "grad_norm": 0.5916275978088379, "learning_rate": 1.5894736842105266e-05, "loss": 0.2524, "step": 1800 }, { "epoch": 4.1623309053069715, "grad_norm": 1.3612473011016846, "learning_rate": 1.0631578947368421e-05, "loss": 0.2252, "step": 2000 }, { "epoch": 4.1623309053069715, "eval_loss": 0.13481491804122925, "eval_runtime": 86.2138, "eval_samples_per_second": 12.179, "eval_steps_per_second": 1.531, "eval_wer": 0.6038095238095238, "step": 2000 }, { "epoch": 4.578563995837669, "grad_norm": 1.8633441925048828, "learning_rate": 5.36842105263158e-06, "loss": 0.1991, "step": 2200 }, { "epoch": 4.994797086368366, "grad_norm": 7.069295883178711, "learning_rate": 1.3157894736842107e-07, "loss": 0.1907, "step": 2400 }, { "epoch": 4.994797086368366, "step": 2400, "total_flos": 2.1780564758105436e+19, "train_loss": 0.8824304628372193, "train_runtime": 6499.422, "train_samples_per_second": 5.911, "train_steps_per_second": 0.369 } ], "logging_steps": 200, "max_steps": 2400, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.1780564758105436e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }