{ "best_metric": 21.407068619939793, "best_model_checkpoint": "/cosmos/home/sp-operator/ai/training/models/huggingface/scripts/../breeze-listen-dsw-base-ta/checkpoint-1000", "epoch": 1.0, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.973833272194737e-06, "loss": 0.5836, "step": 25 }, { "epoch": 0.05, "learning_rate": 6.195318418690893e-06, "loss": 0.308, "step": 50 }, { "epoch": 0.07, "learning_rate": 6.881634451095711e-06, "loss": 0.2177, "step": 75 }, { "epoch": 0.1, "learning_rate": 7.361221988663844e-06, "loss": 0.1698, "step": 100 }, { "epoch": 0.1, "eval_loss": 0.572265625, "eval_runtime": 14672.9249, "eval_samples_per_second": 0.79, "eval_steps_per_second": 0.049, "eval_wer": 30.440569885233597, "step": 100 }, { "epoch": 0.12, "learning_rate": 7.730207550743121e-06, "loss": 0.1375, "step": 125 }, { "epoch": 0.15, "learning_rate": 8.03016458599496e-06, "loss": 0.4086, "step": 150 }, { "epoch": 0.17, "learning_rate": 8.282894746203441e-06, "loss": 0.4454, "step": 175 }, { "epoch": 0.2, "learning_rate": 8.501266121799902e-06, "loss": 0.3578, "step": 200 }, { "epoch": 0.2, "eval_loss": 0.43017578125, "eval_runtime": 14830.0095, "eval_samples_per_second": 0.782, "eval_steps_per_second": 0.049, "eval_wer": 25.686238091921464, "step": 200 }, { "epoch": 0.23, "learning_rate": 8.693512601774437e-06, "loss": 0.3099, "step": 225 }, { "epoch": 0.25, "learning_rate": 8.865222471593567e-06, "loss": 0.3064, "step": 250 }, { "epoch": 0.28, "learning_rate": 9.020362953730323e-06, "loss": 0.2555, "step": 275 }, { "epoch": 0.3, "learning_rate": 9.161852281961698e-06, "loss": 0.2832, "step": 300 }, { "epoch": 0.3, "eval_loss": 0.396728515625, "eval_runtime": 14915.7095, "eval_samples_per_second": 0.778, "eval_steps_per_second": 0.049, "eval_wer": 23.204791864973416, "step": 300 }, { "epoch": 0.33, "learning_rate": 9.29189975311636e-06, "loss": 0.239, "step": 325 }, { "epoch": 0.35, "learning_rate": 9.412218256259678e-06, "loss": 0.2391, "step": 350 }, { "epoch": 0.38, "learning_rate": 9.524162683365145e-06, "loss": 0.3022, "step": 375 }, { "epoch": 0.4, "learning_rate": 9.62882322733502e-06, "loss": 0.2663, "step": 400 }, { "epoch": 0.4, "eval_loss": 0.40380859375, "eval_runtime": 14958.2575, "eval_samples_per_second": 0.775, "eval_steps_per_second": 0.048, "eval_wer": 23.852529426787083, "step": 400 }, { "epoch": 0.42, "learning_rate": 9.727090137141168e-06, "loss": 0.2667, "step": 425 }, { "epoch": 0.45, "learning_rate": 9.819699807237934e-06, "loss": 0.2368, "step": 450 }, { "epoch": 0.47, "learning_rate": 9.907268307310855e-06, "loss": 0.4503, "step": 475 }, { "epoch": 0.5, "learning_rate": 9.990316248055788e-06, "loss": 0.5175, "step": 500 }, { "epoch": 0.5, "eval_loss": 0.396240234375, "eval_runtime": 14832.7471, "eval_samples_per_second": 0.782, "eval_steps_per_second": 0.049, "eval_wer": 24.14660382761049, "step": 500 }, { "epoch": 0.53, "learning_rate": 9.58e-06, "loss": 0.4672, "step": 525 }, { "epoch": 0.55, "learning_rate": 9.080000000000001e-06, "loss": 0.4226, "step": 550 }, { "epoch": 0.57, "learning_rate": 8.580000000000001e-06, "loss": 0.3758, "step": 575 }, { "epoch": 0.6, "learning_rate": 8.08e-06, "loss": 0.2365, "step": 600 }, { "epoch": 0.6, "eval_loss": 0.385009765625, "eval_runtime": 14663.0923, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.049, "eval_wer": 22.25949744232659, "step": 600 }, { "epoch": 0.62, "learning_rate": 7.58e-06, "loss": 0.205, "step": 625 }, { "epoch": 0.65, "learning_rate": 7.08e-06, "loss": 0.2127, "step": 650 }, { "epoch": 0.68, "learning_rate": 6.5800000000000005e-06, "loss": 0.1918, "step": 675 }, { "epoch": 0.7, "learning_rate": 6.08e-06, "loss": 0.1692, "step": 700 }, { "epoch": 0.7, "eval_loss": 0.39599609375, "eval_runtime": 14696.2041, "eval_samples_per_second": 0.789, "eval_steps_per_second": 0.049, "eval_wer": 21.868688041232325, "step": 700 }, { "epoch": 0.72, "learning_rate": 5.580000000000001e-06, "loss": 0.155, "step": 725 }, { "epoch": 0.75, "learning_rate": 5.0800000000000005e-06, "loss": 0.1639, "step": 750 }, { "epoch": 0.78, "learning_rate": 4.58e-06, "loss": 0.1611, "step": 775 }, { "epoch": 0.8, "learning_rate": 4.08e-06, "loss": 0.1815, "step": 800 }, { "epoch": 0.8, "eval_loss": 0.38232421875, "eval_runtime": 14658.9058, "eval_samples_per_second": 0.791, "eval_steps_per_second": 0.049, "eval_wer": 22.077248701816295, "step": 800 }, { "epoch": 0.82, "learning_rate": 3.58e-06, "loss": 0.1854, "step": 825 }, { "epoch": 0.85, "learning_rate": 3.08e-06, "loss": 0.175, "step": 850 }, { "epoch": 0.88, "learning_rate": 2.5800000000000003e-06, "loss": 0.1684, "step": 875 }, { "epoch": 0.9, "learning_rate": 2.08e-06, "loss": 0.1612, "step": 900 }, { "epoch": 0.9, "eval_loss": 0.3701171875, "eval_runtime": 14864.357, "eval_samples_per_second": 0.78, "eval_steps_per_second": 0.049, "eval_wer": 21.805616821055725, "step": 900 }, { "epoch": 0.93, "learning_rate": 1.5800000000000001e-06, "loss": 0.1528, "step": 925 }, { "epoch": 0.95, "learning_rate": 1.08e-06, "loss": 0.1651, "step": 950 }, { "epoch": 0.97, "learning_rate": 5.800000000000001e-07, "loss": 0.1474, "step": 975 }, { "epoch": 1.0, "learning_rate": 8e-08, "loss": 0.1393, "step": 1000 }, { "epoch": 1.0, "eval_loss": 0.375, "eval_runtime": 14729.6157, "eval_samples_per_second": 0.787, "eval_steps_per_second": 0.049, "eval_wer": 21.407068619939793, "step": 1000 }, { "epoch": 1.0, "step": 1000, "total_flos": 2.0755199247672934e+18, "train_loss": 0.2634566650390625, "train_runtime": 177344.9657, "train_samples_per_second": 0.18, "train_steps_per_second": 0.006 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 100, "total_flos": 2.0755199247672934e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }