{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 200, "global_step": 8312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_loss": 3.2670648097991943, "eval_runtime": 692.0326, "eval_samples_per_second": 5.263, "eval_steps_per_second": 0.659, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.19, "eval_loss": 2.8740603923797607, "eval_runtime": 683.8046, "eval_samples_per_second": 5.326, "eval_steps_per_second": 0.667, "eval_wer": 1.0006963141769567, "step": 400 }, { "epoch": 0.24, "learning_rate": 0.0009517778860204579, "loss": 3.8381, "step": 500 }, { "epoch": 0.29, "eval_loss": 2.761221170425415, "eval_runtime": 683.642, "eval_samples_per_second": 5.327, "eval_steps_per_second": 0.667, "eval_wer": 0.9954507473772166, "step": 600 }, { "epoch": 0.38, "eval_loss": 2.633348226547241, "eval_runtime": 684.1815, "eval_samples_per_second": 5.323, "eval_steps_per_second": 0.666, "eval_wer": 0.9981431621947823, "step": 800 }, { "epoch": 0.48, "learning_rate": 0.000890891378470531, "loss": 2.6996, "step": 1000 }, { "epoch": 0.48, "eval_loss": 2.3073549270629883, "eval_runtime": 686.3923, "eval_samples_per_second": 5.306, "eval_steps_per_second": 0.664, "eval_wer": 0.9770680531055612, "step": 1000 }, { "epoch": 0.58, "eval_loss": 2.0154612064361572, "eval_runtime": 686.5478, "eval_samples_per_second": 5.305, "eval_steps_per_second": 0.664, "eval_wer": 0.9286045863893789, "step": 1200 }, { "epoch": 0.67, "eval_loss": 1.9155136346817017, "eval_runtime": 689.547, "eval_samples_per_second": 5.282, "eval_steps_per_second": 0.661, "eval_wer": 0.8947172964441557, "step": 1400 }, { "epoch": 0.72, "learning_rate": 0.000830004870920604, "loss": 2.2919, "step": 1500 }, { "epoch": 0.77, "eval_loss": 1.641204595565796, "eval_runtime": 685.3748, "eval_samples_per_second": 5.314, "eval_steps_per_second": 0.665, "eval_wer": 0.8813944851917185, "step": 1600 }, { "epoch": 0.87, "eval_loss": 1.4531193971633911, "eval_runtime": 689.2035, "eval_samples_per_second": 5.284, "eval_steps_per_second": 0.662, "eval_wer": 0.8285210286881441, "step": 1800 }, { "epoch": 0.96, "learning_rate": 0.0007691183633706771, "loss": 1.5872, "step": 2000 }, { "epoch": 0.96, "eval_loss": 0.1812867820262909, "eval_runtime": 685.9058, "eval_samples_per_second": 5.31, "eval_steps_per_second": 0.665, "eval_wer": 0.2060161544889054, "step": 2000 }, { "epoch": 1.06, "eval_loss": 0.1635832041501999, "eval_runtime": 687.9409, "eval_samples_per_second": 5.294, "eval_steps_per_second": 0.663, "eval_wer": 0.18062389750255314, "step": 2200 }, { "epoch": 1.15, "eval_loss": 0.155806764960289, "eval_runtime": 692.1735, "eval_samples_per_second": 5.262, "eval_steps_per_second": 0.659, "eval_wer": 0.17444991180020425, "step": 2400 }, { "epoch": 1.2, "learning_rate": 0.0007084754018509498, "loss": 0.2659, "step": 2500 }, { "epoch": 1.25, "eval_loss": 0.152183398604393, "eval_runtime": 688.556, "eval_samples_per_second": 5.289, "eval_steps_per_second": 0.662, "eval_wer": 0.1646550923776808, "step": 2600 }, { "epoch": 1.35, "eval_loss": 0.15532232820987701, "eval_runtime": 688.1144, "eval_samples_per_second": 5.293, "eval_steps_per_second": 0.663, "eval_wer": 0.16641908829263763, "step": 2800 }, { "epoch": 1.44, "learning_rate": 0.0006475888943010228, "loss": 0.2436, "step": 3000 }, { "epoch": 1.44, "eval_loss": 0.1840931922197342, "eval_runtime": 692.811, "eval_samples_per_second": 5.257, "eval_steps_per_second": 0.658, "eval_wer": 0.1960820722309906, "step": 3000 }, { "epoch": 1.54, "eval_loss": 0.14190182089805603, "eval_runtime": 690.3365, "eval_samples_per_second": 5.276, "eval_steps_per_second": 0.661, "eval_wer": 0.1640051991458546, "step": 3200 }, { "epoch": 1.64, "eval_loss": 0.14559713006019592, "eval_runtime": 685.0999, "eval_samples_per_second": 5.316, "eval_steps_per_second": 0.666, "eval_wer": 0.17143255036672547, "step": 3400 }, { "epoch": 1.68, "learning_rate": 0.000586702386751096, "loss": 0.2464, "step": 3500 }, { "epoch": 1.73, "eval_loss": 0.14024095237255096, "eval_runtime": 692.5402, "eval_samples_per_second": 5.259, "eval_steps_per_second": 0.658, "eval_wer": 0.16070931204159317, "step": 3600 }, { "epoch": 1.83, "eval_loss": 0.1345185786485672, "eval_runtime": 694.4502, "eval_samples_per_second": 5.244, "eval_steps_per_second": 0.657, "eval_wer": 0.1528177513694179, "step": 3800 }, { "epoch": 1.92, "learning_rate": 0.000525815879201169, "loss": 0.2292, "step": 4000 }, { "epoch": 1.92, "eval_loss": 0.134234219789505, "eval_runtime": 693.8578, "eval_samples_per_second": 5.249, "eval_steps_per_second": 0.657, "eval_wer": 0.155556587132114, "step": 4000 }, { "epoch": 2.02, "eval_loss": 0.13340923190116882, "eval_runtime": 684.0209, "eval_samples_per_second": 5.324, "eval_steps_per_second": 0.667, "eval_wer": 0.15518521957107045, "step": 4200 }, { "epoch": 2.12, "eval_loss": 0.13518257439136505, "eval_runtime": 687.2622, "eval_samples_per_second": 5.299, "eval_steps_per_second": 0.664, "eval_wer": 0.1543496425587225, "step": 4400 }, { "epoch": 2.17, "learning_rate": 0.0004649293716512421, "loss": 0.2209, "step": 4500 }, { "epoch": 2.21, "eval_loss": 0.13499902188777924, "eval_runtime": 691.8679, "eval_samples_per_second": 5.264, "eval_steps_per_second": 0.659, "eval_wer": 0.1537925912171572, "step": 4600 }, { "epoch": 2.31, "eval_loss": 0.13418444991111755, "eval_runtime": 685.3615, "eval_samples_per_second": 5.314, "eval_steps_per_second": 0.665, "eval_wer": 0.1530498560950701, "step": 4800 }, { "epoch": 2.41, "learning_rate": 0.00040404286410131515, "loss": 0.2136, "step": 5000 }, { "epoch": 2.41, "eval_loss": 0.1319747269153595, "eval_runtime": 688.5799, "eval_samples_per_second": 5.289, "eval_steps_per_second": 0.662, "eval_wer": 0.1540246959428094, "step": 5000 }, { "epoch": 2.5, "eval_loss": 0.13689081370830536, "eval_runtime": 691.8314, "eval_samples_per_second": 5.264, "eval_steps_per_second": 0.659, "eval_wer": 0.15690279454089684, "step": 5200 }, { "epoch": 2.6, "eval_loss": 0.13139554858207703, "eval_runtime": 689.159, "eval_samples_per_second": 5.285, "eval_steps_per_second": 0.662, "eval_wer": 0.1516572277411568, "step": 5400 }, { "epoch": 2.65, "learning_rate": 0.0003431563565513882, "loss": 0.2154, "step": 5500 }, { "epoch": 2.69, "eval_loss": 0.1303856372833252, "eval_runtime": 694.7157, "eval_samples_per_second": 5.242, "eval_steps_per_second": 0.656, "eval_wer": 0.15063596694828707, "step": 5600 }, { "epoch": 2.79, "eval_loss": 0.13201411068439484, "eval_runtime": 691.5101, "eval_samples_per_second": 5.267, "eval_steps_per_second": 0.659, "eval_wer": 0.15072880883854795, "step": 5800 }, { "epoch": 2.89, "learning_rate": 0.0002822698490014613, "loss": 0.2123, "step": 6000 }, { "epoch": 2.89, "eval_loss": 0.13187964260578156, "eval_runtime": 687.2712, "eval_samples_per_second": 5.299, "eval_steps_per_second": 0.663, "eval_wer": 0.1523999628632439, "step": 6000 }, { "epoch": 2.98, "eval_loss": 0.12917861342430115, "eval_runtime": 691.2948, "eval_samples_per_second": 5.268, "eval_steps_per_second": 0.66, "eval_wer": 0.1523999628632439, "step": 6200 }, { "epoch": 3.08, "eval_loss": 0.12825024127960205, "eval_runtime": 689.7813, "eval_samples_per_second": 5.28, "eval_steps_per_second": 0.661, "eval_wer": 0.1488255500881998, "step": 6400 }, { "epoch": 3.13, "learning_rate": 0.00022138334145153436, "loss": 0.2109, "step": 6500 }, { "epoch": 3.18, "eval_loss": 0.1257564276456833, "eval_runtime": 687.694, "eval_samples_per_second": 5.296, "eval_steps_per_second": 0.663, "eval_wer": 0.14919691764924334, "step": 6600 }, { "epoch": 3.27, "eval_loss": 0.12906372547149658, "eval_runtime": 687.8093, "eval_samples_per_second": 5.295, "eval_steps_per_second": 0.663, "eval_wer": 0.1488255500881998, "step": 6800 }, { "epoch": 3.37, "learning_rate": 0.0001604968339016074, "loss": 0.2103, "step": 7000 }, { "epoch": 3.37, "eval_loss": 0.12778830528259277, "eval_runtime": 694.3257, "eval_samples_per_second": 5.245, "eval_steps_per_second": 0.657, "eval_wer": 0.14840776158202582, "step": 7000 }, { "epoch": 3.46, "eval_loss": 0.12501177191734314, "eval_runtime": 679.1124, "eval_samples_per_second": 5.363, "eval_steps_per_second": 0.671, "eval_wer": 0.14780428929533004, "step": 7200 }, { "epoch": 3.56, "eval_loss": 0.12769711017608643, "eval_runtime": 683.2755, "eval_samples_per_second": 5.33, "eval_steps_per_second": 0.667, "eval_wer": 0.14822207780150404, "step": 7400 }, { "epoch": 3.61, "learning_rate": 9.961032635168047e-05, "loss": 0.1986, "step": 7500 }, { "epoch": 3.66, "eval_loss": 0.1256353259086609, "eval_runtime": 680.6384, "eval_samples_per_second": 5.351, "eval_steps_per_second": 0.67, "eval_wer": 0.14757218456967783, "step": 7600 }, { "epoch": 3.75, "eval_loss": 0.12579868733882904, "eval_runtime": 683.2757, "eval_samples_per_second": 5.33, "eval_steps_per_second": 0.667, "eval_wer": 0.14682944944759074, "step": 7800 }, { "epoch": 3.85, "learning_rate": 3.884559181685338e-05, "loss": 0.1954, "step": 8000 }, { "epoch": 3.85, "eval_loss": 0.12557055056095123, "eval_runtime": 690.701, "eval_samples_per_second": 5.273, "eval_steps_per_second": 0.66, "eval_wer": 0.14645808188654721, "step": 8000 }, { "epoch": 3.95, "eval_loss": 0.12530682981014252, "eval_runtime": 692.3328, "eval_samples_per_second": 5.26, "eval_steps_per_second": 0.659, "eval_wer": 0.1455760839290688, "step": 8200 }, { "epoch": 4.0, "step": 8312, "total_flos": 1.5580571693960135e+19, "train_loss": 0.7944976037459608, "train_runtime": 46739.0877, "train_samples_per_second": 0.711, "train_steps_per_second": 0.178 } ], "logging_steps": 500, "max_steps": 8312, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "total_flos": 1.5580571693960135e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }