{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 100, "global_step": 3544, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "eval_loss": 0.4486841857433319, "eval_runtime": 759.5602, "eval_samples_per_second": 4.091, "eval_steps_per_second": 0.512, "eval_wer": 0.35654000233363153, "step": 100 }, { "epoch": 0.23, "eval_loss": 0.3544192612171173, "eval_runtime": 754.7374, "eval_samples_per_second": 4.117, "eval_steps_per_second": 0.515, "eval_wer": 0.33168682665007193, "step": 200 }, { "epoch": 0.34, "eval_loss": 0.36927035450935364, "eval_runtime": 750.419, "eval_samples_per_second": 4.14, "eval_steps_per_second": 0.518, "eval_wer": 0.308817237758158, "step": 300 }, { "epoch": 0.45, "eval_loss": 0.3404374122619629, "eval_runtime": 754.9059, "eval_samples_per_second": 4.116, "eval_steps_per_second": 0.515, "eval_wer": 0.3040332931430127, "step": 400 }, { "epoch": 0.56, "learning_rate": 0.0008853077816492451, "loss": 1.5084, "step": 500 }, { "epoch": 0.56, "eval_loss": 0.33462777733802795, "eval_runtime": 756.1347, "eval_samples_per_second": 4.109, "eval_steps_per_second": 0.514, "eval_wer": 0.29952160553848545, "step": 500 }, { "epoch": 0.68, "eval_loss": 0.34105485677719116, "eval_runtime": 756.3042, "eval_samples_per_second": 4.108, "eval_steps_per_second": 0.514, "eval_wer": 0.2935708451635487, "step": 600 }, { "epoch": 0.79, "eval_loss": 0.31745076179504395, "eval_runtime": 748.8702, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.519, "eval_wer": 0.2887480066897437, "step": 700 }, { "epoch": 0.9, "eval_loss": 0.3159240484237671, "eval_runtime": 765.2091, "eval_samples_per_second": 4.06, "eval_steps_per_second": 0.508, "eval_wer": 0.2898370347322158, "step": 800 }, { "epoch": 1.02, "eval_loss": 0.3138948380947113, "eval_runtime": 752.8617, "eval_samples_per_second": 4.127, "eval_steps_per_second": 0.517, "eval_wer": 0.304538913305589, "step": 900 }, { "epoch": 1.13, "learning_rate": 0.000740418118466899, "loss": 0.3485, "step": 1000 }, { "epoch": 1.13, "eval_loss": 0.3067200779914856, "eval_runtime": 749.2127, "eval_samples_per_second": 4.147, "eval_steps_per_second": 0.519, "eval_wer": 0.2957877951071526, "step": 1000 }, { "epoch": 1.24, "eval_loss": 0.29693612456321716, "eval_runtime": 752.9596, "eval_samples_per_second": 4.126, "eval_steps_per_second": 0.517, "eval_wer": 0.2766909105052312, "step": 1100 }, { "epoch": 1.35, "eval_loss": 0.29161983728408813, "eval_runtime": 757.6219, "eval_samples_per_second": 4.101, "eval_steps_per_second": 0.513, "eval_wer": 0.2714013457275096, "step": 1200 }, { "epoch": 1.47, "eval_loss": 0.2893225848674774, "eval_runtime": 751.196, "eval_samples_per_second": 4.136, "eval_steps_per_second": 0.518, "eval_wer": 0.2663451441017463, "step": 1300 }, { "epoch": 1.58, "eval_loss": 0.3183298408985138, "eval_runtime": 752.5817, "eval_samples_per_second": 4.128, "eval_steps_per_second": 0.517, "eval_wer": 0.29854925907199253, "step": 1400 }, { "epoch": 1.69, "learning_rate": 0.0005952380952380953, "loss": 0.3152, "step": 1500 }, { "epoch": 1.69, "eval_loss": 0.296146422624588, "eval_runtime": 747.739, "eval_samples_per_second": 4.155, "eval_steps_per_second": 0.52, "eval_wer": 0.26875656333864884, "step": 1500 }, { "epoch": 1.81, "eval_loss": 0.2847990095615387, "eval_runtime": 745.157, "eval_samples_per_second": 4.17, "eval_steps_per_second": 0.522, "eval_wer": 0.26653961339504495, "step": 1600 }, { "epoch": 1.92, "eval_loss": 0.28440287709236145, "eval_runtime": 753.6747, "eval_samples_per_second": 4.122, "eval_steps_per_second": 0.516, "eval_wer": 0.2656450546458714, "step": 1700 }, { "epoch": 2.03, "eval_loss": 0.2854562997817993, "eval_runtime": 748.224, "eval_samples_per_second": 4.153, "eval_steps_per_second": 0.52, "eval_wer": 0.2707401501302944, "step": 1800 }, { "epoch": 2.14, "eval_loss": 0.2886996865272522, "eval_runtime": 750.8758, "eval_samples_per_second": 4.138, "eval_steps_per_second": 0.518, "eval_wer": 0.26860098790400994, "step": 1900 }, { "epoch": 2.26, "learning_rate": 0.0004500580720092916, "loss": 0.3058, "step": 2000 }, { "epoch": 2.26, "eval_loss": 0.28578099608421326, "eval_runtime": 748.0813, "eval_samples_per_second": 4.153, "eval_steps_per_second": 0.52, "eval_wer": 0.26568394850453114, "step": 2000 }, { "epoch": 2.37, "eval_loss": 0.2813587188720703, "eval_runtime": 756.7359, "eval_samples_per_second": 4.106, "eval_steps_per_second": 0.514, "eval_wer": 0.2629224845396912, "step": 2100 }, { "epoch": 2.48, "eval_loss": 0.2809281349182129, "eval_runtime": 759.1954, "eval_samples_per_second": 4.092, "eval_steps_per_second": 0.512, "eval_wer": 0.26331142312628836, "step": 2200 }, { "epoch": 2.6, "eval_loss": 0.2778892517089844, "eval_runtime": 756.4055, "eval_samples_per_second": 4.108, "eval_steps_per_second": 0.514, "eval_wer": 0.26128894247598305, "step": 2300 }, { "epoch": 2.71, "eval_loss": 0.2744755446910858, "eval_runtime": 758.3998, "eval_samples_per_second": 4.097, "eval_steps_per_second": 0.513, "eval_wer": 0.25806075220722646, "step": 2400 }, { "epoch": 2.82, "learning_rate": 0.0003051684088269454, "loss": 0.2861, "step": 2500 }, { "epoch": 2.82, "eval_loss": 0.2769048810005188, "eval_runtime": 757.4274, "eval_samples_per_second": 4.102, "eval_steps_per_second": 0.514, "eval_wer": 0.26175566877989964, "step": 2500 }, { "epoch": 2.93, "eval_loss": 0.2742438316345215, "eval_runtime": 761.8109, "eval_samples_per_second": 4.078, "eval_steps_per_second": 0.511, "eval_wer": 0.2575940259033099, "step": 2600 }, { "epoch": 3.05, "eval_loss": 0.27301648259162903, "eval_runtime": 765.5965, "eval_samples_per_second": 4.058, "eval_steps_per_second": 0.508, "eval_wer": 0.2574773443273307, "step": 2700 }, { "epoch": 3.16, "eval_loss": 0.27274471521377563, "eval_runtime": 756.2486, "eval_samples_per_second": 4.108, "eval_steps_per_second": 0.514, "eval_wer": 0.25638831628485864, "step": 2800 }, { "epoch": 3.27, "eval_loss": 0.27257227897644043, "eval_runtime": 762.5719, "eval_samples_per_second": 4.074, "eval_steps_per_second": 0.51, "eval_wer": 0.2562716347088795, "step": 2900 }, { "epoch": 3.39, "learning_rate": 0.0001599883855981417, "loss": 0.2839, "step": 3000 }, { "epoch": 3.39, "eval_loss": 0.2713397443294525, "eval_runtime": 762.4867, "eval_samples_per_second": 4.075, "eval_steps_per_second": 0.51, "eval_wer": 0.25755513204465014, "step": 3000 }, { "epoch": 3.5, "eval_loss": 0.26895028352737427, "eval_runtime": 763.0484, "eval_samples_per_second": 4.072, "eval_steps_per_second": 0.51, "eval_wer": 0.2536657461786784, "step": 3100 }, { "epoch": 3.61, "eval_loss": 0.2706141769886017, "eval_runtime": 754.9996, "eval_samples_per_second": 4.115, "eval_steps_per_second": 0.515, "eval_wer": 0.2539768970479561, "step": 3200 }, { "epoch": 3.72, "eval_loss": 0.26873159408569336, "eval_runtime": 760.416, "eval_samples_per_second": 4.086, "eval_steps_per_second": 0.512, "eval_wer": 0.25421026019991444, "step": 3300 }, { "epoch": 3.84, "eval_loss": 0.26714619994163513, "eval_runtime": 754.0393, "eval_samples_per_second": 4.12, "eval_steps_per_second": 0.516, "eval_wer": 0.2521488856909494, "step": 3400 }, { "epoch": 3.95, "learning_rate": 1.5098722415795587e-05, "loss": 0.2706, "step": 3500 }, { "epoch": 3.95, "eval_loss": 0.267267644405365, "eval_runtime": 758.6405, "eval_samples_per_second": 4.095, "eval_steps_per_second": 0.513, "eval_wer": 0.2522266734082688, "step": 3500 }, { "epoch": 4.0, "step": 3544, "total_flos": 1.1766650360089125e+19, "train_loss": 0.471761938411549, "train_runtime": 38765.3056, "train_samples_per_second": 0.731, "train_steps_per_second": 0.091 } ], "logging_steps": 500, "max_steps": 3544, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "total_flos": 1.1766650360089125e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }