|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 3544, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.4486841857433319, |
|
"eval_runtime": 759.5602, |
|
"eval_samples_per_second": 4.091, |
|
"eval_steps_per_second": 0.512, |
|
"eval_wer": 0.35654000233363153, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 0.3544192612171173, |
|
"eval_runtime": 754.7374, |
|
"eval_samples_per_second": 4.117, |
|
"eval_steps_per_second": 0.515, |
|
"eval_wer": 0.33168682665007193, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 0.36927035450935364, |
|
"eval_runtime": 750.419, |
|
"eval_samples_per_second": 4.14, |
|
"eval_steps_per_second": 0.518, |
|
"eval_wer": 0.308817237758158, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 0.3404374122619629, |
|
"eval_runtime": 754.9059, |
|
"eval_samples_per_second": 4.116, |
|
"eval_steps_per_second": 0.515, |
|
"eval_wer": 0.3040332931430127, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008853077816492451, |
|
"loss": 1.5084, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.33462777733802795, |
|
"eval_runtime": 756.1347, |
|
"eval_samples_per_second": 4.109, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.29952160553848545, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.34105485677719116, |
|
"eval_runtime": 756.3042, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.2935708451635487, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.31745076179504395, |
|
"eval_runtime": 748.8702, |
|
"eval_samples_per_second": 4.149, |
|
"eval_steps_per_second": 0.519, |
|
"eval_wer": 0.2887480066897437, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.3159240484237671, |
|
"eval_runtime": 765.2091, |
|
"eval_samples_per_second": 4.06, |
|
"eval_steps_per_second": 0.508, |
|
"eval_wer": 0.2898370347322158, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.3138948380947113, |
|
"eval_runtime": 752.8617, |
|
"eval_samples_per_second": 4.127, |
|
"eval_steps_per_second": 0.517, |
|
"eval_wer": 0.304538913305589, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.000740418118466899, |
|
"loss": 0.3485, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 0.3067200779914856, |
|
"eval_runtime": 749.2127, |
|
"eval_samples_per_second": 4.147, |
|
"eval_steps_per_second": 0.519, |
|
"eval_wer": 0.2957877951071526, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 0.29693612456321716, |
|
"eval_runtime": 752.9596, |
|
"eval_samples_per_second": 4.126, |
|
"eval_steps_per_second": 0.517, |
|
"eval_wer": 0.2766909105052312, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.29161983728408813, |
|
"eval_runtime": 757.6219, |
|
"eval_samples_per_second": 4.101, |
|
"eval_steps_per_second": 0.513, |
|
"eval_wer": 0.2714013457275096, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.2893225848674774, |
|
"eval_runtime": 751.196, |
|
"eval_samples_per_second": 4.136, |
|
"eval_steps_per_second": 0.518, |
|
"eval_wer": 0.2663451441017463, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.3183298408985138, |
|
"eval_runtime": 752.5817, |
|
"eval_samples_per_second": 4.128, |
|
"eval_steps_per_second": 0.517, |
|
"eval_wer": 0.29854925907199253, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0005952380952380953, |
|
"loss": 0.3152, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_loss": 0.296146422624588, |
|
"eval_runtime": 747.739, |
|
"eval_samples_per_second": 4.155, |
|
"eval_steps_per_second": 0.52, |
|
"eval_wer": 0.26875656333864884, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.2847990095615387, |
|
"eval_runtime": 745.157, |
|
"eval_samples_per_second": 4.17, |
|
"eval_steps_per_second": 0.522, |
|
"eval_wer": 0.26653961339504495, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.28440287709236145, |
|
"eval_runtime": 753.6747, |
|
"eval_samples_per_second": 4.122, |
|
"eval_steps_per_second": 0.516, |
|
"eval_wer": 0.2656450546458714, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.2854562997817993, |
|
"eval_runtime": 748.224, |
|
"eval_samples_per_second": 4.153, |
|
"eval_steps_per_second": 0.52, |
|
"eval_wer": 0.2707401501302944, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.2886996865272522, |
|
"eval_runtime": 750.8758, |
|
"eval_samples_per_second": 4.138, |
|
"eval_steps_per_second": 0.518, |
|
"eval_wer": 0.26860098790400994, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0004500580720092916, |
|
"loss": 0.3058, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.28578099608421326, |
|
"eval_runtime": 748.0813, |
|
"eval_samples_per_second": 4.153, |
|
"eval_steps_per_second": 0.52, |
|
"eval_wer": 0.26568394850453114, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.2813587188720703, |
|
"eval_runtime": 756.7359, |
|
"eval_samples_per_second": 4.106, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.2629224845396912, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 0.2809281349182129, |
|
"eval_runtime": 759.1954, |
|
"eval_samples_per_second": 4.092, |
|
"eval_steps_per_second": 0.512, |
|
"eval_wer": 0.26331142312628836, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.2778892517089844, |
|
"eval_runtime": 756.4055, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.26128894247598305, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.2744755446910858, |
|
"eval_runtime": 758.3998, |
|
"eval_samples_per_second": 4.097, |
|
"eval_steps_per_second": 0.513, |
|
"eval_wer": 0.25806075220722646, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0003051684088269454, |
|
"loss": 0.2861, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 0.2769048810005188, |
|
"eval_runtime": 757.4274, |
|
"eval_samples_per_second": 4.102, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.26175566877989964, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.2742438316345215, |
|
"eval_runtime": 761.8109, |
|
"eval_samples_per_second": 4.078, |
|
"eval_steps_per_second": 0.511, |
|
"eval_wer": 0.2575940259033099, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.27301648259162903, |
|
"eval_runtime": 765.5965, |
|
"eval_samples_per_second": 4.058, |
|
"eval_steps_per_second": 0.508, |
|
"eval_wer": 0.2574773443273307, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"eval_loss": 0.27274471521377563, |
|
"eval_runtime": 756.2486, |
|
"eval_samples_per_second": 4.108, |
|
"eval_steps_per_second": 0.514, |
|
"eval_wer": 0.25638831628485864, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 0.27257227897644043, |
|
"eval_runtime": 762.5719, |
|
"eval_samples_per_second": 4.074, |
|
"eval_steps_per_second": 0.51, |
|
"eval_wer": 0.2562716347088795, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0001599883855981417, |
|
"loss": 0.2839, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_loss": 0.2713397443294525, |
|
"eval_runtime": 762.4867, |
|
"eval_samples_per_second": 4.075, |
|
"eval_steps_per_second": 0.51, |
|
"eval_wer": 0.25755513204465014, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.26895028352737427, |
|
"eval_runtime": 763.0484, |
|
"eval_samples_per_second": 4.072, |
|
"eval_steps_per_second": 0.51, |
|
"eval_wer": 0.2536657461786784, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 0.2706141769886017, |
|
"eval_runtime": 754.9996, |
|
"eval_samples_per_second": 4.115, |
|
"eval_steps_per_second": 0.515, |
|
"eval_wer": 0.2539768970479561, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 0.26873159408569336, |
|
"eval_runtime": 760.416, |
|
"eval_samples_per_second": 4.086, |
|
"eval_steps_per_second": 0.512, |
|
"eval_wer": 0.25421026019991444, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 0.26714619994163513, |
|
"eval_runtime": 754.0393, |
|
"eval_samples_per_second": 4.12, |
|
"eval_steps_per_second": 0.516, |
|
"eval_wer": 0.2521488856909494, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.5098722415795587e-05, |
|
"loss": 0.2706, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.267267644405365, |
|
"eval_runtime": 758.6405, |
|
"eval_samples_per_second": 4.095, |
|
"eval_steps_per_second": 0.513, |
|
"eval_wer": 0.2522266734082688, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3544, |
|
"total_flos": 1.1766650360089125e+19, |
|
"train_loss": 0.471761938411549, |
|
"train_runtime": 38765.3056, |
|
"train_samples_per_second": 0.731, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3544, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 200, |
|
"total_flos": 1.1766650360089125e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|