{ "best_metric": 1.0, "best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-94", "epoch": 10.0, "eval_steps": 500, "global_step": 940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10638297872340426, "grad_norm": 3.8006672859191895, "learning_rate": 3.1914893617021275e-05, "loss": 4.4798, "step": 10 }, { "epoch": 0.2127659574468085, "grad_norm": 6.771675109863281, "learning_rate": 6.382978723404255e-05, "loss": 3.4754, "step": 20 }, { "epoch": 0.3191489361702128, "grad_norm": 8.21432113647461, "learning_rate": 9.574468085106382e-05, "loss": 2.4467, "step": 30 }, { "epoch": 0.425531914893617, "grad_norm": 7.680928707122803, "learning_rate": 0.0001276595744680851, "loss": 1.4232, "step": 40 }, { "epoch": 0.5319148936170213, "grad_norm": 2.195053815841675, "learning_rate": 0.00015957446808510637, "loss": 0.4151, "step": 50 }, { "epoch": 0.6382978723404256, "grad_norm": 0.13116297125816345, "learning_rate": 0.00019148936170212765, "loss": 0.0361, "step": 60 }, { "epoch": 0.7446808510638298, "grad_norm": 0.014657862484455109, "learning_rate": 0.0002234042553191489, "loss": 0.0017, "step": 70 }, { "epoch": 0.851063829787234, "grad_norm": 0.004712587222456932, "learning_rate": 0.0002553191489361702, "loss": 0.0003, "step": 80 }, { "epoch": 0.9574468085106383, "grad_norm": 0.0027682166546583176, "learning_rate": 0.0002872340425531915, "loss": 0.0001, "step": 90 }, { "epoch": 1.0, "eval_accuracy": 1.0, "eval_loss": 5.837103526573628e-05, "eval_runtime": 11.542, "eval_samples_per_second": 34.829, "eval_steps_per_second": 34.829, "step": 94 }, { "epoch": 1.0638297872340425, "grad_norm": 0.0021660495549440384, "learning_rate": 0.00029787234042553186, "loss": 0.0001, "step": 100 }, { "epoch": 1.1702127659574468, "grad_norm": 0.001946191769093275, "learning_rate": 0.00029432624113475173, "loss": 0.0001, "step": 110 }, { "epoch": 1.2765957446808511, "grad_norm": 0.001837807591073215, "learning_rate": 0.0002907801418439716, "loss": 0.0001, "step": 120 }, { "epoch": 1.3829787234042552, "grad_norm": 0.0017718354938551784, "learning_rate": 0.0002872340425531915, "loss": 0.0001, "step": 130 }, { "epoch": 1.4893617021276595, "grad_norm": 0.0017246523639187217, "learning_rate": 0.00028368794326241134, "loss": 0.0001, "step": 140 }, { "epoch": 1.5957446808510638, "grad_norm": 0.0016802914906293154, "learning_rate": 0.00028014184397163116, "loss": 0.0001, "step": 150 }, { "epoch": 1.702127659574468, "grad_norm": 0.0016365655465051532, "learning_rate": 0.00027659574468085103, "loss": 0.0001, "step": 160 }, { "epoch": 1.8085106382978724, "grad_norm": 0.001596157904714346, "learning_rate": 0.0002730496453900709, "loss": 0.0001, "step": 170 }, { "epoch": 1.9148936170212765, "grad_norm": 0.0015629915287718177, "learning_rate": 0.00026950354609929077, "loss": 0.0001, "step": 180 }, { "epoch": 2.0, "eval_accuracy": 1.0, "eval_loss": 3.635883331298828e-05, "eval_runtime": 10.0577, "eval_samples_per_second": 39.969, "eval_steps_per_second": 39.969, "step": 188 }, { "epoch": 2.021276595744681, "grad_norm": 0.001525467843748629, "learning_rate": 0.0002659574468085106, "loss": 0.0001, "step": 190 }, { "epoch": 2.127659574468085, "grad_norm": 0.0014883955009281635, "learning_rate": 0.00026241134751773046, "loss": 0.0001, "step": 200 }, { "epoch": 2.2340425531914896, "grad_norm": 0.0014570483472198248, "learning_rate": 0.00025886524822695033, "loss": 0.0, "step": 210 }, { "epoch": 2.3404255319148937, "grad_norm": 0.0014193649403750896, "learning_rate": 0.0002553191489361702, "loss": 0.0, "step": 220 }, { "epoch": 2.4468085106382977, "grad_norm": 0.0013909874251112342, "learning_rate": 0.00025177304964539007, "loss": 0.0, "step": 230 }, { "epoch": 2.5531914893617023, "grad_norm": 0.0013578328071162105, "learning_rate": 0.0002482269503546099, "loss": 0.0, "step": 240 }, { "epoch": 2.6595744680851063, "grad_norm": 0.0013242242857813835, "learning_rate": 0.00024468085106382976, "loss": 0.0, "step": 250 }, { "epoch": 2.7659574468085104, "grad_norm": 0.0013041673228144646, "learning_rate": 0.00024113475177304963, "loss": 0.0, "step": 260 }, { "epoch": 2.872340425531915, "grad_norm": 0.0012726597487926483, "learning_rate": 0.0002375886524822695, "loss": 0.0, "step": 270 }, { "epoch": 2.978723404255319, "grad_norm": 0.0012477930868044496, "learning_rate": 0.00023404255319148934, "loss": 0.0, "step": 280 }, { "epoch": 3.0, "eval_accuracy": 1.0, "eval_loss": 2.9017082852078602e-05, "eval_runtime": 9.9982, "eval_samples_per_second": 40.207, "eval_steps_per_second": 40.207, "step": 282 }, { "epoch": 3.0851063829787235, "grad_norm": 0.0012191747082397342, "learning_rate": 0.0002304964539007092, "loss": 0.0, "step": 290 }, { "epoch": 3.1914893617021276, "grad_norm": 0.0011935862712562084, "learning_rate": 0.00022695035460992905, "loss": 0.0, "step": 300 }, { "epoch": 3.297872340425532, "grad_norm": 0.0011742267524823546, "learning_rate": 0.0002234042553191489, "loss": 0.0, "step": 310 }, { "epoch": 3.404255319148936, "grad_norm": 0.0011481853434816003, "learning_rate": 0.00021985815602836877, "loss": 0.0, "step": 320 }, { "epoch": 3.5106382978723403, "grad_norm": 0.0011253234697505832, "learning_rate": 0.00021631205673758864, "loss": 0.0, "step": 330 }, { "epoch": 3.617021276595745, "grad_norm": 0.0011064092395827174, "learning_rate": 0.0002127659574468085, "loss": 0.0, "step": 340 }, { "epoch": 3.723404255319149, "grad_norm": 0.0010885618394240737, "learning_rate": 0.00020921985815602835, "loss": 0.0, "step": 350 }, { "epoch": 3.829787234042553, "grad_norm": 0.001064595184288919, "learning_rate": 0.0002056737588652482, "loss": 0.0, "step": 360 }, { "epoch": 3.9361702127659575, "grad_norm": 0.0010447927052155137, "learning_rate": 0.00020212765957446807, "loss": 0.0, "step": 370 }, { "epoch": 4.0, "eval_accuracy": 1.0, "eval_loss": 2.372264862060547e-05, "eval_runtime": 9.9011, "eval_samples_per_second": 40.602, "eval_steps_per_second": 40.602, "step": 376 }, { "epoch": 4.042553191489362, "grad_norm": 0.001030342886224389, "learning_rate": 0.0001985815602836879, "loss": 0.0, "step": 380 }, { "epoch": 4.148936170212766, "grad_norm": 0.0010120035149157047, "learning_rate": 0.0001950354609929078, "loss": 0.0, "step": 390 }, { "epoch": 4.25531914893617, "grad_norm": 0.0009937717113643885, "learning_rate": 0.00019148936170212765, "loss": 0.0, "step": 400 }, { "epoch": 4.361702127659575, "grad_norm": 0.0009758667438291013, "learning_rate": 0.0001879432624113475, "loss": 0.0, "step": 410 }, { "epoch": 4.468085106382979, "grad_norm": 0.0009558630990795791, "learning_rate": 0.00018439716312056736, "loss": 0.0, "step": 420 }, { "epoch": 4.574468085106383, "grad_norm": 0.0009456143015995622, "learning_rate": 0.0001808510638297872, "loss": 0.0, "step": 430 }, { "epoch": 4.680851063829787, "grad_norm": 0.0009262987296096981, "learning_rate": 0.00017730496453900708, "loss": 0.0, "step": 440 }, { "epoch": 4.787234042553192, "grad_norm": 0.0009140170877799392, "learning_rate": 0.00017375886524822692, "loss": 0.0, "step": 450 }, { "epoch": 4.8936170212765955, "grad_norm": 0.000904095999430865, "learning_rate": 0.00017021276595744682, "loss": 0.0, "step": 460 }, { "epoch": 5.0, "grad_norm": 0.0008834420586936176, "learning_rate": 0.00016666666666666666, "loss": 0.0, "step": 470 }, { "epoch": 5.0, "eval_accuracy": 1.0, "eval_loss": 2.014636993408203e-05, "eval_runtime": 9.9558, "eval_samples_per_second": 40.379, "eval_steps_per_second": 40.379, "step": 470 }, { "epoch": 5.1063829787234045, "grad_norm": 0.0008752320427447557, "learning_rate": 0.0001631205673758865, "loss": 0.0, "step": 480 }, { "epoch": 5.212765957446808, "grad_norm": 0.0008663799380883574, "learning_rate": 0.00015957446808510637, "loss": 0.0, "step": 490 }, { "epoch": 5.319148936170213, "grad_norm": 0.0008535313536413014, "learning_rate": 0.00015602836879432622, "loss": 0.0, "step": 500 }, { "epoch": 5.425531914893617, "grad_norm": 0.0008452454931102693, "learning_rate": 0.00015248226950354606, "loss": 0.0, "step": 510 }, { "epoch": 5.531914893617021, "grad_norm": 0.0008268958772532642, "learning_rate": 0.00014893617021276593, "loss": 0.0, "step": 520 }, { "epoch": 5.638297872340425, "grad_norm": 0.0008181555895134807, "learning_rate": 0.0001453900709219858, "loss": 0.0, "step": 530 }, { "epoch": 5.74468085106383, "grad_norm": 0.0008063354762271047, "learning_rate": 0.00014184397163120567, "loss": 0.0, "step": 540 }, { "epoch": 5.851063829787234, "grad_norm": 0.0007958101341500878, "learning_rate": 0.00013829787234042552, "loss": 0.0, "step": 550 }, { "epoch": 5.957446808510638, "grad_norm": 0.0007865344523452222, "learning_rate": 0.00013475177304964539, "loss": 0.0, "step": 560 }, { "epoch": 6.0, "eval_accuracy": 1.0, "eval_loss": 1.7642974853515625e-05, "eval_runtime": 9.9723, "eval_samples_per_second": 40.312, "eval_steps_per_second": 40.312, "step": 564 }, { "epoch": 6.0638297872340425, "grad_norm": 0.0007770307711325586, "learning_rate": 0.00013120567375886523, "loss": 0.0, "step": 570 }, { "epoch": 6.170212765957447, "grad_norm": 0.0007697618566453457, "learning_rate": 0.0001276595744680851, "loss": 0.0, "step": 580 }, { "epoch": 6.276595744680851, "grad_norm": 0.0007645227597095072, "learning_rate": 0.00012411347517730494, "loss": 0.0, "step": 590 }, { "epoch": 6.382978723404255, "grad_norm": 0.0007574139162898064, "learning_rate": 0.00012056737588652481, "loss": 0.0, "step": 600 }, { "epoch": 6.48936170212766, "grad_norm": 0.000741077761631459, "learning_rate": 0.00011702127659574467, "loss": 0.0, "step": 610 }, { "epoch": 6.595744680851064, "grad_norm": 0.000734071247279644, "learning_rate": 0.00011347517730496453, "loss": 0.0, "step": 620 }, { "epoch": 6.702127659574468, "grad_norm": 0.000728779355995357, "learning_rate": 0.00010992907801418438, "loss": 0.0, "step": 630 }, { "epoch": 6.808510638297872, "grad_norm": 0.0007212815107777715, "learning_rate": 0.00010638297872340425, "loss": 0.0, "step": 640 }, { "epoch": 6.914893617021277, "grad_norm": 0.0007169453892856836, "learning_rate": 0.0001028368794326241, "loss": 0.0, "step": 650 }, { "epoch": 7.0, "eval_accuracy": 1.0, "eval_loss": 1.596455513208639e-05, "eval_runtime": 9.9126, "eval_samples_per_second": 40.554, "eval_steps_per_second": 40.554, "step": 658 }, { "epoch": 7.0212765957446805, "grad_norm": 0.000711097614839673, "learning_rate": 9.929078014184395e-05, "loss": 0.0, "step": 660 }, { "epoch": 7.127659574468085, "grad_norm": 0.0007094301981851459, "learning_rate": 9.574468085106382e-05, "loss": 0.0, "step": 670 }, { "epoch": 7.23404255319149, "grad_norm": 0.0006968958768993616, "learning_rate": 9.219858156028368e-05, "loss": 0.0, "step": 680 }, { "epoch": 7.340425531914893, "grad_norm": 0.0006909930380061269, "learning_rate": 8.865248226950354e-05, "loss": 0.0, "step": 690 }, { "epoch": 7.446808510638298, "grad_norm": 0.0006865290924906731, "learning_rate": 8.510638297872341e-05, "loss": 0.0, "step": 700 }, { "epoch": 7.553191489361702, "grad_norm": 0.0006844609742984176, "learning_rate": 8.156028368794325e-05, "loss": 0.0, "step": 710 }, { "epoch": 7.659574468085106, "grad_norm": 0.0006792008061893284, "learning_rate": 7.801418439716311e-05, "loss": 0.0, "step": 720 }, { "epoch": 7.76595744680851, "grad_norm": 0.0006731408648192883, "learning_rate": 7.446808510638297e-05, "loss": 0.0, "step": 730 }, { "epoch": 7.872340425531915, "grad_norm": 0.0006701324600726366, "learning_rate": 7.092198581560284e-05, "loss": 0.0, "step": 740 }, { "epoch": 7.9787234042553195, "grad_norm": 0.0006633326993323863, "learning_rate": 6.737588652482269e-05, "loss": 0.0, "step": 750 }, { "epoch": 8.0, "eval_accuracy": 1.0, "eval_loss": 1.4901161193847656e-05, "eval_runtime": 9.9494, "eval_samples_per_second": 40.405, "eval_steps_per_second": 40.405, "step": 752 }, { "epoch": 8.085106382978724, "grad_norm": 0.0006632324075326324, "learning_rate": 6.382978723404255e-05, "loss": 0.0, "step": 760 }, { "epoch": 8.191489361702128, "grad_norm": 0.000655403477139771, "learning_rate": 6.028368794326241e-05, "loss": 0.0, "step": 770 }, { "epoch": 8.297872340425531, "grad_norm": 0.0006541645270772278, "learning_rate": 5.6737588652482264e-05, "loss": 0.0, "step": 780 }, { "epoch": 8.404255319148936, "grad_norm": 0.0006483749020844698, "learning_rate": 5.319148936170213e-05, "loss": 0.0, "step": 790 }, { "epoch": 8.51063829787234, "grad_norm": 0.0006471078377217054, "learning_rate": 4.964539007092198e-05, "loss": 0.0, "step": 800 }, { "epoch": 8.617021276595745, "grad_norm": 0.0006466888007707894, "learning_rate": 4.609929078014184e-05, "loss": 0.0, "step": 810 }, { "epoch": 8.72340425531915, "grad_norm": 0.0006407785695046186, "learning_rate": 4.2553191489361704e-05, "loss": 0.0, "step": 820 }, { "epoch": 8.829787234042554, "grad_norm": 0.0006384547450579703, "learning_rate": 3.9007092198581555e-05, "loss": 0.0, "step": 830 }, { "epoch": 8.936170212765958, "grad_norm": 0.0006379844271577895, "learning_rate": 3.546099290780142e-05, "loss": 0.0, "step": 840 }, { "epoch": 9.0, "eval_accuracy": 1.0, "eval_loss": 1.4424324035644531e-05, "eval_runtime": 9.9315, "eval_samples_per_second": 40.477, "eval_steps_per_second": 40.477, "step": 846 }, { "epoch": 9.042553191489361, "grad_norm": 0.0006334384088404477, "learning_rate": 3.1914893617021275e-05, "loss": 0.0, "step": 850 }, { "epoch": 9.148936170212766, "grad_norm": 0.0006309397285804152, "learning_rate": 2.8368794326241132e-05, "loss": 0.0, "step": 860 }, { "epoch": 9.25531914893617, "grad_norm": 0.0006310406024567783, "learning_rate": 2.482269503546099e-05, "loss": 0.0, "step": 870 }, { "epoch": 9.361702127659575, "grad_norm": 0.0006308447918854654, "learning_rate": 2.1276595744680852e-05, "loss": 0.0, "step": 880 }, { "epoch": 9.46808510638298, "grad_norm": 0.0006286040297709405, "learning_rate": 1.773049645390071e-05, "loss": 0.0, "step": 890 }, { "epoch": 9.574468085106384, "grad_norm": 0.0006258686189539731, "learning_rate": 1.4184397163120566e-05, "loss": 0.0, "step": 900 }, { "epoch": 9.680851063829786, "grad_norm": 0.0006251951563172042, "learning_rate": 1.0638297872340426e-05, "loss": 0.0, "step": 910 }, { "epoch": 9.787234042553191, "grad_norm": 0.0006240535294637084, "learning_rate": 7.092198581560283e-06, "loss": 0.0, "step": 920 }, { "epoch": 9.893617021276595, "grad_norm": 0.0006240674993023276, "learning_rate": 3.5460992907801415e-06, "loss": 0.0, "step": 930 }, { "epoch": 10.0, "grad_norm": 0.0006238200003281236, "learning_rate": 0.0, "loss": 0.0, "step": 940 }, { "epoch": 10.0, "eval_accuracy": 1.0, "eval_loss": 1.4065806681173854e-05, "eval_runtime": 9.9903, "eval_samples_per_second": 40.239, "eval_steps_per_second": 40.239, "step": 940 }, { "epoch": 10.0, "step": 940, "total_flos": 4.315241031363276e+18, "train_loss": 0.13065080859353445, "train_runtime": 1086.1024, "train_samples_per_second": 27.677, "train_steps_per_second": 0.865 } ], "logging_steps": 10, "max_steps": 940, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.315241031363276e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }