|
{ |
|
"best_metric": 1.0, |
|
"best_model_checkpoint": "wav2vec2-base-lang-id/checkpoint-94", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.10638297872340426, |
|
"grad_norm": 3.8006672859191895, |
|
"learning_rate": 3.1914893617021275e-05, |
|
"loss": 4.4798, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2127659574468085, |
|
"grad_norm": 6.771675109863281, |
|
"learning_rate": 6.382978723404255e-05, |
|
"loss": 3.4754, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3191489361702128, |
|
"grad_norm": 8.21432113647461, |
|
"learning_rate": 9.574468085106382e-05, |
|
"loss": 2.4467, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.425531914893617, |
|
"grad_norm": 7.680928707122803, |
|
"learning_rate": 0.0001276595744680851, |
|
"loss": 1.4232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5319148936170213, |
|
"grad_norm": 2.195053815841675, |
|
"learning_rate": 0.00015957446808510637, |
|
"loss": 0.4151, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6382978723404256, |
|
"grad_norm": 0.13116297125816345, |
|
"learning_rate": 0.00019148936170212765, |
|
"loss": 0.0361, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7446808510638298, |
|
"grad_norm": 0.014657862484455109, |
|
"learning_rate": 0.0002234042553191489, |
|
"loss": 0.0017, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.851063829787234, |
|
"grad_norm": 0.004712587222456932, |
|
"learning_rate": 0.0002553191489361702, |
|
"loss": 0.0003, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9574468085106383, |
|
"grad_norm": 0.0027682166546583176, |
|
"learning_rate": 0.0002872340425531915, |
|
"loss": 0.0001, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 5.837103526573628e-05, |
|
"eval_runtime": 11.542, |
|
"eval_samples_per_second": 34.829, |
|
"eval_steps_per_second": 34.829, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.0638297872340425, |
|
"grad_norm": 0.0021660495549440384, |
|
"learning_rate": 0.00029787234042553186, |
|
"loss": 0.0001, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1702127659574468, |
|
"grad_norm": 0.001946191769093275, |
|
"learning_rate": 0.00029432624113475173, |
|
"loss": 0.0001, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2765957446808511, |
|
"grad_norm": 0.001837807591073215, |
|
"learning_rate": 0.0002907801418439716, |
|
"loss": 0.0001, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3829787234042552, |
|
"grad_norm": 0.0017718354938551784, |
|
"learning_rate": 0.0002872340425531915, |
|
"loss": 0.0001, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4893617021276595, |
|
"grad_norm": 0.0017246523639187217, |
|
"learning_rate": 0.00028368794326241134, |
|
"loss": 0.0001, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5957446808510638, |
|
"grad_norm": 0.0016802914906293154, |
|
"learning_rate": 0.00028014184397163116, |
|
"loss": 0.0001, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.702127659574468, |
|
"grad_norm": 0.0016365655465051532, |
|
"learning_rate": 0.00027659574468085103, |
|
"loss": 0.0001, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.8085106382978724, |
|
"grad_norm": 0.001596157904714346, |
|
"learning_rate": 0.0002730496453900709, |
|
"loss": 0.0001, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.9148936170212765, |
|
"grad_norm": 0.0015629915287718177, |
|
"learning_rate": 0.00026950354609929077, |
|
"loss": 0.0001, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 3.635883331298828e-05, |
|
"eval_runtime": 10.0577, |
|
"eval_samples_per_second": 39.969, |
|
"eval_steps_per_second": 39.969, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 2.021276595744681, |
|
"grad_norm": 0.001525467843748629, |
|
"learning_rate": 0.0002659574468085106, |
|
"loss": 0.0001, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.127659574468085, |
|
"grad_norm": 0.0014883955009281635, |
|
"learning_rate": 0.00026241134751773046, |
|
"loss": 0.0001, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.2340425531914896, |
|
"grad_norm": 0.0014570483472198248, |
|
"learning_rate": 0.00025886524822695033, |
|
"loss": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.3404255319148937, |
|
"grad_norm": 0.0014193649403750896, |
|
"learning_rate": 0.0002553191489361702, |
|
"loss": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.4468085106382977, |
|
"grad_norm": 0.0013909874251112342, |
|
"learning_rate": 0.00025177304964539007, |
|
"loss": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.5531914893617023, |
|
"grad_norm": 0.0013578328071162105, |
|
"learning_rate": 0.0002482269503546099, |
|
"loss": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.6595744680851063, |
|
"grad_norm": 0.0013242242857813835, |
|
"learning_rate": 0.00024468085106382976, |
|
"loss": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.7659574468085104, |
|
"grad_norm": 0.0013041673228144646, |
|
"learning_rate": 0.00024113475177304963, |
|
"loss": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.872340425531915, |
|
"grad_norm": 0.0012726597487926483, |
|
"learning_rate": 0.0002375886524822695, |
|
"loss": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.978723404255319, |
|
"grad_norm": 0.0012477930868044496, |
|
"learning_rate": 0.00023404255319148934, |
|
"loss": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 2.9017082852078602e-05, |
|
"eval_runtime": 9.9982, |
|
"eval_samples_per_second": 40.207, |
|
"eval_steps_per_second": 40.207, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 3.0851063829787235, |
|
"grad_norm": 0.0012191747082397342, |
|
"learning_rate": 0.0002304964539007092, |
|
"loss": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.1914893617021276, |
|
"grad_norm": 0.0011935862712562084, |
|
"learning_rate": 0.00022695035460992905, |
|
"loss": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.297872340425532, |
|
"grad_norm": 0.0011742267524823546, |
|
"learning_rate": 0.0002234042553191489, |
|
"loss": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.404255319148936, |
|
"grad_norm": 0.0011481853434816003, |
|
"learning_rate": 0.00021985815602836877, |
|
"loss": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.5106382978723403, |
|
"grad_norm": 0.0011253234697505832, |
|
"learning_rate": 0.00021631205673758864, |
|
"loss": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.617021276595745, |
|
"grad_norm": 0.0011064092395827174, |
|
"learning_rate": 0.0002127659574468085, |
|
"loss": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.723404255319149, |
|
"grad_norm": 0.0010885618394240737, |
|
"learning_rate": 0.00020921985815602835, |
|
"loss": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.829787234042553, |
|
"grad_norm": 0.001064595184288919, |
|
"learning_rate": 0.0002056737588652482, |
|
"loss": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.9361702127659575, |
|
"grad_norm": 0.0010447927052155137, |
|
"learning_rate": 0.00020212765957446807, |
|
"loss": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 2.372264862060547e-05, |
|
"eval_runtime": 9.9011, |
|
"eval_samples_per_second": 40.602, |
|
"eval_steps_per_second": 40.602, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 4.042553191489362, |
|
"grad_norm": 0.001030342886224389, |
|
"learning_rate": 0.0001985815602836879, |
|
"loss": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.148936170212766, |
|
"grad_norm": 0.0010120035149157047, |
|
"learning_rate": 0.0001950354609929078, |
|
"loss": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.25531914893617, |
|
"grad_norm": 0.0009937717113643885, |
|
"learning_rate": 0.00019148936170212765, |
|
"loss": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.361702127659575, |
|
"grad_norm": 0.0009758667438291013, |
|
"learning_rate": 0.0001879432624113475, |
|
"loss": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.468085106382979, |
|
"grad_norm": 0.0009558630990795791, |
|
"learning_rate": 0.00018439716312056736, |
|
"loss": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.574468085106383, |
|
"grad_norm": 0.0009456143015995622, |
|
"learning_rate": 0.0001808510638297872, |
|
"loss": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.680851063829787, |
|
"grad_norm": 0.0009262987296096981, |
|
"learning_rate": 0.00017730496453900708, |
|
"loss": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.787234042553192, |
|
"grad_norm": 0.0009140170877799392, |
|
"learning_rate": 0.00017375886524822692, |
|
"loss": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.8936170212765955, |
|
"grad_norm": 0.000904095999430865, |
|
"learning_rate": 0.00017021276595744682, |
|
"loss": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.0008834420586936176, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 2.014636993408203e-05, |
|
"eval_runtime": 9.9558, |
|
"eval_samples_per_second": 40.379, |
|
"eval_steps_per_second": 40.379, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.1063829787234045, |
|
"grad_norm": 0.0008752320427447557, |
|
"learning_rate": 0.0001631205673758865, |
|
"loss": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.212765957446808, |
|
"grad_norm": 0.0008663799380883574, |
|
"learning_rate": 0.00015957446808510637, |
|
"loss": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 5.319148936170213, |
|
"grad_norm": 0.0008535313536413014, |
|
"learning_rate": 0.00015602836879432622, |
|
"loss": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.425531914893617, |
|
"grad_norm": 0.0008452454931102693, |
|
"learning_rate": 0.00015248226950354606, |
|
"loss": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.531914893617021, |
|
"grad_norm": 0.0008268958772532642, |
|
"learning_rate": 0.00014893617021276593, |
|
"loss": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.638297872340425, |
|
"grad_norm": 0.0008181555895134807, |
|
"learning_rate": 0.0001453900709219858, |
|
"loss": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.74468085106383, |
|
"grad_norm": 0.0008063354762271047, |
|
"learning_rate": 0.00014184397163120567, |
|
"loss": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.851063829787234, |
|
"grad_norm": 0.0007958101341500878, |
|
"learning_rate": 0.00013829787234042552, |
|
"loss": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.957446808510638, |
|
"grad_norm": 0.0007865344523452222, |
|
"learning_rate": 0.00013475177304964539, |
|
"loss": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 1.7642974853515625e-05, |
|
"eval_runtime": 9.9723, |
|
"eval_samples_per_second": 40.312, |
|
"eval_steps_per_second": 40.312, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 6.0638297872340425, |
|
"grad_norm": 0.0007770307711325586, |
|
"learning_rate": 0.00013120567375886523, |
|
"loss": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.170212765957447, |
|
"grad_norm": 0.0007697618566453457, |
|
"learning_rate": 0.0001276595744680851, |
|
"loss": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.276595744680851, |
|
"grad_norm": 0.0007645227597095072, |
|
"learning_rate": 0.00012411347517730494, |
|
"loss": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 6.382978723404255, |
|
"grad_norm": 0.0007574139162898064, |
|
"learning_rate": 0.00012056737588652481, |
|
"loss": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.48936170212766, |
|
"grad_norm": 0.000741077761631459, |
|
"learning_rate": 0.00011702127659574467, |
|
"loss": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.595744680851064, |
|
"grad_norm": 0.000734071247279644, |
|
"learning_rate": 0.00011347517730496453, |
|
"loss": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.702127659574468, |
|
"grad_norm": 0.000728779355995357, |
|
"learning_rate": 0.00010992907801418438, |
|
"loss": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.808510638297872, |
|
"grad_norm": 0.0007212815107777715, |
|
"learning_rate": 0.00010638297872340425, |
|
"loss": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.914893617021277, |
|
"grad_norm": 0.0007169453892856836, |
|
"learning_rate": 0.0001028368794326241, |
|
"loss": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 1.596455513208639e-05, |
|
"eval_runtime": 9.9126, |
|
"eval_samples_per_second": 40.554, |
|
"eval_steps_per_second": 40.554, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 7.0212765957446805, |
|
"grad_norm": 0.000711097614839673, |
|
"learning_rate": 9.929078014184395e-05, |
|
"loss": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.127659574468085, |
|
"grad_norm": 0.0007094301981851459, |
|
"learning_rate": 9.574468085106382e-05, |
|
"loss": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 7.23404255319149, |
|
"grad_norm": 0.0006968958768993616, |
|
"learning_rate": 9.219858156028368e-05, |
|
"loss": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.340425531914893, |
|
"grad_norm": 0.0006909930380061269, |
|
"learning_rate": 8.865248226950354e-05, |
|
"loss": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.446808510638298, |
|
"grad_norm": 0.0006865290924906731, |
|
"learning_rate": 8.510638297872341e-05, |
|
"loss": 0.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.553191489361702, |
|
"grad_norm": 0.0006844609742984176, |
|
"learning_rate": 8.156028368794325e-05, |
|
"loss": 0.0, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.659574468085106, |
|
"grad_norm": 0.0006792008061893284, |
|
"learning_rate": 7.801418439716311e-05, |
|
"loss": 0.0, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.76595744680851, |
|
"grad_norm": 0.0006731408648192883, |
|
"learning_rate": 7.446808510638297e-05, |
|
"loss": 0.0, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.872340425531915, |
|
"grad_norm": 0.0006701324600726366, |
|
"learning_rate": 7.092198581560284e-05, |
|
"loss": 0.0, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.9787234042553195, |
|
"grad_norm": 0.0006633326993323863, |
|
"learning_rate": 6.737588652482269e-05, |
|
"loss": 0.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 1.4901161193847656e-05, |
|
"eval_runtime": 9.9494, |
|
"eval_samples_per_second": 40.405, |
|
"eval_steps_per_second": 40.405, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 8.085106382978724, |
|
"grad_norm": 0.0006632324075326324, |
|
"learning_rate": 6.382978723404255e-05, |
|
"loss": 0.0, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.191489361702128, |
|
"grad_norm": 0.000655403477139771, |
|
"learning_rate": 6.028368794326241e-05, |
|
"loss": 0.0, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 8.297872340425531, |
|
"grad_norm": 0.0006541645270772278, |
|
"learning_rate": 5.6737588652482264e-05, |
|
"loss": 0.0, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.404255319148936, |
|
"grad_norm": 0.0006483749020844698, |
|
"learning_rate": 5.319148936170213e-05, |
|
"loss": 0.0, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.51063829787234, |
|
"grad_norm": 0.0006471078377217054, |
|
"learning_rate": 4.964539007092198e-05, |
|
"loss": 0.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.617021276595745, |
|
"grad_norm": 0.0006466888007707894, |
|
"learning_rate": 4.609929078014184e-05, |
|
"loss": 0.0, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.72340425531915, |
|
"grad_norm": 0.0006407785695046186, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 0.0, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.829787234042554, |
|
"grad_norm": 0.0006384547450579703, |
|
"learning_rate": 3.9007092198581555e-05, |
|
"loss": 0.0, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.936170212765958, |
|
"grad_norm": 0.0006379844271577895, |
|
"learning_rate": 3.546099290780142e-05, |
|
"loss": 0.0, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 1.4424324035644531e-05, |
|
"eval_runtime": 9.9315, |
|
"eval_samples_per_second": 40.477, |
|
"eval_steps_per_second": 40.477, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 9.042553191489361, |
|
"grad_norm": 0.0006334384088404477, |
|
"learning_rate": 3.1914893617021275e-05, |
|
"loss": 0.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 9.148936170212766, |
|
"grad_norm": 0.0006309397285804152, |
|
"learning_rate": 2.8368794326241132e-05, |
|
"loss": 0.0, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.25531914893617, |
|
"grad_norm": 0.0006310406024567783, |
|
"learning_rate": 2.482269503546099e-05, |
|
"loss": 0.0, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 9.361702127659575, |
|
"grad_norm": 0.0006308447918854654, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 0.0, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.46808510638298, |
|
"grad_norm": 0.0006286040297709405, |
|
"learning_rate": 1.773049645390071e-05, |
|
"loss": 0.0, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.574468085106384, |
|
"grad_norm": 0.0006258686189539731, |
|
"learning_rate": 1.4184397163120566e-05, |
|
"loss": 0.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.680851063829786, |
|
"grad_norm": 0.0006251951563172042, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 0.0, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.787234042553191, |
|
"grad_norm": 0.0006240535294637084, |
|
"learning_rate": 7.092198581560283e-06, |
|
"loss": 0.0, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.893617021276595, |
|
"grad_norm": 0.0006240674993023276, |
|
"learning_rate": 3.5460992907801415e-06, |
|
"loss": 0.0, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0006238200003281236, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 1.0, |
|
"eval_loss": 1.4065806681173854e-05, |
|
"eval_runtime": 9.9903, |
|
"eval_samples_per_second": 40.239, |
|
"eval_steps_per_second": 40.239, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 940, |
|
"total_flos": 4.315241031363276e+18, |
|
"train_loss": 0.13065080859353445, |
|
"train_runtime": 1086.1024, |
|
"train_samples_per_second": 27.677, |
|
"train_steps_per_second": 0.865 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 940, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.315241031363276e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|