whisper-medium-korean-ggml / trainer_state.json
{
"best_metric": 16.52112384371117,
"best_model_checkpoint": "./whisper-medium-korean/checkpoint-9000",
"epoch": 3.461405330564209,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008653513326410523,
"grad_norm": 9.3709077835083,
"learning_rate": 5.000000000000001e-07,
"loss": 0.8506,
"step": 25
},
{
"epoch": 0.017307026652821047,
"grad_norm": 5.6410064697265625,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.7759,
"step": 50
},
{
"epoch": 0.02596053997923157,
"grad_norm": 6.114657878875732,
"learning_rate": 1.5e-06,
"loss": 0.627,
"step": 75
},
{
"epoch": 0.034614053305642094,
"grad_norm": 4.652756690979004,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.4309,
"step": 100
},
{
"epoch": 0.04326756663205261,
"grad_norm": 4.665812015533447,
"learning_rate": 2.5e-06,
"loss": 0.3935,
"step": 125
},
{
"epoch": 0.05192107995846314,
"grad_norm": 4.651270389556885,
"learning_rate": 3e-06,
"loss": 0.365,
"step": 150
},
{
"epoch": 0.060574593284873655,
"grad_norm": 4.162643909454346,
"learning_rate": 3.5e-06,
"loss": 0.3651,
"step": 175
},
{
"epoch": 0.06922810661128419,
"grad_norm": 3.771670341491699,
"learning_rate": 4.000000000000001e-06,
"loss": 0.3666,
"step": 200
},
{
"epoch": 0.0778816199376947,
"grad_norm": 4.835504531860352,
"learning_rate": 4.5e-06,
"loss": 0.3505,
"step": 225
},
{
"epoch": 0.08653513326410522,
"grad_norm": 5.903632164001465,
"learning_rate": 5e-06,
"loss": 0.3394,
"step": 250
},
{
"epoch": 0.09518864659051575,
"grad_norm": 4.5515336990356445,
"learning_rate": 5.500000000000001e-06,
"loss": 0.3298,
"step": 275
},
{
"epoch": 0.10384215991692627,
"grad_norm": 4.889475345611572,
"learning_rate": 6e-06,
"loss": 0.3493,
"step": 300
},
{
"epoch": 0.1124956732433368,
"grad_norm": 4.62334680557251,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.2977,
"step": 325
},
{
"epoch": 0.12114918656974731,
"grad_norm": 4.044426441192627,
"learning_rate": 7e-06,
"loss": 0.3322,
"step": 350
},
{
"epoch": 0.12980269989615784,
"grad_norm": 4.073489189147949,
"learning_rate": 7.500000000000001e-06,
"loss": 0.3359,
"step": 375
},
{
"epoch": 0.13845621322256838,
"grad_norm": 5.082677841186523,
"learning_rate": 8.000000000000001e-06,
"loss": 0.3426,
"step": 400
},
{
"epoch": 0.1471097265489789,
"grad_norm": 4.216761589050293,
"learning_rate": 8.5e-06,
"loss": 0.3317,
"step": 425
},
{
"epoch": 0.1557632398753894,
"grad_norm": 5.513101577758789,
"learning_rate": 9e-06,
"loss": 0.3044,
"step": 450
},
{
"epoch": 0.16441675320179994,
"grad_norm": 4.340704917907715,
"learning_rate": 9.5e-06,
"loss": 0.3284,
"step": 475
},
{
"epoch": 0.17307026652821045,
"grad_norm": 4.588245391845703,
"learning_rate": 1e-05,
"loss": 0.3114,
"step": 500
},
{
"epoch": 0.17307026652821045,
"eval_loss": 0.2976242005825043,
"eval_runtime": 6820.6363,
"eval_samples_per_second": 1.506,
"eval_steps_per_second": 0.094,
"eval_wer": 24.376984674858484,
"step": 500
},
{
"epoch": 0.181723779854621,
"grad_norm": 3.102633237838745,
"learning_rate": 9.973684210526316e-06,
"loss": 0.2907,
"step": 525
},
{
"epoch": 0.1903772931810315,
"grad_norm": 4.586500644683838,
"learning_rate": 9.947368421052632e-06,
"loss": 0.2977,
"step": 550
},
{
"epoch": 0.199030806507442,
"grad_norm": 4.89168119430542,
"learning_rate": 9.921052631578947e-06,
"loss": 0.2984,
"step": 575
},
{
"epoch": 0.20768431983385255,
"grad_norm": 4.386935710906982,
"learning_rate": 9.894736842105264e-06,
"loss": 0.3103,
"step": 600
},
{
"epoch": 0.21633783316026306,
"grad_norm": 4.106662750244141,
"learning_rate": 9.868421052631579e-06,
"loss": 0.2919,
"step": 625
},
{
"epoch": 0.2249913464866736,
"grad_norm": 4.3443827629089355,
"learning_rate": 9.842105263157896e-06,
"loss": 0.3029,
"step": 650
},
{
"epoch": 0.2336448598130841,
"grad_norm": 4.201015472412109,
"learning_rate": 9.815789473684212e-06,
"loss": 0.2764,
"step": 675
},
{
"epoch": 0.24229837313949462,
"grad_norm": 4.213696002960205,
"learning_rate": 9.789473684210527e-06,
"loss": 0.289,
"step": 700
},
{
"epoch": 0.25095188646590516,
"grad_norm": 4.507613182067871,
"learning_rate": 9.763157894736844e-06,
"loss": 0.2937,
"step": 725
},
{
"epoch": 0.25960539979231567,
"grad_norm": 4.777523994445801,
"learning_rate": 9.736842105263159e-06,
"loss": 0.3014,
"step": 750
},
{
"epoch": 0.2682589131187262,
"grad_norm": 3.791252613067627,
"learning_rate": 9.710526315789474e-06,
"loss": 0.2873,
"step": 775
},
{
"epoch": 0.27691242644513675,
"grad_norm": 4.165738105773926,
"learning_rate": 9.68421052631579e-06,
"loss": 0.2959,
"step": 800
},
{
"epoch": 0.28556593977154726,
"grad_norm": 4.374238967895508,
"learning_rate": 9.657894736842106e-06,
"loss": 0.286,
"step": 825
},
{
"epoch": 0.2942194530979578,
"grad_norm": 4.140951633453369,
"learning_rate": 9.631578947368422e-06,
"loss": 0.2824,
"step": 850
},
{
"epoch": 0.3028729664243683,
"grad_norm": 3.896604537963867,
"learning_rate": 9.605263157894737e-06,
"loss": 0.2774,
"step": 875
},
{
"epoch": 0.3115264797507788,
"grad_norm": 4.016544818878174,
"learning_rate": 9.578947368421054e-06,
"loss": 0.2629,
"step": 900
},
{
"epoch": 0.32017999307718936,
"grad_norm": 3.743680000305176,
"learning_rate": 9.552631578947369e-06,
"loss": 0.2585,
"step": 925
},
{
"epoch": 0.3288335064035999,
"grad_norm": 3.727074384689331,
"learning_rate": 9.526315789473684e-06,
"loss": 0.2608,
"step": 950
},
{
"epoch": 0.3374870197300104,
"grad_norm": 3.8571677207946777,
"learning_rate": 9.5e-06,
"loss": 0.273,
"step": 975
},
{
"epoch": 0.3461405330564209,
"grad_norm": 4.067797660827637,
"learning_rate": 9.473684210526315e-06,
"loss": 0.2765,
"step": 1000
},
{
"epoch": 0.3461405330564209,
"eval_loss": 0.26566416025161743,
"eval_runtime": 6826.7941,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 0.094,
"eval_wer": 21.943600717934558,
"step": 1000
},
{
"epoch": 0.3547940463828314,
"grad_norm": 3.9521491527557373,
"learning_rate": 9.447368421052632e-06,
"loss": 0.2696,
"step": 1025
},
{
"epoch": 0.363447559709242,
"grad_norm": 4.91107177734375,
"learning_rate": 9.421052631578949e-06,
"loss": 0.2748,
"step": 1050
},
{
"epoch": 0.3721010730356525,
"grad_norm": 3.5369150638580322,
"learning_rate": 9.394736842105264e-06,
"loss": 0.272,
"step": 1075
},
{
"epoch": 0.380754586362063,
"grad_norm": 3.5748019218444824,
"learning_rate": 9.36842105263158e-06,
"loss": 0.2572,
"step": 1100
},
{
"epoch": 0.3894080996884735,
"grad_norm": 4.190589904785156,
"learning_rate": 9.342105263157895e-06,
"loss": 0.2932,
"step": 1125
},
{
"epoch": 0.398061613014884,
"grad_norm": 3.347419023513794,
"learning_rate": 9.315789473684212e-06,
"loss": 0.2656,
"step": 1150
},
{
"epoch": 0.4067151263412946,
"grad_norm": 2.9487226009368896,
"learning_rate": 9.289473684210527e-06,
"loss": 0.2666,
"step": 1175
},
{
"epoch": 0.4153686396677051,
"grad_norm": 3.4610071182250977,
"learning_rate": 9.263157894736842e-06,
"loss": 0.2526,
"step": 1200
},
{
"epoch": 0.4240221529941156,
"grad_norm": 4.092984676361084,
"learning_rate": 9.236842105263159e-06,
"loss": 0.2707,
"step": 1225
},
{
"epoch": 0.4326756663205261,
"grad_norm": 4.32016658782959,
"learning_rate": 9.210526315789474e-06,
"loss": 0.2716,
"step": 1250
},
{
"epoch": 0.44132917964693663,
"grad_norm": 4.7245330810546875,
"learning_rate": 9.18421052631579e-06,
"loss": 0.2554,
"step": 1275
},
{
"epoch": 0.4499826929733472,
"grad_norm": 4.063606262207031,
"learning_rate": 9.157894736842105e-06,
"loss": 0.2617,
"step": 1300
},
{
"epoch": 0.4586362062997577,
"grad_norm": 3.9536917209625244,
"learning_rate": 9.131578947368422e-06,
"loss": 0.2471,
"step": 1325
},
{
"epoch": 0.4672897196261682,
"grad_norm": 3.8031530380249023,
"learning_rate": 9.105263157894739e-06,
"loss": 0.2687,
"step": 1350
},
{
"epoch": 0.47594323295257873,
"grad_norm": 3.8580405712127686,
"learning_rate": 9.078947368421054e-06,
"loss": 0.2592,
"step": 1375
},
{
"epoch": 0.48459674627898924,
"grad_norm": 3.3053033351898193,
"learning_rate": 9.05263157894737e-06,
"loss": 0.2412,
"step": 1400
},
{
"epoch": 0.4932502596053998,
"grad_norm": 3.1904115676879883,
"learning_rate": 9.026315789473685e-06,
"loss": 0.2672,
"step": 1425
},
{
"epoch": 0.5019037729318103,
"grad_norm": 2.96549916267395,
"learning_rate": 9e-06,
"loss": 0.2425,
"step": 1450
},
{
"epoch": 0.5105572862582208,
"grad_norm": 3.1574394702911377,
"learning_rate": 8.973684210526317e-06,
"loss": 0.2536,
"step": 1475
},
{
"epoch": 0.5192107995846313,
"grad_norm": 4.734163284301758,
"learning_rate": 8.947368421052632e-06,
"loss": 0.2737,
"step": 1500
},
{
"epoch": 0.5192107995846313,
"eval_loss": 0.24431759119033813,
"eval_runtime": 6895.0818,
"eval_samples_per_second": 1.49,
"eval_steps_per_second": 0.093,
"eval_wer": 20.694981361314372,
"step": 1500
},
{
"epoch": 0.5278643129110419,
"grad_norm": 4.172092437744141,
"learning_rate": 8.921052631578949e-06,
"loss": 0.2561,
"step": 1525
},
{
"epoch": 0.5365178262374524,
"grad_norm": 4.126692295074463,
"learning_rate": 8.894736842105264e-06,
"loss": 0.2582,
"step": 1550
},
{
"epoch": 0.5451713395638629,
"grad_norm": 3.5045013427734375,
"learning_rate": 8.86842105263158e-06,
"loss": 0.2563,
"step": 1575
},
{
"epoch": 0.5538248528902735,
"grad_norm": 3.760963201522827,
"learning_rate": 8.842105263157895e-06,
"loss": 0.2591,
"step": 1600
},
{
"epoch": 0.562478366216684,
"grad_norm": 3.065091848373413,
"learning_rate": 8.81578947368421e-06,
"loss": 0.2512,
"step": 1625
},
{
"epoch": 0.5711318795430945,
"grad_norm": 3.6731486320495605,
"learning_rate": 8.789473684210527e-06,
"loss": 0.243,
"step": 1650
},
{
"epoch": 0.579785392869505,
"grad_norm": 4.433581352233887,
"learning_rate": 8.763157894736842e-06,
"loss": 0.2265,
"step": 1675
},
{
"epoch": 0.5884389061959155,
"grad_norm": 3.274906635284424,
"learning_rate": 8.736842105263158e-06,
"loss": 0.2495,
"step": 1700
},
{
"epoch": 0.5970924195223261,
"grad_norm": 3.508803129196167,
"learning_rate": 8.710526315789475e-06,
"loss": 0.2397,
"step": 1725
},
{
"epoch": 0.6057459328487366,
"grad_norm": 4.499612331390381,
"learning_rate": 8.68421052631579e-06,
"loss": 0.2592,
"step": 1750
},
{
"epoch": 0.6143994461751471,
"grad_norm": 3.7848877906799316,
"learning_rate": 8.657894736842107e-06,
"loss": 0.2438,
"step": 1775
},
{
"epoch": 0.6230529595015576,
"grad_norm": 3.3857030868530273,
"learning_rate": 8.631578947368422e-06,
"loss": 0.2353,
"step": 1800
},
{
"epoch": 0.6317064728279681,
"grad_norm": 4.552176475524902,
"learning_rate": 8.605263157894738e-06,
"loss": 0.2537,
"step": 1825
},
{
"epoch": 0.6403599861543787,
"grad_norm": 3.2606961727142334,
"learning_rate": 8.578947368421053e-06,
"loss": 0.2579,
"step": 1850
},
{
"epoch": 0.6490134994807892,
"grad_norm": 4.423758506774902,
"learning_rate": 8.552631578947368e-06,
"loss": 0.2443,
"step": 1875
},
{
"epoch": 0.6576670128071997,
"grad_norm": 3.7660324573516846,
"learning_rate": 8.526315789473685e-06,
"loss": 0.2353,
"step": 1900
},
{
"epoch": 0.6663205261336103,
"grad_norm": 3.4143283367156982,
"learning_rate": 8.5e-06,
"loss": 0.232,
"step": 1925
},
{
"epoch": 0.6749740394600208,
"grad_norm": 3.8295090198516846,
"learning_rate": 8.473684210526317e-06,
"loss": 0.2451,
"step": 1950
},
{
"epoch": 0.6836275527864313,
"grad_norm": 2.5151150226593018,
"learning_rate": 8.447368421052632e-06,
"loss": 0.2443,
"step": 1975
},
{
"epoch": 0.6922810661128418,
"grad_norm": 3.353902816772461,
"learning_rate": 8.421052631578948e-06,
"loss": 0.2421,
"step": 2000
},
{
"epoch": 0.6922810661128418,
"eval_loss": 0.2288905382156372,
"eval_runtime": 6825.4022,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 0.094,
"eval_wer": 19.04683832666022,
"step": 2000
},
{
"epoch": 0.7009345794392523,
"grad_norm": 3.2154219150543213,
"learning_rate": 8.394736842105263e-06,
"loss": 0.2369,
"step": 2025
},
{
"epoch": 0.7095880927656628,
"grad_norm": 3.2622313499450684,
"learning_rate": 8.36842105263158e-06,
"loss": 0.2323,
"step": 2050
},
{
"epoch": 0.7182416060920734,
"grad_norm": 4.255507946014404,
"learning_rate": 8.342105263157897e-06,
"loss": 0.2383,
"step": 2075
},
{
"epoch": 0.726895119418484,
"grad_norm": 3.3413267135620117,
"learning_rate": 8.315789473684212e-06,
"loss": 0.2647,
"step": 2100
},
{
"epoch": 0.7355486327448945,
"grad_norm": 3.5837645530700684,
"learning_rate": 8.289473684210526e-06,
"loss": 0.2209,
"step": 2125
},
{
"epoch": 0.744202146071305,
"grad_norm": 3.2607805728912354,
"learning_rate": 8.263157894736843e-06,
"loss": 0.2338,
"step": 2150
},
{
"epoch": 0.7528556593977155,
"grad_norm": 4.119529724121094,
"learning_rate": 8.236842105263158e-06,
"loss": 0.2279,
"step": 2175
},
{
"epoch": 0.761509172724126,
"grad_norm": 3.5672607421875,
"learning_rate": 8.210526315789475e-06,
"loss": 0.2376,
"step": 2200
},
{
"epoch": 0.7701626860505365,
"grad_norm": 4.001561164855957,
"learning_rate": 8.18421052631579e-06,
"loss": 0.2426,
"step": 2225
},
{
"epoch": 0.778816199376947,
"grad_norm": 3.481905698776245,
"learning_rate": 8.157894736842106e-06,
"loss": 0.203,
"step": 2250
},
{
"epoch": 0.7874697127033575,
"grad_norm": 3.281797170639038,
"learning_rate": 8.131578947368421e-06,
"loss": 0.2119,
"step": 2275
},
{
"epoch": 0.796123226029768,
"grad_norm": 3.9247775077819824,
"learning_rate": 8.105263157894736e-06,
"loss": 0.2161,
"step": 2300
},
{
"epoch": 0.8047767393561787,
"grad_norm": 3.621325731277466,
"learning_rate": 8.078947368421053e-06,
"loss": 0.2402,
"step": 2325
},
{
"epoch": 0.8134302526825892,
"grad_norm": 3.3069469928741455,
"learning_rate": 8.052631578947368e-06,
"loss": 0.2236,
"step": 2350
},
{
"epoch": 0.8220837660089997,
"grad_norm": 2.723513603210449,
"learning_rate": 8.026315789473685e-06,
"loss": 0.2356,
"step": 2375
},
{
"epoch": 0.8307372793354102,
"grad_norm": 3.1568410396575928,
"learning_rate": 8.000000000000001e-06,
"loss": 0.2054,
"step": 2400
},
{
"epoch": 0.8393907926618207,
"grad_norm": 3.152918815612793,
"learning_rate": 7.973684210526316e-06,
"loss": 0.2292,
"step": 2425
},
{
"epoch": 0.8480443059882312,
"grad_norm": 3.4354190826416016,
"learning_rate": 7.947368421052633e-06,
"loss": 0.2228,
"step": 2450
},
{
"epoch": 0.8566978193146417,
"grad_norm": 3.4018874168395996,
"learning_rate": 7.921052631578948e-06,
"loss": 0.2346,
"step": 2475
},
{
"epoch": 0.8653513326410522,
"grad_norm": 2.7067339420318604,
"learning_rate": 7.894736842105265e-06,
"loss": 0.2237,
"step": 2500
},
{
"epoch": 0.8653513326410522,
"eval_loss": 0.22117741405963898,
"eval_runtime": 6835.5047,
"eval_samples_per_second": 1.503,
"eval_steps_per_second": 0.094,
"eval_wer": 18.436766533204473,
"step": 2500
},
{
"epoch": 0.8740048459674628,
"grad_norm": 3.9850449562072754,
"learning_rate": 7.86842105263158e-06,
"loss": 0.2334,
"step": 2525
},
{
"epoch": 0.8826583592938733,
"grad_norm": 3.366445302963257,
"learning_rate": 7.842105263157895e-06,
"loss": 0.2507,
"step": 2550
},
{
"epoch": 0.8913118726202839,
"grad_norm": 2.6732399463653564,
"learning_rate": 7.815789473684211e-06,
"loss": 0.23,
"step": 2575
},
{
"epoch": 0.8999653859466944,
"grad_norm": 3.804313898086548,
"learning_rate": 7.789473684210526e-06,
"loss": 0.2527,
"step": 2600
},
{
"epoch": 0.9086188992731049,
"grad_norm": 4.052364826202393,
"learning_rate": 7.763157894736843e-06,
"loss": 0.2459,
"step": 2625
},
{
"epoch": 0.9172724125995154,
"grad_norm": 3.691915988922119,
"learning_rate": 7.736842105263158e-06,
"loss": 0.2333,
"step": 2650
},
{
"epoch": 0.9259259259259259,
"grad_norm": 3.742799758911133,
"learning_rate": 7.710526315789474e-06,
"loss": 0.2318,
"step": 2675
},
{
"epoch": 0.9345794392523364,
"grad_norm": 3.024702310562134,
"learning_rate": 7.68421052631579e-06,
"loss": 0.2229,
"step": 2700
},
{
"epoch": 0.943232952578747,
"grad_norm": 2.7311902046203613,
"learning_rate": 7.657894736842106e-06,
"loss": 0.201,
"step": 2725
},
{
"epoch": 0.9518864659051575,
"grad_norm": 4.097088813781738,
"learning_rate": 7.631578947368423e-06,
"loss": 0.2375,
"step": 2750
},
{
"epoch": 0.960539979231568,
"grad_norm": 3.8325319290161133,
"learning_rate": 7.605263157894738e-06,
"loss": 0.256,
"step": 2775
},
{
"epoch": 0.9691934925579785,
"grad_norm": 3.960142135620117,
"learning_rate": 7.578947368421054e-06,
"loss": 0.2536,
"step": 2800
},
{
"epoch": 0.9778470058843891,
"grad_norm": 2.902967929840088,
"learning_rate": 7.552631578947369e-06,
"loss": 0.2343,
"step": 2825
},
{
"epoch": 0.9865005192107996,
"grad_norm": 3.7276077270507812,
"learning_rate": 7.526315789473685e-06,
"loss": 0.2103,
"step": 2850
},
{
"epoch": 0.9951540325372101,
"grad_norm": 3.44114089012146,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2212,
"step": 2875
},
{
"epoch": 1.0038075458636206,
"grad_norm": 2.8075897693634033,
"learning_rate": 7.473684210526316e-06,
"loss": 0.1869,
"step": 2900
},
{
"epoch": 1.0124610591900312,
"grad_norm": 2.7247374057769775,
"learning_rate": 7.447368421052632e-06,
"loss": 0.1468,
"step": 2925
},
{
"epoch": 1.0211145725164417,
"grad_norm": 2.5912511348724365,
"learning_rate": 7.421052631578948e-06,
"loss": 0.1507,
"step": 2950
},
{
"epoch": 1.0297680858428522,
"grad_norm": 3.2269246578216553,
"learning_rate": 7.3947368421052635e-06,
"loss": 0.1598,
"step": 2975
},
{
"epoch": 1.0384215991692627,
"grad_norm": 2.3670835494995117,
"learning_rate": 7.368421052631579e-06,
"loss": 0.1301,
"step": 3000
},
{
"epoch": 1.0384215991692627,
"eval_loss": 0.21585418283939362,
"eval_runtime": 6825.6837,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 0.094,
"eval_wer": 17.737815822173133,
"step": 3000
},
{
"epoch": 1.0470751124956732,
"grad_norm": 3.005739450454712,
"learning_rate": 7.342105263157895e-06,
"loss": 0.1571,
"step": 3025
},
{
"epoch": 1.0557286258220837,
"grad_norm": 3.31783127784729,
"learning_rate": 7.315789473684212e-06,
"loss": 0.1368,
"step": 3050
},
{
"epoch": 1.0643821391484942,
"grad_norm": 2.8186709880828857,
"learning_rate": 7.289473684210528e-06,
"loss": 0.1494,
"step": 3075
},
{
"epoch": 1.0730356524749047,
"grad_norm": 3.0247764587402344,
"learning_rate": 7.263157894736843e-06,
"loss": 0.1382,
"step": 3100
},
{
"epoch": 1.0816891658013152,
"grad_norm": 3.01029372215271,
"learning_rate": 7.236842105263158e-06,
"loss": 0.1518,
"step": 3125
},
{
"epoch": 1.0903426791277258,
"grad_norm": 3.1356372833251953,
"learning_rate": 7.210526315789474e-06,
"loss": 0.1557,
"step": 3150
},
{
"epoch": 1.0989961924541363,
"grad_norm": 3.427821636199951,
"learning_rate": 7.18421052631579e-06,
"loss": 0.1672,
"step": 3175
},
{
"epoch": 1.107649705780547,
"grad_norm": 2.804636240005493,
"learning_rate": 7.157894736842106e-06,
"loss": 0.1489,
"step": 3200
},
{
"epoch": 1.1163032191069575,
"grad_norm": 3.353768825531006,
"learning_rate": 7.131578947368422e-06,
"loss": 0.1446,
"step": 3225
},
{
"epoch": 1.124956732433368,
"grad_norm": 2.2061870098114014,
"learning_rate": 7.1052631578947375e-06,
"loss": 0.1392,
"step": 3250
},
{
"epoch": 1.1336102457597785,
"grad_norm": 3.4037885665893555,
"learning_rate": 7.078947368421053e-06,
"loss": 0.1361,
"step": 3275
},
{
"epoch": 1.142263759086189,
"grad_norm": 3.9204328060150146,
"learning_rate": 7.052631578947369e-06,
"loss": 0.154,
"step": 3300
},
{
"epoch": 1.1509172724125996,
"grad_norm": 2.112093925476074,
"learning_rate": 7.026315789473684e-06,
"loss": 0.1636,
"step": 3325
},
{
"epoch": 1.15957078573901,
"grad_norm": 2.0618863105773926,
"learning_rate": 7e-06,
"loss": 0.1382,
"step": 3350
},
{
"epoch": 1.1682242990654206,
"grad_norm": 3.1674695014953613,
"learning_rate": 6.973684210526316e-06,
"loss": 0.1437,
"step": 3375
},
{
"epoch": 1.176877812391831,
"grad_norm": 3.2779016494750977,
"learning_rate": 6.947368421052632e-06,
"loss": 0.1421,
"step": 3400
},
{
"epoch": 1.1855313257182416,
"grad_norm": 2.3959224224090576,
"learning_rate": 6.921052631578948e-06,
"loss": 0.1366,
"step": 3425
},
{
"epoch": 1.1941848390446521,
"grad_norm": 3.081360340118408,
"learning_rate": 6.894736842105264e-06,
"loss": 0.1425,
"step": 3450
},
{
"epoch": 1.2028383523710626,
"grad_norm": 2.6062395572662354,
"learning_rate": 6.86842105263158e-06,
"loss": 0.1413,
"step": 3475
},
{
"epoch": 1.2114918656974731,
"grad_norm": 3.2448296546936035,
"learning_rate": 6.842105263157896e-06,
"loss": 0.1391,
"step": 3500
},
{
"epoch": 1.2114918656974731,
"eval_loss": 0.21471112966537476,
"eval_runtime": 6831.4547,
"eval_samples_per_second": 1.504,
"eval_steps_per_second": 0.094,
"eval_wer": 18.2641861107276,
"step": 3500
},
{
"epoch": 1.2201453790238836,
"grad_norm": 2.590219259262085,
"learning_rate": 6.8157894736842115e-06,
"loss": 0.1417,
"step": 3525
},
{
"epoch": 1.2287988923502942,
"grad_norm": 1.8229866027832031,
"learning_rate": 6.789473684210527e-06,
"loss": 0.1407,
"step": 3550
},
{
"epoch": 1.2374524056767047,
"grad_norm": 2.4515280723571777,
"learning_rate": 6.763157894736842e-06,
"loss": 0.1587,
"step": 3575
},
{
"epoch": 1.2461059190031152,
"grad_norm": 2.929910182952881,
"learning_rate": 6.736842105263158e-06,
"loss": 0.1626,
"step": 3600
},
{
"epoch": 1.254759432329526,
"grad_norm": 2.7540080547332764,
"learning_rate": 6.710526315789474e-06,
"loss": 0.1714,
"step": 3625
},
{
"epoch": 1.2634129456559364,
"grad_norm": 3.262491226196289,
"learning_rate": 6.68421052631579e-06,
"loss": 0.1474,
"step": 3650
},
{
"epoch": 1.272066458982347,
"grad_norm": 2.461406946182251,
"learning_rate": 6.6578947368421055e-06,
"loss": 0.1451,
"step": 3675
},
{
"epoch": 1.2807199723087574,
"grad_norm": 2.6654164791107178,
"learning_rate": 6.631578947368421e-06,
"loss": 0.1598,
"step": 3700
},
{
"epoch": 1.289373485635168,
"grad_norm": 2.850276231765747,
"learning_rate": 6.605263157894738e-06,
"loss": 0.1452,
"step": 3725
},
{
"epoch": 1.2980269989615785,
"grad_norm": 3.434420585632324,
"learning_rate": 6.578947368421054e-06,
"loss": 0.149,
"step": 3750
},
{
"epoch": 1.306680512287989,
"grad_norm": 2.520908832550049,
"learning_rate": 6.55263157894737e-06,
"loss": 0.1444,
"step": 3775
},
{
"epoch": 1.3153340256143995,
"grad_norm": 1.7677472829818726,
"learning_rate": 6.526315789473685e-06,
"loss": 0.1589,
"step": 3800
},
{
"epoch": 1.32398753894081,
"grad_norm": 2.4634532928466797,
"learning_rate": 6.5000000000000004e-06,
"loss": 0.1491,
"step": 3825
},
{
"epoch": 1.3326410522672205,
"grad_norm": 2.644317626953125,
"learning_rate": 6.473684210526316e-06,
"loss": 0.1546,
"step": 3850
},
{
"epoch": 1.341294565593631,
"grad_norm": 2.459674596786499,
"learning_rate": 6.447368421052632e-06,
"loss": 0.1411,
"step": 3875
},
{
"epoch": 1.3499480789200415,
"grad_norm": 2.2588868141174316,
"learning_rate": 6.421052631578948e-06,
"loss": 0.1335,
"step": 3900
},
{
"epoch": 1.358601592246452,
"grad_norm": 3.8076562881469727,
"learning_rate": 6.394736842105264e-06,
"loss": 0.1456,
"step": 3925
},
{
"epoch": 1.3672551055728626,
"grad_norm": 2.1609466075897217,
"learning_rate": 6.3684210526315795e-06,
"loss": 0.1519,
"step": 3950
},
{
"epoch": 1.375908618899273,
"grad_norm": 2.537875175476074,
"learning_rate": 6.342105263157895e-06,
"loss": 0.1435,
"step": 3975
},
{
"epoch": 1.3845621322256836,
"grad_norm": 2.0628812313079834,
"learning_rate": 6.31578947368421e-06,
"loss": 0.1406,
"step": 4000
},
{
"epoch": 1.3845621322256836,
"eval_loss": 0.2134682536125183,
"eval_runtime": 6847.272,
"eval_samples_per_second": 1.5,
"eval_steps_per_second": 0.094,
"eval_wer": 18.063992820654427,
"step": 4000
},
{
"epoch": 1.393215645552094,
"grad_norm": 2.8480277061462402,
"learning_rate": 6.289473684210526e-06,
"loss": 0.1569,
"step": 4025
},
{
"epoch": 1.4018691588785046,
"grad_norm": 2.712129831314087,
"learning_rate": 6.263157894736842e-06,
"loss": 0.1417,
"step": 4050
},
{
"epoch": 1.4105226722049151,
"grad_norm": 2.2308459281921387,
"learning_rate": 6.236842105263159e-06,
"loss": 0.1348,
"step": 4075
},
{
"epoch": 1.4191761855313256,
"grad_norm": 2.9454047679901123,
"learning_rate": 6.2105263157894745e-06,
"loss": 0.1396,
"step": 4100
},
{
"epoch": 1.4278296988577361,
"grad_norm": 3.238121509552002,
"learning_rate": 6.18421052631579e-06,
"loss": 0.1491,
"step": 4125
},
{
"epoch": 1.4364832121841467,
"grad_norm": 2.46944260597229,
"learning_rate": 6.157894736842106e-06,
"loss": 0.1362,
"step": 4150
},
{
"epoch": 1.4451367255105572,
"grad_norm": 2.5275824069976807,
"learning_rate": 6.131578947368422e-06,
"loss": 0.131,
"step": 4175
},
{
"epoch": 1.4537902388369677,
"grad_norm": 3.005089521408081,
"learning_rate": 6.105263157894738e-06,
"loss": 0.1588,
"step": 4200
},
{
"epoch": 1.4624437521633784,
"grad_norm": 3.0360686779022217,
"learning_rate": 6.0789473684210535e-06,
"loss": 0.1565,
"step": 4225
},
{
"epoch": 1.471097265489789,
"grad_norm": 2.272226333618164,
"learning_rate": 6.0526315789473685e-06,
"loss": 0.1478,
"step": 4250
},
{
"epoch": 1.4797507788161994,
"grad_norm": 2.23938250541687,
"learning_rate": 6.026315789473684e-06,
"loss": 0.1523,
"step": 4275
},
{
"epoch": 1.48840429214261,
"grad_norm": 3.1530332565307617,
"learning_rate": 6e-06,
"loss": 0.1462,
"step": 4300
},
{
"epoch": 1.4970578054690205,
"grad_norm": 2.3748743534088135,
"learning_rate": 5.973684210526316e-06,
"loss": 0.1549,
"step": 4325
},
{
"epoch": 1.505711318795431,
"grad_norm": 2.5215299129486084,
"learning_rate": 5.947368421052632e-06,
"loss": 0.1418,
"step": 4350
},
{
"epoch": 1.5143648321218415,
"grad_norm": 1.9181177616119385,
"learning_rate": 5.921052631578948e-06,
"loss": 0.1452,
"step": 4375
},
{
"epoch": 1.523018345448252,
"grad_norm": 3.432988166809082,
"learning_rate": 5.8947368421052634e-06,
"loss": 0.1692,
"step": 4400
},
{
"epoch": 1.5316718587746625,
"grad_norm": 2.3905420303344727,
"learning_rate": 5.86842105263158e-06,
"loss": 0.1428,
"step": 4425
},
{
"epoch": 1.540325372101073,
"grad_norm": 2.4743220806121826,
"learning_rate": 5.842105263157896e-06,
"loss": 0.1487,
"step": 4450
},
{
"epoch": 1.5489788854274835,
"grad_norm": 3.31463623046875,
"learning_rate": 5.815789473684212e-06,
"loss": 0.154,
"step": 4475
},
{
"epoch": 1.557632398753894,
"grad_norm": 2.186415672302246,
"learning_rate": 5.789473684210527e-06,
"loss": 0.1421,
"step": 4500
},
{
"epoch": 1.557632398753894,
"eval_loss": 0.20704275369644165,
"eval_runtime": 6831.8854,
"eval_samples_per_second": 1.504,
"eval_steps_per_second": 0.094,
"eval_wer": 17.19073588292144,
"step": 4500
},
{
"epoch": 1.5662859120803048,
"grad_norm": 1.9734889268875122,
"learning_rate": 5.7631578947368425e-06,
"loss": 0.1348,
"step": 4525
},
{
"epoch": 1.5749394254067153,
"grad_norm": 3.3035688400268555,
"learning_rate": 5.736842105263158e-06,
"loss": 0.1294,
"step": 4550
},
{
"epoch": 1.5835929387331258,
"grad_norm": 2.1477127075195312,
"learning_rate": 5.710526315789474e-06,
"loss": 0.1463,
"step": 4575
},
{
"epoch": 1.5922464520595363,
"grad_norm": 2.911785364151001,
"learning_rate": 5.68421052631579e-06,
"loss": 0.1425,
"step": 4600
},
{
"epoch": 1.6008999653859468,
"grad_norm": 2.9243428707122803,
"learning_rate": 5.657894736842106e-06,
"loss": 0.1437,
"step": 4625
},
{
"epoch": 1.6095534787123573,
"grad_norm": 1.9402472972869873,
"learning_rate": 5.631578947368422e-06,
"loss": 0.1448,
"step": 4650
},
{
"epoch": 1.6182069920387678,
"grad_norm": 2.558166980743408,
"learning_rate": 5.605263157894737e-06,
"loss": 0.162,
"step": 4675
},
{
"epoch": 1.6268605053651783,
"grad_norm": 2.497208833694458,
"learning_rate": 5.578947368421052e-06,
"loss": 0.1534,
"step": 4700
},
{
"epoch": 1.6355140186915889,
"grad_norm": 3.3218460083007812,
"learning_rate": 5.552631578947368e-06,
"loss": 0.1491,
"step": 4725
},
{
"epoch": 1.6441675320179994,
"grad_norm": 2.5457661151885986,
"learning_rate": 5.526315789473685e-06,
"loss": 0.1491,
"step": 4750
},
{
"epoch": 1.6528210453444099,
"grad_norm": 2.2707815170288086,
"learning_rate": 5.500000000000001e-06,
"loss": 0.1363,
"step": 4775
},
{
"epoch": 1.6614745586708204,
"grad_norm": 2.816852569580078,
"learning_rate": 5.4736842105263165e-06,
"loss": 0.1404,
"step": 4800
},
{
"epoch": 1.670128071997231,
"grad_norm": 2.974275827407837,
"learning_rate": 5.447368421052632e-06,
"loss": 0.1372,
"step": 4825
},
{
"epoch": 1.6787815853236414,
"grad_norm": 2.9549601078033447,
"learning_rate": 5.421052631578948e-06,
"loss": 0.1342,
"step": 4850
},
{
"epoch": 1.687435098650052,
"grad_norm": 3.5474634170532227,
"learning_rate": 5.394736842105264e-06,
"loss": 0.1481,
"step": 4875
},
{
"epoch": 1.6960886119764624,
"grad_norm": 3.140803813934326,
"learning_rate": 5.36842105263158e-06,
"loss": 0.1406,
"step": 4900
},
{
"epoch": 1.704742125302873,
"grad_norm": 3.267249584197998,
"learning_rate": 5.342105263157895e-06,
"loss": 0.1503,
"step": 4925
},
{
"epoch": 1.7133956386292835,
"grad_norm": 3.2628836631774902,
"learning_rate": 5.315789473684211e-06,
"loss": 0.154,
"step": 4950
},
{
"epoch": 1.722049151955694,
"grad_norm": 3.291492462158203,
"learning_rate": 5.289473684210526e-06,
"loss": 0.1526,
"step": 4975
},
{
"epoch": 1.7307026652821045,
"grad_norm": 2.379425525665283,
"learning_rate": 5.263157894736842e-06,
"loss": 0.1427,
"step": 5000
},
{
"epoch": 1.7307026652821045,
"eval_loss": 0.20547065138816833,
"eval_runtime": 6830.2168,
"eval_samples_per_second": 1.504,
"eval_steps_per_second": 0.094,
"eval_wer": 16.991405494960652,
"step": 5000
},
{
"epoch": 1.739356178608515,
"grad_norm": 2.3537607192993164,
"learning_rate": 5.236842105263158e-06,
"loss": 0.1389,
"step": 5025
},
{
"epoch": 1.7480096919349255,
"grad_norm": 2.943369150161743,
"learning_rate": 5.210526315789474e-06,
"loss": 0.1521,
"step": 5050
},
{
"epoch": 1.756663205261336,
"grad_norm": 3.2738773822784424,
"learning_rate": 5.18421052631579e-06,
"loss": 0.1601,
"step": 5075
},
{
"epoch": 1.7653167185877465,
"grad_norm": 2.1923158168792725,
"learning_rate": 5.157894736842106e-06,
"loss": 0.1631,
"step": 5100
},
{
"epoch": 1.773970231914157,
"grad_norm": 3.9332115650177,
"learning_rate": 5.131578947368422e-06,
"loss": 0.1455,
"step": 5125
},
{
"epoch": 1.7826237452405675,
"grad_norm": 2.184039354324341,
"learning_rate": 5.105263157894738e-06,
"loss": 0.1565,
"step": 5150
},
{
"epoch": 1.791277258566978,
"grad_norm": 3.008984327316284,
"learning_rate": 5.078947368421053e-06,
"loss": 0.1395,
"step": 5175
},
{
"epoch": 1.7999307718933886,
"grad_norm": 2.965576648712158,
"learning_rate": 5.052631578947369e-06,
"loss": 0.1398,
"step": 5200
},
{
"epoch": 1.808584285219799,
"grad_norm": 2.224731922149658,
"learning_rate": 5.026315789473685e-06,
"loss": 0.1327,
"step": 5225
},
{
"epoch": 1.8172377985462098,
"grad_norm": 2.2881205081939697,
"learning_rate": 5e-06,
"loss": 0.1515,
"step": 5250
},
{
"epoch": 1.8258913118726203,
"grad_norm": 2.452091932296753,
"learning_rate": 4.973684210526316e-06,
"loss": 0.1433,
"step": 5275
},
{
"epoch": 1.8345448251990308,
"grad_norm": 2.7906880378723145,
"learning_rate": 4.947368421052632e-06,
"loss": 0.1459,
"step": 5300
},
{
"epoch": 1.8431983385254413,
"grad_norm": 2.501685619354248,
"learning_rate": 4.921052631578948e-06,
"loss": 0.1471,
"step": 5325
},
{
"epoch": 1.8518518518518519,
"grad_norm": 2.82496976852417,
"learning_rate": 4.894736842105264e-06,
"loss": 0.1339,
"step": 5350
},
{
"epoch": 1.8605053651782624,
"grad_norm": 2.912050485610962,
"learning_rate": 4.8684210526315795e-06,
"loss": 0.1359,
"step": 5375
},
{
"epoch": 1.8691588785046729,
"grad_norm": 2.363729953765869,
"learning_rate": 4.842105263157895e-06,
"loss": 0.1577,
"step": 5400
},
{
"epoch": 1.8778123918310834,
"grad_norm": 2.8046460151672363,
"learning_rate": 4.815789473684211e-06,
"loss": 0.1519,
"step": 5425
},
{
"epoch": 1.886465905157494,
"grad_norm": 3.004221200942993,
"learning_rate": 4.789473684210527e-06,
"loss": 0.1486,
"step": 5450
},
{
"epoch": 1.8951194184839044,
"grad_norm": 2.8844528198242188,
"learning_rate": 4.763157894736842e-06,
"loss": 0.1309,
"step": 5475
},
{
"epoch": 1.9037729318103151,
"grad_norm": 2.8573217391967773,
"learning_rate": 4.736842105263158e-06,
"loss": 0.1429,
"step": 5500
},
{
"epoch": 1.9037729318103151,
"eval_loss": 0.20240795612335205,
"eval_runtime": 6794.1597,
"eval_samples_per_second": 1.512,
"eval_steps_per_second": 0.094,
"eval_wer": 16.77567996686456,
"step": 5500
},
{
"epoch": 1.9124264451367257,
"grad_norm": 2.1657752990722656,
"learning_rate": 4.710526315789474e-06,
"loss": 0.1384,
"step": 5525
},
{
"epoch": 1.9210799584631362,
"grad_norm": 2.8199362754821777,
"learning_rate": 4.68421052631579e-06,
"loss": 0.1383,
"step": 5550
},
{
"epoch": 1.9297334717895467,
"grad_norm": 2.596308469772339,
"learning_rate": 4.657894736842106e-06,
"loss": 0.1368,
"step": 5575
},
{
"epoch": 1.9383869851159572,
"grad_norm": 2.499068260192871,
"learning_rate": 4.631578947368421e-06,
"loss": 0.1425,
"step": 5600
},
{
"epoch": 1.9470404984423677,
"grad_norm": 2.7802367210388184,
"learning_rate": 4.605263157894737e-06,
"loss": 0.1381,
"step": 5625
},
{
"epoch": 1.9556940117687782,
"grad_norm": 2.1181230545043945,
"learning_rate": 4.578947368421053e-06,
"loss": 0.1427,
"step": 5650
},
{
"epoch": 1.9643475250951887,
"grad_norm": 2.876237630844116,
"learning_rate": 4.552631578947369e-06,
"loss": 0.1325,
"step": 5675
},
{
"epoch": 1.9730010384215992,
"grad_norm": 2.6661200523376465,
"learning_rate": 4.526315789473685e-06,
"loss": 0.1401,
"step": 5700
},
{
"epoch": 1.9816545517480098,
"grad_norm": 2.1553337574005127,
"learning_rate": 4.5e-06,
"loss": 0.1479,
"step": 5725
},
{
"epoch": 1.9903080650744203,
"grad_norm": 1.7381737232208252,
"learning_rate": 4.473684210526316e-06,
"loss": 0.1386,
"step": 5750
},
{
"epoch": 1.9989615784008308,
"grad_norm": 2.856213331222534,
"learning_rate": 4.447368421052632e-06,
"loss": 0.147,
"step": 5775
},
{
"epoch": 2.0076150917272413,
"grad_norm": 2.158287286758423,
"learning_rate": 4.4210526315789476e-06,
"loss": 0.0941,
"step": 5800
},
{
"epoch": 2.016268605053652,
"grad_norm": 1.5348066091537476,
"learning_rate": 4.394736842105263e-06,
"loss": 0.0839,
"step": 5825
},
{
"epoch": 2.0249221183800623,
"grad_norm": 2.0257511138916016,
"learning_rate": 4.368421052631579e-06,
"loss": 0.0836,
"step": 5850
},
{
"epoch": 2.033575631706473,
"grad_norm": 2.420926809310913,
"learning_rate": 4.342105263157895e-06,
"loss": 0.0912,
"step": 5875
},
{
"epoch": 2.0422291450328833,
"grad_norm": 1.870279312133789,
"learning_rate": 4.315789473684211e-06,
"loss": 0.0785,
"step": 5900
},
{
"epoch": 2.050882658359294,
"grad_norm": 2.135033130645752,
"learning_rate": 4.289473684210527e-06,
"loss": 0.0808,
"step": 5925
},
{
"epoch": 2.0595361716857044,
"grad_norm": 1.9781912565231323,
"learning_rate": 4.2631578947368425e-06,
"loss": 0.0937,
"step": 5950
},
{
"epoch": 2.068189685012115,
"grad_norm": 2.5464608669281006,
"learning_rate": 4.236842105263158e-06,
"loss": 0.0824,
"step": 5975
},
{
"epoch": 2.0768431983385254,
"grad_norm": 2.4031896591186523,
"learning_rate": 4.210526315789474e-06,
"loss": 0.0893,
"step": 6000
},
{
"epoch": 2.0768431983385254,
"eval_loss": 0.20871570706367493,
"eval_runtime": 6782.3344,
"eval_samples_per_second": 1.515,
"eval_steps_per_second": 0.095,
"eval_wer": 16.56685765566754,
"step": 6000
},
{
"epoch": 2.085496711664936,
"grad_norm": 1.7652385234832764,
"learning_rate": 4.18421052631579e-06,
"loss": 0.085,
"step": 6025
},
{
"epoch": 2.0941502249913464,
"grad_norm": 2.4497318267822266,
"learning_rate": 4.157894736842106e-06,
"loss": 0.0825,
"step": 6050
},
{
"epoch": 2.102803738317757,
"grad_norm": 1.7798666954040527,
"learning_rate": 4.1315789473684216e-06,
"loss": 0.077,
"step": 6075
},
{
"epoch": 2.1114572516441674,
"grad_norm": 1.8536590337753296,
"learning_rate": 4.105263157894737e-06,
"loss": 0.0799,
"step": 6100
},
{
"epoch": 2.120110764970578,
"grad_norm": 1.8329837322235107,
"learning_rate": 4.078947368421053e-06,
"loss": 0.0819,
"step": 6125
},
{
"epoch": 2.1287642782969884,
"grad_norm": 1.9826552867889404,
"learning_rate": 4.052631578947368e-06,
"loss": 0.0953,
"step": 6150
},
{
"epoch": 2.137417791623399,
"grad_norm": 1.9027239084243774,
"learning_rate": 4.026315789473684e-06,
"loss": 0.0889,
"step": 6175
},
{
"epoch": 2.1460713049498095,
"grad_norm": 1.8616188764572144,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0869,
"step": 6200
},
{
"epoch": 2.15472481827622,
"grad_norm": 1.987467885017395,
"learning_rate": 3.9736842105263165e-06,
"loss": 0.0854,
"step": 6225
},
{
"epoch": 2.1633783316026305,
"grad_norm": 1.5226447582244873,
"learning_rate": 3.947368421052632e-06,
"loss": 0.0747,
"step": 6250
},
{
"epoch": 2.172031844929041,
"grad_norm": 2.361544370651245,
"learning_rate": 3.921052631578947e-06,
"loss": 0.0749,
"step": 6275
},
{
"epoch": 2.1806853582554515,
"grad_norm": 2.086289882659912,
"learning_rate": 3.894736842105263e-06,
"loss": 0.0824,
"step": 6300
},
{
"epoch": 2.189338871581862,
"grad_norm": 1.9018774032592773,
"learning_rate": 3.868421052631579e-06,
"loss": 0.076,
"step": 6325
},
{
"epoch": 2.1979923849082725,
"grad_norm": 2.450321912765503,
"learning_rate": 3.842105263157895e-06,
"loss": 0.0861,
"step": 6350
},
{
"epoch": 2.2066458982346835,
"grad_norm": 2.0160300731658936,
"learning_rate": 3.815789473684211e-06,
"loss": 0.0832,
"step": 6375
},
{
"epoch": 2.215299411561094,
"grad_norm": 2.0652830600738525,
"learning_rate": 3.789473684210527e-06,
"loss": 0.0844,
"step": 6400
},
{
"epoch": 2.2239529248875045,
"grad_norm": 1.6188254356384277,
"learning_rate": 3.7631578947368426e-06,
"loss": 0.081,
"step": 6425
},
{
"epoch": 2.232606438213915,
"grad_norm": 2.319303035736084,
"learning_rate": 3.736842105263158e-06,
"loss": 0.0894,
"step": 6450
},
{
"epoch": 2.2412599515403255,
"grad_norm": 2.1996448040008545,
"learning_rate": 3.710526315789474e-06,
"loss": 0.0843,
"step": 6475
},
{
"epoch": 2.249913464866736,
"grad_norm": 2.6930758953094482,
"learning_rate": 3.6842105263157896e-06,
"loss": 0.0821,
"step": 6500
},
{
"epoch": 2.249913464866736,
"eval_loss": 0.21049684286117554,
"eval_runtime": 6797.122,
"eval_samples_per_second": 1.511,
"eval_steps_per_second": 0.094,
"eval_wer": 16.584978600027615,
"step": 6500
},
{
"epoch": 2.2585669781931466,
"grad_norm": 2.480389356613159,
"learning_rate": 3.657894736842106e-06,
"loss": 0.0847,
"step": 6525
},
{
"epoch": 2.267220491519557,
"grad_norm": 2.3303518295288086,
"learning_rate": 3.6315789473684217e-06,
"loss": 0.0878,
"step": 6550
},
{
"epoch": 2.2758740048459676,
"grad_norm": 2.0243282318115234,
"learning_rate": 3.605263157894737e-06,
"loss": 0.0864,
"step": 6575
},
{
"epoch": 2.284527518172378,
"grad_norm": 2.6420583724975586,
"learning_rate": 3.578947368421053e-06,
"loss": 0.0884,
"step": 6600
},
{
"epoch": 2.2931810314987886,
"grad_norm": 2.4340221881866455,
"learning_rate": 3.5526315789473687e-06,
"loss": 0.0894,
"step": 6625
},
{
"epoch": 2.301834544825199,
"grad_norm": 2.174543857574463,
"learning_rate": 3.5263157894736846e-06,
"loss": 0.0814,
"step": 6650
},
{
"epoch": 2.3104880581516096,
"grad_norm": 2.3189802169799805,
"learning_rate": 3.5e-06,
"loss": 0.0857,
"step": 6675
},
{
"epoch": 2.31914157147802,
"grad_norm": 2.114579439163208,
"learning_rate": 3.473684210526316e-06,
"loss": 0.0907,
"step": 6700
},
{
"epoch": 2.3277950848044306,
"grad_norm": 1.8805601596832275,
"learning_rate": 3.447368421052632e-06,
"loss": 0.0869,
"step": 6725
},
{
"epoch": 2.336448598130841,
"grad_norm": 1.9643834829330444,
"learning_rate": 3.421052631578948e-06,
"loss": 0.0878,
"step": 6750
},
{
"epoch": 2.3451021114572517,
"grad_norm": 1.8426399230957031,
"learning_rate": 3.3947368421052636e-06,
"loss": 0.0815,
"step": 6775
},
{
"epoch": 2.353755624783662,
"grad_norm": 2.246006965637207,
"learning_rate": 3.368421052631579e-06,
"loss": 0.0839,
"step": 6800
},
{
"epoch": 2.3624091381100727,
"grad_norm": 2.0131287574768066,
"learning_rate": 3.342105263157895e-06,
"loss": 0.0773,
"step": 6825
},
{
"epoch": 2.371062651436483,
"grad_norm": 1.8699673414230347,
"learning_rate": 3.3157894736842107e-06,
"loss": 0.0834,
"step": 6850
},
{
"epoch": 2.3797161647628937,
"grad_norm": 1.833447813987732,
"learning_rate": 3.289473684210527e-06,
"loss": 0.0817,
"step": 6875
},
{
"epoch": 2.3883696780893042,
"grad_norm": 1.5582021474838257,
"learning_rate": 3.2631578947368423e-06,
"loss": 0.0746,
"step": 6900
},
{
"epoch": 2.3970231914157147,
"grad_norm": 1.5744956731796265,
"learning_rate": 3.236842105263158e-06,
"loss": 0.0773,
"step": 6925
},
{
"epoch": 2.4056767047421252,
"grad_norm": 2.09190034866333,
"learning_rate": 3.210526315789474e-06,
"loss": 0.0756,
"step": 6950
},
{
"epoch": 2.4143302180685358,
"grad_norm": 2.2656619548797607,
"learning_rate": 3.1842105263157898e-06,
"loss": 0.0814,
"step": 6975
},
{
"epoch": 2.4229837313949463,
"grad_norm": 1.941187858581543,
"learning_rate": 3.157894736842105e-06,
"loss": 0.0784,
"step": 7000
},
{
"epoch": 2.4229837313949463,
"eval_loss": 0.21001853048801422,
"eval_runtime": 6836.3983,
"eval_samples_per_second": 1.503,
"eval_steps_per_second": 0.094,
"eval_wer": 17.051808642827556,
"step": 7000
},
{
"epoch": 2.431637244721357,
"grad_norm": 1.3389655351638794,
"learning_rate": 3.131578947368421e-06,
"loss": 0.0818,
"step": 7025
},
{
"epoch": 2.4402907580477673,
"grad_norm": 1.9891241788864136,
"learning_rate": 3.1052631578947372e-06,
"loss": 0.0858,
"step": 7050
},
{
"epoch": 2.448944271374178,
"grad_norm": 1.729161024093628,
"learning_rate": 3.078947368421053e-06,
"loss": 0.0792,
"step": 7075
},
{
"epoch": 2.4575977847005883,
"grad_norm": 2.3100719451904297,
"learning_rate": 3.052631578947369e-06,
"loss": 0.0773,
"step": 7100
},
{
"epoch": 2.466251298026999,
"grad_norm": 1.6781518459320068,
"learning_rate": 3.0263157894736843e-06,
"loss": 0.0862,
"step": 7125
},
{
"epoch": 2.4749048113534093,
"grad_norm": 2.458012342453003,
"learning_rate": 3e-06,
"loss": 0.0901,
"step": 7150
},
{
"epoch": 2.48355832467982,
"grad_norm": 2.350067377090454,
"learning_rate": 2.973684210526316e-06,
"loss": 0.0859,
"step": 7175
},
{
"epoch": 2.4922118380062304,
"grad_norm": 2.4465065002441406,
"learning_rate": 2.9473684210526317e-06,
"loss": 0.0793,
"step": 7200
},
{
"epoch": 2.5008653513326413,
"grad_norm": 2.160317897796631,
"learning_rate": 2.921052631578948e-06,
"loss": 0.0847,
"step": 7225
},
{
"epoch": 2.509518864659052,
"grad_norm": 1.6664947271347046,
"learning_rate": 2.8947368421052634e-06,
"loss": 0.0941,
"step": 7250
},
{
"epoch": 2.5181723779854623,
"grad_norm": 2.054807424545288,
"learning_rate": 2.868421052631579e-06,
"loss": 0.0811,
"step": 7275
},
{
"epoch": 2.526825891311873,
"grad_norm": 2.8626656532287598,
"learning_rate": 2.842105263157895e-06,
"loss": 0.0893,
"step": 7300
},
{
"epoch": 2.5354794046382834,
"grad_norm": 2.172909736633301,
"learning_rate": 2.815789473684211e-06,
"loss": 0.0828,
"step": 7325
},
{
"epoch": 2.544132917964694,
"grad_norm": 2.266849994659424,
"learning_rate": 2.789473684210526e-06,
"loss": 0.0763,
"step": 7350
},
{
"epoch": 2.5527864312911044,
"grad_norm": 1.8911911249160767,
"learning_rate": 2.7631578947368424e-06,
"loss": 0.0839,
"step": 7375
},
{
"epoch": 2.561439944617515,
"grad_norm": 2.2773964405059814,
"learning_rate": 2.7368421052631583e-06,
"loss": 0.0773,
"step": 7400
},
{
"epoch": 2.5700934579439254,
"grad_norm": 1.95570707321167,
"learning_rate": 2.710526315789474e-06,
"loss": 0.0835,
"step": 7425
},
{
"epoch": 2.578746971270336,
"grad_norm": 1.5412108898162842,
"learning_rate": 2.68421052631579e-06,
"loss": 0.0847,
"step": 7450
},
{
"epoch": 2.5874004845967464,
"grad_norm": 2.286245107650757,
"learning_rate": 2.6578947368421053e-06,
"loss": 0.0893,
"step": 7475
},
{
"epoch": 2.596053997923157,
"grad_norm": 2.308685779571533,
"learning_rate": 2.631578947368421e-06,
"loss": 0.0785,
"step": 7500
},
{
"epoch": 2.596053997923157,
"eval_loss": 0.2088063508272171,
"eval_runtime": 6812.1745,
"eval_samples_per_second": 1.508,
"eval_steps_per_second": 0.094,
"eval_wer": 16.74634129504349,
"step": 7500
},
{
"epoch": 2.6047075112495675,
"grad_norm": 2.2219200134277344,
"learning_rate": 2.605263157894737e-06,
"loss": 0.0774,
"step": 7525
},
{
"epoch": 2.613361024575978,
"grad_norm": 3.492546558380127,
"learning_rate": 2.578947368421053e-06,
"loss": 0.0852,
"step": 7550
},
{
"epoch": 2.6220145379023885,
"grad_norm": 1.7338074445724487,
"learning_rate": 2.552631578947369e-06,
"loss": 0.0825,
"step": 7575
},
{
"epoch": 2.630668051228799,
"grad_norm": 2.582502603530884,
"learning_rate": 2.5263157894736844e-06,
"loss": 0.0758,
"step": 7600
},
{
"epoch": 2.6393215645552095,
"grad_norm": 3.0539708137512207,
"learning_rate": 2.5e-06,
"loss": 0.0943,
"step": 7625
},
{
"epoch": 2.64797507788162,
"grad_norm": 2.1495137214660645,
"learning_rate": 2.473684210526316e-06,
"loss": 0.0817,
"step": 7650
},
{
"epoch": 2.6566285912080305,
"grad_norm": 1.9127858877182007,
"learning_rate": 2.447368421052632e-06,
"loss": 0.0867,
"step": 7675
},
{
"epoch": 2.665282104534441,
"grad_norm": 1.831830382347107,
"learning_rate": 2.4210526315789477e-06,
"loss": 0.0768,
"step": 7700
},
{
"epoch": 2.6739356178608515,
"grad_norm": 1.5215665102005005,
"learning_rate": 2.3947368421052635e-06,
"loss": 0.0862,
"step": 7725
},
{
"epoch": 2.682589131187262,
"grad_norm": 2.2080304622650146,
"learning_rate": 2.368421052631579e-06,
"loss": 0.0876,
"step": 7750
},
{
"epoch": 2.6912426445136726,
"grad_norm": 3.0281341075897217,
"learning_rate": 2.342105263157895e-06,
"loss": 0.0815,
"step": 7775
},
{
"epoch": 2.699896157840083,
"grad_norm": 2.798521041870117,
"learning_rate": 2.3157894736842105e-06,
"loss": 0.0965,
"step": 7800
},
{
"epoch": 2.7085496711664936,
"grad_norm": 2.5977230072021484,
"learning_rate": 2.2894736842105263e-06,
"loss": 0.0878,
"step": 7825
},
{
"epoch": 2.717203184492904,
"grad_norm": 2.576024055480957,
"learning_rate": 2.2631578947368426e-06,
"loss": 0.0848,
"step": 7850
},
{
"epoch": 2.7258566978193146,
"grad_norm": 2.787841796875,
"learning_rate": 2.236842105263158e-06,
"loss": 0.0867,
"step": 7875
},
{
"epoch": 2.734510211145725,
"grad_norm": 3.0872480869293213,
"learning_rate": 2.2105263157894738e-06,
"loss": 0.0931,
"step": 7900
},
{
"epoch": 2.7431637244721356,
"grad_norm": 1.9903494119644165,
"learning_rate": 2.1842105263157896e-06,
"loss": 0.0824,
"step": 7925
},
{
"epoch": 2.751817237798546,
"grad_norm": 1.7111889123916626,
"learning_rate": 2.1578947368421054e-06,
"loss": 0.0828,
"step": 7950
},
{
"epoch": 2.7604707511249567,
"grad_norm": 2.4591681957244873,
"learning_rate": 2.1315789473684212e-06,
"loss": 0.0847,
"step": 7975
},
{
"epoch": 2.769124264451367,
"grad_norm": 1.9559268951416016,
"learning_rate": 2.105263157894737e-06,
"loss": 0.072,
"step": 8000
},
{
"epoch": 2.769124264451367,
"eval_loss": 0.20778915286064148,
"eval_runtime": 6824.3281,
"eval_samples_per_second": 1.505,
"eval_steps_per_second": 0.094,
"eval_wer": 16.5470109070827,
"step": 8000
},
{
"epoch": 2.7777777777777777,
"grad_norm": 2.040600299835205,
"learning_rate": 2.078947368421053e-06,
"loss": 0.0878,
"step": 8025
},
{
"epoch": 2.786431291104188,
"grad_norm": 2.3346126079559326,
"learning_rate": 2.0526315789473687e-06,
"loss": 0.0811,
"step": 8050
},
{
"epoch": 2.7950848044305987,
"grad_norm": 2.295974016189575,
"learning_rate": 2.026315789473684e-06,
"loss": 0.0793,
"step": 8075
},
{
"epoch": 2.803738317757009,
"grad_norm": 1.8700435161590576,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0791,
"step": 8100
},
{
"epoch": 2.8123918310834197,
"grad_norm": 1.5720834732055664,
"learning_rate": 1.973684210526316e-06,
"loss": 0.0768,
"step": 8125
},
{
"epoch": 2.8210453444098302,
"grad_norm": 2.1670758724212646,
"learning_rate": 1.9473684210526315e-06,
"loss": 0.075,
"step": 8150
},
{
"epoch": 2.8296988577362407,
"grad_norm": 2.925609588623047,
"learning_rate": 1.9210526315789474e-06,
"loss": 0.0752,
"step": 8175
},
{
"epoch": 2.8383523710626513,
"grad_norm": 2.2166748046875,
"learning_rate": 1.8947368421052634e-06,
"loss": 0.0924,
"step": 8200
},
{
"epoch": 2.8470058843890618,
"grad_norm": 2.2777693271636963,
"learning_rate": 1.868421052631579e-06,
"loss": 0.0868,
"step": 8225
},
{
"epoch": 2.8556593977154723,
"grad_norm": 2.00960111618042,
"learning_rate": 1.8421052631578948e-06,
"loss": 0.0696,
"step": 8250
},
{
"epoch": 2.864312911041883,
"grad_norm": 1.7867509126663208,
"learning_rate": 1.8157894736842109e-06,
"loss": 0.0756,
"step": 8275
},
{
"epoch": 2.8729664243682933,
"grad_norm": 1.8508473634719849,
"learning_rate": 1.7894736842105265e-06,
"loss": 0.0887,
"step": 8300
},
{
"epoch": 2.881619937694704,
"grad_norm": 2.443462371826172,
"learning_rate": 1.7631578947368423e-06,
"loss": 0.0819,
"step": 8325
},
{
"epoch": 2.8902734510211143,
"grad_norm": 1.782518982887268,
"learning_rate": 1.736842105263158e-06,
"loss": 0.077,
"step": 8350
},
{
"epoch": 2.898926964347525,
"grad_norm": 2.163900852203369,
"learning_rate": 1.710526315789474e-06,
"loss": 0.0806,
"step": 8375
},
{
"epoch": 2.9075804776739353,
"grad_norm": 2.4447150230407715,
"learning_rate": 1.6842105263157895e-06,
"loss": 0.0807,
"step": 8400
},
{
"epoch": 2.9162339910003463,
"grad_norm": 2.5183095932006836,
"learning_rate": 1.6578947368421053e-06,
"loss": 0.0843,
"step": 8425
},
{
"epoch": 2.924887504326757,
"grad_norm": 1.8049046993255615,
"learning_rate": 1.6315789473684212e-06,
"loss": 0.0841,
"step": 8450
},
{
"epoch": 2.9335410176531673,
"grad_norm": 2.3873138427734375,
"learning_rate": 1.605263157894737e-06,
"loss": 0.0892,
"step": 8475
},
{
"epoch": 2.942194530979578,
"grad_norm": 3.381866693496704,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.086,
"step": 8500
},
{
"epoch": 2.942194530979578,
"eval_loss": 0.20743827521800995,
"eval_runtime": 6784.8177,
"eval_samples_per_second": 1.514,
"eval_steps_per_second": 0.095,
"eval_wer": 16.289003175479774,
"step": 8500
},
{
"epoch": 2.9508480443059883,
"grad_norm": 1.9069478511810303,
"learning_rate": 1.5526315789473686e-06,
"loss": 0.0715,
"step": 8525
},
{
"epoch": 2.959501557632399,
"grad_norm": 2.0899953842163086,
"learning_rate": 1.5263157894736844e-06,
"loss": 0.0782,
"step": 8550
},
{
"epoch": 2.9681550709588094,
"grad_norm": 2.9195377826690674,
"learning_rate": 1.5e-06,
"loss": 0.0875,
"step": 8575
},
{
"epoch": 2.97680858428522,
"grad_norm": 2.9911153316497803,
"learning_rate": 1.4736842105263159e-06,
"loss": 0.082,
"step": 8600
},
{
"epoch": 2.9854620976116304,
"grad_norm": 2.4869954586029053,
"learning_rate": 1.4473684210526317e-06,
"loss": 0.0798,
"step": 8625
},
{
"epoch": 2.994115610938041,
"grad_norm": 2.3882336616516113,
"learning_rate": 1.4210526315789475e-06,
"loss": 0.0711,
"step": 8650
},
{
"epoch": 3.0027691242644514,
"grad_norm": 1.6410326957702637,
"learning_rate": 1.394736842105263e-06,
"loss": 0.0807,
"step": 8675
},
{
"epoch": 3.011422637590862,
"grad_norm": 0.9989892244338989,
"learning_rate": 1.3684210526315791e-06,
"loss": 0.0455,
"step": 8700
},
{
"epoch": 3.0200761509172724,
"grad_norm": 1.602900743484497,
"learning_rate": 1.342105263157895e-06,
"loss": 0.0462,
"step": 8725
},
{
"epoch": 3.028729664243683,
"grad_norm": 1.4966108798980713,
"learning_rate": 1.3157894736842106e-06,
"loss": 0.0464,
"step": 8750
},
{
"epoch": 3.0373831775700935,
"grad_norm": 1.798976182937622,
"learning_rate": 1.2894736842105266e-06,
"loss": 0.0452,
"step": 8775
},
{
"epoch": 3.046036690896504,
"grad_norm": 1.5227916240692139,
"learning_rate": 1.2631578947368422e-06,
"loss": 0.0515,
"step": 8800
},
{
"epoch": 3.0546902042229145,
"grad_norm": 1.3021562099456787,
"learning_rate": 1.236842105263158e-06,
"loss": 0.0486,
"step": 8825
},
{
"epoch": 3.063343717549325,
"grad_norm": 1.3772845268249512,
"learning_rate": 1.2105263157894738e-06,
"loss": 0.0451,
"step": 8850
},
{
"epoch": 3.0719972308757355,
"grad_norm": 2.208716630935669,
"learning_rate": 1.1842105263157894e-06,
"loss": 0.0498,
"step": 8875
},
{
"epoch": 3.080650744202146,
"grad_norm": 1.7014740705490112,
"learning_rate": 1.1578947368421053e-06,
"loss": 0.0446,
"step": 8900
},
{
"epoch": 3.0893042575285565,
"grad_norm": 1.7459232807159424,
"learning_rate": 1.1315789473684213e-06,
"loss": 0.0442,
"step": 8925
},
{
"epoch": 3.097957770854967,
"grad_norm": 1.3665467500686646,
"learning_rate": 1.1052631578947369e-06,
"loss": 0.051,
"step": 8950
},
{
"epoch": 3.1066112841813776,
"grad_norm": 1.5824533700942993,
"learning_rate": 1.0789473684210527e-06,
"loss": 0.052,
"step": 8975
},
{
"epoch": 3.115264797507788,
"grad_norm": 1.0504260063171387,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.0463,
"step": 9000
},
{
"epoch": 3.115264797507788,
"eval_loss": 0.2164984941482544,
"eval_runtime": 6827.9492,
"eval_samples_per_second": 1.504,
"eval_steps_per_second": 0.094,
"eval_wer": 16.52112384371117,
"step": 9000
},
{
"epoch": 3.1239183108341986,
"grad_norm": 2.2462034225463867,
"learning_rate": 1.0263157894736843e-06,
"loss": 0.0492,
"step": 9025
},
{
"epoch": 3.132571824160609,
"grad_norm": 1.408866286277771,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.0483,
"step": 9050
},
{
"epoch": 3.1412253374870196,
"grad_norm": 1.656809687614441,
"learning_rate": 9.736842105263158e-07,
"loss": 0.0516,
"step": 9075
},
{
"epoch": 3.14987885081343,
"grad_norm": 1.4440449476242065,
"learning_rate": 9.473684210526317e-07,
"loss": 0.0497,
"step": 9100
},
{
"epoch": 3.1585323641398406,
"grad_norm": 1.7022453546524048,
"learning_rate": 9.210526315789474e-07,
"loss": 0.0467,
"step": 9125
},
{
"epoch": 3.167185877466251,
"grad_norm": 1.5018113851547241,
"learning_rate": 8.947368421052632e-07,
"loss": 0.0476,
"step": 9150
},
{
"epoch": 3.1758393907926616,
"grad_norm": 1.9194047451019287,
"learning_rate": 8.68421052631579e-07,
"loss": 0.0487,
"step": 9175
},
{
"epoch": 3.184492904119072,
"grad_norm": 1.4882956743240356,
"learning_rate": 8.421052631578948e-07,
"loss": 0.0531,
"step": 9200
},
{
"epoch": 3.1931464174454827,
"grad_norm": 1.4099938869476318,
"learning_rate": 8.157894736842106e-07,
"loss": 0.048,
"step": 9225
},
{
"epoch": 3.2017999307718936,
"grad_norm": 1.5829529762268066,
"learning_rate": 7.894736842105263e-07,
"loss": 0.0503,
"step": 9250
},
{
"epoch": 3.210453444098304,
"grad_norm": 1.280104160308838,
"learning_rate": 7.631578947368422e-07,
"loss": 0.0439,
"step": 9275
},
{
"epoch": 3.2191069574247146,
"grad_norm": 1.5087403059005737,
"learning_rate": 7.368421052631579e-07,
"loss": 0.0486,
"step": 9300
},
{
"epoch": 3.227760470751125,
"grad_norm": 1.830002784729004,
"learning_rate": 7.105263157894737e-07,
"loss": 0.0497,
"step": 9325
},
{
"epoch": 3.2364139840775357,
"grad_norm": 0.9852591156959534,
"learning_rate": 6.842105263157896e-07,
"loss": 0.0526,
"step": 9350
},
{
"epoch": 3.245067497403946,
"grad_norm": 1.8316926956176758,
"learning_rate": 6.578947368421053e-07,
"loss": 0.0517,
"step": 9375
},
{
"epoch": 3.2537210107303567,
"grad_norm": 1.5552564859390259,
"learning_rate": 6.315789473684211e-07,
"loss": 0.0506,
"step": 9400
},
{
"epoch": 3.262374524056767,
"grad_norm": 1.0861077308654785,
"learning_rate": 6.052631578947369e-07,
"loss": 0.0442,
"step": 9425
},
{
"epoch": 3.2710280373831777,
"grad_norm": 1.3339163064956665,
"learning_rate": 5.789473684210526e-07,
"loss": 0.0491,
"step": 9450
},
{
"epoch": 3.2796815507095882,
"grad_norm": 1.8896433115005493,
"learning_rate": 5.526315789473684e-07,
"loss": 0.0428,
"step": 9475
},
{
"epoch": 3.2883350640359987,
"grad_norm": 2.0069425106048584,
"learning_rate": 5.263157894736843e-07,
"loss": 0.047,
"step": 9500
},
{
"epoch": 3.2883350640359987,
"eval_loss": 0.2176118642091751,
"eval_runtime": 6817.142,
"eval_samples_per_second": 1.507,
"eval_steps_per_second": 0.094,
"eval_wer": 16.54269639652078,
"step": 9500
},
{
"epoch": 3.2969885773624092,
"grad_norm": 1.5549033880233765,
"learning_rate": 5.000000000000001e-07,
"loss": 0.0437,
"step": 9525
},
{
"epoch": 3.3056420906888198,
"grad_norm": 1.2936606407165527,
"learning_rate": 4.7368421052631585e-07,
"loss": 0.048,
"step": 9550
},
{
"epoch": 3.3142956040152303,
"grad_norm": 1.3265544176101685,
"learning_rate": 4.473684210526316e-07,
"loss": 0.0416,
"step": 9575
},
{
"epoch": 3.322949117341641,
"grad_norm": 2.265376567840576,
"learning_rate": 4.210526315789474e-07,
"loss": 0.0462,
"step": 9600
},
{
"epoch": 3.3316026306680513,
"grad_norm": 1.4310107231140137,
"learning_rate": 3.9473684210526315e-07,
"loss": 0.0447,
"step": 9625
},
{
"epoch": 3.340256143994462,
"grad_norm": 2.204995632171631,
"learning_rate": 3.6842105263157896e-07,
"loss": 0.0492,
"step": 9650
},
{
"epoch": 3.3489096573208723,
"grad_norm": 1.6492983102798462,
"learning_rate": 3.421052631578948e-07,
"loss": 0.0477,
"step": 9675
},
{
"epoch": 3.357563170647283,
"grad_norm": 1.491167426109314,
"learning_rate": 3.1578947368421055e-07,
"loss": 0.0472,
"step": 9700
},
{
"epoch": 3.3662166839736933,
"grad_norm": 1.4204721450805664,
"learning_rate": 2.894736842105263e-07,
"loss": 0.0483,
"step": 9725
},
{
"epoch": 3.374870197300104,
"grad_norm": 1.7954652309417725,
"learning_rate": 2.6315789473684213e-07,
"loss": 0.0441,
"step": 9750
},
{
"epoch": 3.3835237106265144,
"grad_norm": 1.4561618566513062,
"learning_rate": 2.3684210526315792e-07,
"loss": 0.0477,
"step": 9775
},
{
"epoch": 3.392177223952925,
"grad_norm": 1.0472415685653687,
"learning_rate": 2.105263157894737e-07,
"loss": 0.0466,
"step": 9800
},
{
"epoch": 3.4008307372793354,
"grad_norm": 1.8670166730880737,
"learning_rate": 1.8421052631578948e-07,
"loss": 0.047,
"step": 9825
},
{
"epoch": 3.409484250605746,
"grad_norm": 2.0903148651123047,
"learning_rate": 1.5789473684210527e-07,
"loss": 0.0501,
"step": 9850
},
{
"epoch": 3.4181377639321564,
"grad_norm": 1.7378038167953491,
"learning_rate": 1.3157894736842107e-07,
"loss": 0.0499,
"step": 9875
},
{
"epoch": 3.426791277258567,
"grad_norm": 1.3576021194458008,
"learning_rate": 1.0526315789473685e-07,
"loss": 0.0485,
"step": 9900
},
{
"epoch": 3.4354447905849774,
"grad_norm": 1.5308172702789307,
"learning_rate": 7.894736842105264e-08,
"loss": 0.054,
"step": 9925
},
{
"epoch": 3.444098303911388,
"grad_norm": 1.8448562622070312,
"learning_rate": 5.263157894736842e-08,
"loss": 0.0525,
"step": 9950
},
{
"epoch": 3.4527518172377984,
"grad_norm": 2.0659971237182617,
"learning_rate": 2.631578947368421e-08,
"loss": 0.0476,
"step": 9975
},
{
"epoch": 3.461405330564209,
"grad_norm": 1.5937784910202026,
"learning_rate": 0.0,
"loss": 0.0471,
"step": 10000
},
{
"epoch": 3.461405330564209,
"eval_loss": 0.21720539033412933,
"eval_runtime": 6798.3915,
"eval_samples_per_second": 1.511,
"eval_steps_per_second": 0.094,
"eval_wer": 16.571172166229463,
"step": 10000
}
],
"logging_steps": 25,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.265935704064e+20,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}