whisper-small-LDC-V1 / trainer_state.json
thak123's picture
Upload 10 files
bed066c
raw
history blame
58.9 kB
{
"best_metric": 17.449157344638696,
"best_model_checkpoint": "./openai/whisper-small-gom-LDC-v1.0/checkpoint-11000",
"epoch": 2.846790890269151,
"global_step": 11000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 3.36e-07,
"loss": 2.8574,
"step": 25
},
{
"epoch": 0.01,
"learning_rate": 7.359999999999999e-07,
"loss": 2.3034,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 1.1359999999999998e-06,
"loss": 1.5867,
"step": 75
},
{
"epoch": 0.03,
"learning_rate": 1.536e-06,
"loss": 1.0482,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 1.9359999999999998e-06,
"loss": 0.8649,
"step": 125
},
{
"epoch": 0.04,
"learning_rate": 2.3359999999999997e-06,
"loss": 0.7442,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 2.736e-06,
"loss": 0.7494,
"step": 175
},
{
"epoch": 0.05,
"learning_rate": 3.136e-06,
"loss": 0.6801,
"step": 200
},
{
"epoch": 0.06,
"learning_rate": 3.5359999999999997e-06,
"loss": 0.6158,
"step": 225
},
{
"epoch": 0.06,
"learning_rate": 3.936e-06,
"loss": 0.6206,
"step": 250
},
{
"epoch": 0.07,
"learning_rate": 4.3360000000000005e-06,
"loss": 0.5527,
"step": 275
},
{
"epoch": 0.08,
"learning_rate": 4.735999999999999e-06,
"loss": 0.5373,
"step": 300
},
{
"epoch": 0.08,
"learning_rate": 5.136e-06,
"loss": 0.4616,
"step": 325
},
{
"epoch": 0.09,
"learning_rate": 5.535999999999999e-06,
"loss": 0.4252,
"step": 350
},
{
"epoch": 0.1,
"learning_rate": 5.936e-06,
"loss": 0.3137,
"step": 375
},
{
"epoch": 0.1,
"learning_rate": 6.336e-06,
"loss": 0.2815,
"step": 400
},
{
"epoch": 0.11,
"learning_rate": 6.7359999999999995e-06,
"loss": 0.2738,
"step": 425
},
{
"epoch": 0.12,
"learning_rate": 7.136e-06,
"loss": 0.3058,
"step": 450
},
{
"epoch": 0.12,
"learning_rate": 7.5359999999999995e-06,
"loss": 0.2604,
"step": 475
},
{
"epoch": 0.13,
"learning_rate": 7.936e-06,
"loss": 0.2547,
"step": 500
},
{
"epoch": 0.13,
"eval_loss": 0.2495306134223938,
"eval_runtime": 4903.6311,
"eval_samples_per_second": 2.224,
"eval_steps_per_second": 0.278,
"eval_wer": 61.019581505656184,
"step": 500
},
{
"epoch": 0.14,
"learning_rate": 7.984853948791921e-06,
"loss": 0.2135,
"step": 525
},
{
"epoch": 0.14,
"learning_rate": 7.966822935448972e-06,
"loss": 0.2514,
"step": 550
},
{
"epoch": 0.15,
"learning_rate": 7.948791922106022e-06,
"loss": 0.238,
"step": 575
},
{
"epoch": 0.16,
"learning_rate": 7.930760908763071e-06,
"loss": 0.197,
"step": 600
},
{
"epoch": 0.16,
"learning_rate": 7.912729895420122e-06,
"loss": 0.2452,
"step": 625
},
{
"epoch": 0.17,
"learning_rate": 7.894698882077172e-06,
"loss": 0.2489,
"step": 650
},
{
"epoch": 0.17,
"learning_rate": 7.876667868734223e-06,
"loss": 0.2276,
"step": 675
},
{
"epoch": 0.18,
"learning_rate": 7.858636855391273e-06,
"loss": 0.2024,
"step": 700
},
{
"epoch": 0.19,
"learning_rate": 7.840605842048324e-06,
"loss": 0.2235,
"step": 725
},
{
"epoch": 0.19,
"learning_rate": 7.822574828705373e-06,
"loss": 0.1964,
"step": 750
},
{
"epoch": 0.2,
"learning_rate": 7.804543815362423e-06,
"loss": 0.205,
"step": 775
},
{
"epoch": 0.21,
"learning_rate": 7.786512802019472e-06,
"loss": 0.2044,
"step": 800
},
{
"epoch": 0.21,
"learning_rate": 7.768481788676523e-06,
"loss": 0.1805,
"step": 825
},
{
"epoch": 0.22,
"learning_rate": 7.750450775333573e-06,
"loss": 0.1725,
"step": 850
},
{
"epoch": 0.23,
"learning_rate": 7.732419761990624e-06,
"loss": 0.1888,
"step": 875
},
{
"epoch": 0.23,
"learning_rate": 7.714388748647674e-06,
"loss": 0.1867,
"step": 900
},
{
"epoch": 0.24,
"learning_rate": 7.696357735304725e-06,
"loss": 0.2047,
"step": 925
},
{
"epoch": 0.25,
"learning_rate": 7.678326721961773e-06,
"loss": 0.1637,
"step": 950
},
{
"epoch": 0.25,
"learning_rate": 7.660295708618824e-06,
"loss": 0.1941,
"step": 975
},
{
"epoch": 0.26,
"learning_rate": 7.642264695275874e-06,
"loss": 0.1726,
"step": 1000
},
{
"epoch": 0.26,
"eval_loss": 0.18191906809806824,
"eval_runtime": 5156.4322,
"eval_samples_per_second": 2.115,
"eval_steps_per_second": 0.265,
"eval_wer": 47.49931790039247,
"step": 1000
},
{
"epoch": 0.27,
"learning_rate": 7.624233681932924e-06,
"loss": 0.1846,
"step": 1025
},
{
"epoch": 0.27,
"learning_rate": 7.606202668589975e-06,
"loss": 0.1819,
"step": 1050
},
{
"epoch": 0.28,
"learning_rate": 7.588171655247024e-06,
"loss": 0.1714,
"step": 1075
},
{
"epoch": 0.28,
"learning_rate": 7.570140641904075e-06,
"loss": 0.1706,
"step": 1100
},
{
"epoch": 0.29,
"learning_rate": 7.552109628561125e-06,
"loss": 0.1503,
"step": 1125
},
{
"epoch": 0.3,
"learning_rate": 7.534078615218175e-06,
"loss": 0.1544,
"step": 1150
},
{
"epoch": 0.3,
"learning_rate": 7.5160476018752255e-06,
"loss": 0.1811,
"step": 1175
},
{
"epoch": 0.31,
"learning_rate": 7.498016588532275e-06,
"loss": 0.1715,
"step": 1200
},
{
"epoch": 0.32,
"learning_rate": 7.479985575189326e-06,
"loss": 0.1732,
"step": 1225
},
{
"epoch": 0.32,
"learning_rate": 7.461954561846375e-06,
"loss": 0.1624,
"step": 1250
},
{
"epoch": 0.33,
"learning_rate": 7.443923548503425e-06,
"loss": 0.1469,
"step": 1275
},
{
"epoch": 0.34,
"learning_rate": 7.4258925351604755e-06,
"loss": 0.1764,
"step": 1300
},
{
"epoch": 0.34,
"learning_rate": 7.407861521817526e-06,
"loss": 0.1604,
"step": 1325
},
{
"epoch": 0.35,
"learning_rate": 7.389830508474576e-06,
"loss": 0.1561,
"step": 1350
},
{
"epoch": 0.36,
"learning_rate": 7.371799495131626e-06,
"loss": 0.1661,
"step": 1375
},
{
"epoch": 0.36,
"learning_rate": 7.353768481788676e-06,
"loss": 0.1534,
"step": 1400
},
{
"epoch": 0.37,
"learning_rate": 7.335737468445726e-06,
"loss": 0.1423,
"step": 1425
},
{
"epoch": 0.38,
"learning_rate": 7.317706455102777e-06,
"loss": 0.1806,
"step": 1450
},
{
"epoch": 0.38,
"learning_rate": 7.299675441759827e-06,
"loss": 0.1488,
"step": 1475
},
{
"epoch": 0.39,
"learning_rate": 7.281644428416877e-06,
"loss": 0.1877,
"step": 1500
},
{
"epoch": 0.39,
"eval_loss": 0.1481647789478302,
"eval_runtime": 4672.6288,
"eval_samples_per_second": 2.334,
"eval_steps_per_second": 0.292,
"eval_wer": 40.243876844292394,
"step": 1500
},
{
"epoch": 0.39,
"learning_rate": 7.263613415073928e-06,
"loss": 0.1544,
"step": 1525
},
{
"epoch": 0.4,
"learning_rate": 7.2455824017309764e-06,
"loss": 0.1771,
"step": 1550
},
{
"epoch": 0.41,
"learning_rate": 7.227551388388027e-06,
"loss": 0.141,
"step": 1575
},
{
"epoch": 0.41,
"learning_rate": 7.209520375045077e-06,
"loss": 0.1359,
"step": 1600
},
{
"epoch": 0.42,
"learning_rate": 7.191489361702127e-06,
"loss": 0.1242,
"step": 1625
},
{
"epoch": 0.43,
"learning_rate": 7.173458348359178e-06,
"loss": 0.1281,
"step": 1650
},
{
"epoch": 0.43,
"learning_rate": 7.155427335016227e-06,
"loss": 0.1404,
"step": 1675
},
{
"epoch": 0.44,
"learning_rate": 7.137396321673278e-06,
"loss": 0.1502,
"step": 1700
},
{
"epoch": 0.45,
"learning_rate": 7.119365308330328e-06,
"loss": 0.1348,
"step": 1725
},
{
"epoch": 0.45,
"learning_rate": 7.101334294987378e-06,
"loss": 0.1473,
"step": 1750
},
{
"epoch": 0.46,
"learning_rate": 7.0833032816444285e-06,
"loss": 0.1401,
"step": 1775
},
{
"epoch": 0.47,
"learning_rate": 7.065272268301478e-06,
"loss": 0.1089,
"step": 1800
},
{
"epoch": 0.47,
"learning_rate": 7.047241254958528e-06,
"loss": 0.1416,
"step": 1825
},
{
"epoch": 0.48,
"learning_rate": 7.029210241615578e-06,
"loss": 0.1239,
"step": 1850
},
{
"epoch": 0.49,
"learning_rate": 7.011179228272628e-06,
"loss": 0.133,
"step": 1875
},
{
"epoch": 0.49,
"learning_rate": 6.993148214929679e-06,
"loss": 0.1305,
"step": 1900
},
{
"epoch": 0.5,
"learning_rate": 6.975117201586729e-06,
"loss": 0.1352,
"step": 1925
},
{
"epoch": 0.5,
"learning_rate": 6.957086188243779e-06,
"loss": 0.1567,
"step": 1950
},
{
"epoch": 0.51,
"learning_rate": 6.939055174900829e-06,
"loss": 0.1324,
"step": 1975
},
{
"epoch": 0.52,
"learning_rate": 6.921024161557879e-06,
"loss": 0.1354,
"step": 2000
},
{
"epoch": 0.52,
"eval_loss": 0.12738725543022156,
"eval_runtime": 4615.525,
"eval_samples_per_second": 2.363,
"eval_steps_per_second": 0.296,
"eval_wer": 34.36732637941528,
"step": 2000
},
{
"epoch": 0.52,
"learning_rate": 6.9029931482149295e-06,
"loss": 0.14,
"step": 2025
},
{
"epoch": 0.53,
"learning_rate": 6.88496213487198e-06,
"loss": 0.1316,
"step": 2050
},
{
"epoch": 0.54,
"learning_rate": 6.86693112152903e-06,
"loss": 0.1072,
"step": 2075
},
{
"epoch": 0.54,
"learning_rate": 6.84890010818608e-06,
"loss": 0.108,
"step": 2100
},
{
"epoch": 0.55,
"learning_rate": 6.83086909484313e-06,
"loss": 0.0992,
"step": 2125
},
{
"epoch": 0.56,
"learning_rate": 6.8128380815001795e-06,
"loss": 0.1377,
"step": 2150
},
{
"epoch": 0.56,
"learning_rate": 6.79480706815723e-06,
"loss": 0.1154,
"step": 2175
},
{
"epoch": 0.57,
"learning_rate": 6.77677605481428e-06,
"loss": 0.1272,
"step": 2200
},
{
"epoch": 0.58,
"learning_rate": 6.75874504147133e-06,
"loss": 0.1183,
"step": 2225
},
{
"epoch": 0.58,
"learning_rate": 6.740714028128381e-06,
"loss": 0.1131,
"step": 2250
},
{
"epoch": 0.59,
"learning_rate": 6.72268301478543e-06,
"loss": 0.1208,
"step": 2275
},
{
"epoch": 0.6,
"learning_rate": 6.704652001442481e-06,
"loss": 0.1224,
"step": 2300
},
{
"epoch": 0.6,
"learning_rate": 6.686620988099531e-06,
"loss": 0.1313,
"step": 2325
},
{
"epoch": 0.61,
"learning_rate": 6.668589974756581e-06,
"loss": 0.1284,
"step": 2350
},
{
"epoch": 0.61,
"learning_rate": 6.650558961413632e-06,
"loss": 0.1191,
"step": 2375
},
{
"epoch": 0.62,
"learning_rate": 6.6325279480706804e-06,
"loss": 0.1103,
"step": 2400
},
{
"epoch": 0.63,
"learning_rate": 6.614496934727731e-06,
"loss": 0.1218,
"step": 2425
},
{
"epoch": 0.63,
"learning_rate": 6.5964659213847815e-06,
"loss": 0.1555,
"step": 2450
},
{
"epoch": 0.64,
"learning_rate": 6.578434908041831e-06,
"loss": 0.1221,
"step": 2475
},
{
"epoch": 0.65,
"learning_rate": 6.560403894698882e-06,
"loss": 0.1186,
"step": 2500
},
{
"epoch": 0.65,
"eval_loss": 0.11205455660820007,
"eval_runtime": 4616.7031,
"eval_samples_per_second": 2.363,
"eval_steps_per_second": 0.295,
"eval_wer": 31.03028522257435,
"step": 2500
},
{
"epoch": 0.65,
"learning_rate": 6.542372881355932e-06,
"loss": 0.1156,
"step": 2525
},
{
"epoch": 0.66,
"learning_rate": 6.524341868012982e-06,
"loss": 0.1085,
"step": 2550
},
{
"epoch": 0.67,
"learning_rate": 6.506310854670032e-06,
"loss": 0.1327,
"step": 2575
},
{
"epoch": 0.67,
"learning_rate": 6.488279841327083e-06,
"loss": 0.1383,
"step": 2600
},
{
"epoch": 0.68,
"learning_rate": 6.4702488279841325e-06,
"loss": 0.1128,
"step": 2625
},
{
"epoch": 0.69,
"learning_rate": 6.452217814641183e-06,
"loss": 0.1116,
"step": 2650
},
{
"epoch": 0.69,
"learning_rate": 6.434186801298233e-06,
"loss": 0.1082,
"step": 2675
},
{
"epoch": 0.7,
"learning_rate": 6.416155787955282e-06,
"loss": 0.1235,
"step": 2700
},
{
"epoch": 0.71,
"learning_rate": 6.398124774612333e-06,
"loss": 0.1142,
"step": 2725
},
{
"epoch": 0.71,
"learning_rate": 6.3800937612693826e-06,
"loss": 0.1218,
"step": 2750
},
{
"epoch": 0.72,
"learning_rate": 6.362062747926433e-06,
"loss": 0.1019,
"step": 2775
},
{
"epoch": 0.72,
"learning_rate": 6.344031734583484e-06,
"loss": 0.1024,
"step": 2800
},
{
"epoch": 0.73,
"learning_rate": 6.326000721240533e-06,
"loss": 0.1283,
"step": 2825
},
{
"epoch": 0.74,
"learning_rate": 6.307969707897584e-06,
"loss": 0.0894,
"step": 2850
},
{
"epoch": 0.74,
"learning_rate": 6.2899386945546334e-06,
"loss": 0.1031,
"step": 2875
},
{
"epoch": 0.75,
"learning_rate": 6.271907681211684e-06,
"loss": 0.0935,
"step": 2900
},
{
"epoch": 0.76,
"learning_rate": 6.2538766678687345e-06,
"loss": 0.1256,
"step": 2925
},
{
"epoch": 0.76,
"learning_rate": 6.235845654525784e-06,
"loss": 0.0871,
"step": 2950
},
{
"epoch": 0.77,
"learning_rate": 6.217814641182835e-06,
"loss": 0.099,
"step": 2975
},
{
"epoch": 0.78,
"learning_rate": 6.199783627839884e-06,
"loss": 0.1367,
"step": 3000
},
{
"epoch": 0.78,
"eval_loss": 0.10175555944442749,
"eval_runtime": 4617.2532,
"eval_samples_per_second": 2.362,
"eval_steps_per_second": 0.295,
"eval_wer": 28.322874472684532,
"step": 3000
},
{
"epoch": 0.78,
"learning_rate": 6.181752614496934e-06,
"loss": 0.1023,
"step": 3025
},
{
"epoch": 0.79,
"learning_rate": 6.1637216011539845e-06,
"loss": 0.0861,
"step": 3050
},
{
"epoch": 0.8,
"learning_rate": 6.145690587811034e-06,
"loss": 0.1037,
"step": 3075
},
{
"epoch": 0.8,
"learning_rate": 6.127659574468085e-06,
"loss": 0.098,
"step": 3100
},
{
"epoch": 0.81,
"learning_rate": 6.109628561125135e-06,
"loss": 0.1019,
"step": 3125
},
{
"epoch": 0.82,
"learning_rate": 6.091597547782185e-06,
"loss": 0.0791,
"step": 3150
},
{
"epoch": 0.82,
"learning_rate": 6.073566534439235e-06,
"loss": 0.1013,
"step": 3175
},
{
"epoch": 0.83,
"learning_rate": 6.055535521096286e-06,
"loss": 0.0994,
"step": 3200
},
{
"epoch": 0.83,
"learning_rate": 6.037504507753336e-06,
"loss": 0.0995,
"step": 3225
},
{
"epoch": 0.84,
"learning_rate": 6.019473494410386e-06,
"loss": 0.1041,
"step": 3250
},
{
"epoch": 0.85,
"learning_rate": 6.001442481067435e-06,
"loss": 0.1038,
"step": 3275
},
{
"epoch": 0.85,
"learning_rate": 5.9834114677244854e-06,
"loss": 0.0966,
"step": 3300
},
{
"epoch": 0.86,
"learning_rate": 5.965380454381536e-06,
"loss": 0.0625,
"step": 3325
},
{
"epoch": 0.87,
"learning_rate": 5.947349441038586e-06,
"loss": 0.0919,
"step": 3350
},
{
"epoch": 0.87,
"learning_rate": 5.929318427695636e-06,
"loss": 0.0843,
"step": 3375
},
{
"epoch": 0.88,
"learning_rate": 5.911287414352687e-06,
"loss": 0.0989,
"step": 3400
},
{
"epoch": 0.89,
"learning_rate": 5.893256401009736e-06,
"loss": 0.0968,
"step": 3425
},
{
"epoch": 0.89,
"learning_rate": 5.875225387666787e-06,
"loss": 0.0817,
"step": 3450
},
{
"epoch": 0.9,
"learning_rate": 5.8571943743238365e-06,
"loss": 0.0778,
"step": 3475
},
{
"epoch": 0.91,
"learning_rate": 5.839163360980887e-06,
"loss": 0.1058,
"step": 3500
},
{
"epoch": 0.91,
"eval_loss": 0.09466591477394104,
"eval_runtime": 4606.7225,
"eval_samples_per_second": 2.368,
"eval_steps_per_second": 0.296,
"eval_wer": 27.83805905933217,
"step": 3500
},
{
"epoch": 0.91,
"learning_rate": 5.8211323476379375e-06,
"loss": 0.1071,
"step": 3525
},
{
"epoch": 0.92,
"learning_rate": 5.803101334294987e-06,
"loss": 0.088,
"step": 3550
},
{
"epoch": 0.93,
"learning_rate": 5.785070320952037e-06,
"loss": 0.0747,
"step": 3575
},
{
"epoch": 0.93,
"learning_rate": 5.767039307609087e-06,
"loss": 0.0716,
"step": 3600
},
{
"epoch": 0.94,
"learning_rate": 5.749008294266137e-06,
"loss": 0.0809,
"step": 3625
},
{
"epoch": 0.94,
"learning_rate": 5.730977280923188e-06,
"loss": 0.0824,
"step": 3650
},
{
"epoch": 0.95,
"learning_rate": 5.712946267580237e-06,
"loss": 0.0794,
"step": 3675
},
{
"epoch": 0.96,
"learning_rate": 5.694915254237288e-06,
"loss": 0.0896,
"step": 3700
},
{
"epoch": 0.96,
"learning_rate": 5.676884240894338e-06,
"loss": 0.0888,
"step": 3725
},
{
"epoch": 0.97,
"learning_rate": 5.658853227551388e-06,
"loss": 0.0932,
"step": 3750
},
{
"epoch": 0.98,
"learning_rate": 5.6408222142084385e-06,
"loss": 0.1099,
"step": 3775
},
{
"epoch": 0.98,
"learning_rate": 5.622791200865489e-06,
"loss": 0.0872,
"step": 3800
},
{
"epoch": 0.99,
"learning_rate": 5.604760187522539e-06,
"loss": 0.0971,
"step": 3825
},
{
"epoch": 1.0,
"learning_rate": 5.586729174179589e-06,
"loss": 0.0888,
"step": 3850
},
{
"epoch": 1.0,
"learning_rate": 5.568698160836638e-06,
"loss": 0.074,
"step": 3875
},
{
"epoch": 1.01,
"learning_rate": 5.5506671474936885e-06,
"loss": 0.0521,
"step": 3900
},
{
"epoch": 1.02,
"learning_rate": 5.532636134150739e-06,
"loss": 0.0707,
"step": 3925
},
{
"epoch": 1.02,
"learning_rate": 5.514605120807789e-06,
"loss": 0.0581,
"step": 3950
},
{
"epoch": 1.03,
"learning_rate": 5.496574107464839e-06,
"loss": 0.0725,
"step": 3975
},
{
"epoch": 1.04,
"learning_rate": 5.47854309412189e-06,
"loss": 0.0807,
"step": 4000
},
{
"epoch": 1.04,
"eval_loss": 0.08684225380420685,
"eval_runtime": 4613.5658,
"eval_samples_per_second": 2.364,
"eval_steps_per_second": 0.296,
"eval_wer": 24.383486893193695,
"step": 4000
},
{
"epoch": 1.04,
"learning_rate": 5.460512080778939e-06,
"loss": 0.0662,
"step": 4025
},
{
"epoch": 1.05,
"learning_rate": 5.44248106743599e-06,
"loss": 0.0821,
"step": 4050
},
{
"epoch": 1.05,
"learning_rate": 5.4244500540930396e-06,
"loss": 0.0688,
"step": 4075
},
{
"epoch": 1.06,
"learning_rate": 5.40641904075009e-06,
"loss": 0.0563,
"step": 4100
},
{
"epoch": 1.07,
"learning_rate": 5.388388027407141e-06,
"loss": 0.0651,
"step": 4125
},
{
"epoch": 1.07,
"learning_rate": 5.370357014064189e-06,
"loss": 0.0619,
"step": 4150
},
{
"epoch": 1.08,
"learning_rate": 5.35232600072124e-06,
"loss": 0.0745,
"step": 4175
},
{
"epoch": 1.09,
"learning_rate": 5.3342949873782905e-06,
"loss": 0.0664,
"step": 4200
},
{
"epoch": 1.09,
"learning_rate": 5.31626397403534e-06,
"loss": 0.098,
"step": 4225
},
{
"epoch": 1.1,
"learning_rate": 5.298232960692391e-06,
"loss": 0.0801,
"step": 4250
},
{
"epoch": 1.11,
"learning_rate": 5.280201947349441e-06,
"loss": 0.0575,
"step": 4275
},
{
"epoch": 1.11,
"learning_rate": 5.262170934006491e-06,
"loss": 0.0558,
"step": 4300
},
{
"epoch": 1.12,
"learning_rate": 5.244139920663541e-06,
"loss": 0.0637,
"step": 4325
},
{
"epoch": 1.13,
"learning_rate": 5.226108907320591e-06,
"loss": 0.0697,
"step": 4350
},
{
"epoch": 1.13,
"learning_rate": 5.2080778939776415e-06,
"loss": 0.0576,
"step": 4375
},
{
"epoch": 1.14,
"learning_rate": 5.190046880634692e-06,
"loss": 0.0714,
"step": 4400
},
{
"epoch": 1.15,
"learning_rate": 5.172015867291742e-06,
"loss": 0.0675,
"step": 4425
},
{
"epoch": 1.15,
"learning_rate": 5.153984853948791e-06,
"loss": 0.0555,
"step": 4450
},
{
"epoch": 1.16,
"learning_rate": 5.135953840605842e-06,
"loss": 0.0568,
"step": 4475
},
{
"epoch": 1.16,
"learning_rate": 5.1179228272628916e-06,
"loss": 0.0698,
"step": 4500
},
{
"epoch": 1.16,
"eval_loss": 0.08182203769683838,
"eval_runtime": 4681.2354,
"eval_samples_per_second": 2.33,
"eval_steps_per_second": 0.291,
"eval_wer": 23.162003903708523,
"step": 4500
},
{
"epoch": 1.17,
"learning_rate": 5.099891813919942e-06,
"loss": 0.06,
"step": 4525
},
{
"epoch": 1.18,
"learning_rate": 5.081860800576992e-06,
"loss": 0.0637,
"step": 4550
},
{
"epoch": 1.18,
"learning_rate": 5.063829787234042e-06,
"loss": 0.0519,
"step": 4575
},
{
"epoch": 1.19,
"learning_rate": 5.045798773891093e-06,
"loss": 0.04,
"step": 4600
},
{
"epoch": 1.2,
"learning_rate": 5.0277677605481424e-06,
"loss": 0.0714,
"step": 4625
},
{
"epoch": 1.2,
"learning_rate": 5.009736747205193e-06,
"loss": 0.049,
"step": 4650
},
{
"epoch": 1.21,
"learning_rate": 4.9917057338622435e-06,
"loss": 0.0687,
"step": 4675
},
{
"epoch": 1.22,
"learning_rate": 4.973674720519293e-06,
"loss": 0.0469,
"step": 4700
},
{
"epoch": 1.22,
"learning_rate": 4.955643707176344e-06,
"loss": 0.073,
"step": 4725
},
{
"epoch": 1.23,
"learning_rate": 4.9376126938333925e-06,
"loss": 0.0583,
"step": 4750
},
{
"epoch": 1.24,
"learning_rate": 4.919581680490443e-06,
"loss": 0.0631,
"step": 4775
},
{
"epoch": 1.24,
"learning_rate": 4.9015506671474935e-06,
"loss": 0.0599,
"step": 4800
},
{
"epoch": 1.25,
"learning_rate": 4.883519653804543e-06,
"loss": 0.0597,
"step": 4825
},
{
"epoch": 1.26,
"learning_rate": 4.865488640461594e-06,
"loss": 0.0697,
"step": 4850
},
{
"epoch": 1.26,
"learning_rate": 4.847457627118644e-06,
"loss": 0.0638,
"step": 4875
},
{
"epoch": 1.27,
"learning_rate": 4.829426613775694e-06,
"loss": 0.0492,
"step": 4900
},
{
"epoch": 1.27,
"learning_rate": 4.811395600432744e-06,
"loss": 0.0576,
"step": 4925
},
{
"epoch": 1.28,
"learning_rate": 4.793364587089794e-06,
"loss": 0.0631,
"step": 4950
},
{
"epoch": 1.29,
"learning_rate": 4.775333573746845e-06,
"loss": 0.0616,
"step": 4975
},
{
"epoch": 1.29,
"learning_rate": 4.757302560403895e-06,
"loss": 0.0434,
"step": 5000
},
{
"epoch": 1.29,
"eval_loss": 0.07633961737155914,
"eval_runtime": 4650.315,
"eval_samples_per_second": 2.345,
"eval_steps_per_second": 0.293,
"eval_wer": 22.19027430898063,
"step": 5000
},
{
"epoch": 1.3,
"learning_rate": 4.739271547060944e-06,
"loss": 0.0745,
"step": 5025
},
{
"epoch": 1.31,
"learning_rate": 4.7212405337179944e-06,
"loss": 0.0392,
"step": 5050
},
{
"epoch": 1.31,
"learning_rate": 4.703209520375045e-06,
"loss": 0.0609,
"step": 5075
},
{
"epoch": 1.32,
"learning_rate": 4.685178507032095e-06,
"loss": 0.0632,
"step": 5100
},
{
"epoch": 1.33,
"learning_rate": 4.667147493689145e-06,
"loss": 0.0586,
"step": 5125
},
{
"epoch": 1.33,
"learning_rate": 4.649116480346195e-06,
"loss": 0.0514,
"step": 5150
},
{
"epoch": 1.34,
"learning_rate": 4.631085467003245e-06,
"loss": 0.0732,
"step": 5175
},
{
"epoch": 1.35,
"learning_rate": 4.613054453660296e-06,
"loss": 0.0731,
"step": 5200
},
{
"epoch": 1.35,
"learning_rate": 4.5950234403173455e-06,
"loss": 0.0773,
"step": 5225
},
{
"epoch": 1.36,
"learning_rate": 4.576992426974396e-06,
"loss": 0.0475,
"step": 5250
},
{
"epoch": 1.37,
"learning_rate": 4.5589614136314465e-06,
"loss": 0.0695,
"step": 5275
},
{
"epoch": 1.37,
"learning_rate": 4.540930400288496e-06,
"loss": 0.0624,
"step": 5300
},
{
"epoch": 1.38,
"learning_rate": 4.522899386945546e-06,
"loss": 0.0516,
"step": 5325
},
{
"epoch": 1.38,
"learning_rate": 4.5048683736025955e-06,
"loss": 0.0582,
"step": 5350
},
{
"epoch": 1.39,
"learning_rate": 4.486837360259646e-06,
"loss": 0.0495,
"step": 5375
},
{
"epoch": 1.4,
"learning_rate": 4.4688063469166966e-06,
"loss": 0.0689,
"step": 5400
},
{
"epoch": 1.4,
"learning_rate": 4.450775333573746e-06,
"loss": 0.0634,
"step": 5425
},
{
"epoch": 1.41,
"learning_rate": 4.432744320230797e-06,
"loss": 0.0379,
"step": 5450
},
{
"epoch": 1.42,
"learning_rate": 4.414713306887847e-06,
"loss": 0.0701,
"step": 5475
},
{
"epoch": 1.42,
"learning_rate": 4.396682293544897e-06,
"loss": 0.0536,
"step": 5500
},
{
"epoch": 1.42,
"eval_loss": 0.07404134422540665,
"eval_runtime": 4631.2871,
"eval_samples_per_second": 2.355,
"eval_steps_per_second": 0.295,
"eval_wer": 22.3413856066489,
"step": 5500
},
{
"epoch": 1.43,
"learning_rate": 4.3786512802019475e-06,
"loss": 0.0549,
"step": 5525
},
{
"epoch": 1.44,
"learning_rate": 4.360620266858997e-06,
"loss": 0.0664,
"step": 5550
},
{
"epoch": 1.44,
"learning_rate": 4.342589253516048e-06,
"loss": 0.0526,
"step": 5575
},
{
"epoch": 1.45,
"learning_rate": 4.324558240173098e-06,
"loss": 0.0581,
"step": 5600
},
{
"epoch": 1.46,
"learning_rate": 4.306527226830147e-06,
"loss": 0.0379,
"step": 5625
},
{
"epoch": 1.46,
"learning_rate": 4.2884962134871975e-06,
"loss": 0.079,
"step": 5650
},
{
"epoch": 1.47,
"learning_rate": 4.270465200144248e-06,
"loss": 0.061,
"step": 5675
},
{
"epoch": 1.48,
"learning_rate": 4.252434186801298e-06,
"loss": 0.0581,
"step": 5700
},
{
"epoch": 1.48,
"learning_rate": 4.234403173458348e-06,
"loss": 0.0695,
"step": 5725
},
{
"epoch": 1.49,
"learning_rate": 4.216372160115398e-06,
"loss": 0.047,
"step": 5750
},
{
"epoch": 1.49,
"learning_rate": 4.198341146772448e-06,
"loss": 0.0497,
"step": 5775
},
{
"epoch": 1.5,
"learning_rate": 4.180310133429499e-06,
"loss": 0.0632,
"step": 5800
},
{
"epoch": 1.51,
"learning_rate": 4.1622791200865486e-06,
"loss": 0.0566,
"step": 5825
},
{
"epoch": 1.51,
"learning_rate": 4.144248106743599e-06,
"loss": 0.0582,
"step": 5850
},
{
"epoch": 1.52,
"learning_rate": 4.12621709340065e-06,
"loss": 0.0624,
"step": 5875
},
{
"epoch": 1.53,
"learning_rate": 4.108186080057699e-06,
"loss": 0.0523,
"step": 5900
},
{
"epoch": 1.53,
"learning_rate": 4.090155066714749e-06,
"loss": 0.0498,
"step": 5925
},
{
"epoch": 1.54,
"learning_rate": 4.072124053371799e-06,
"loss": 0.0561,
"step": 5950
},
{
"epoch": 1.55,
"learning_rate": 4.054093040028849e-06,
"loss": 0.0548,
"step": 5975
},
{
"epoch": 1.55,
"learning_rate": 4.0360620266859e-06,
"loss": 0.0403,
"step": 6000
},
{
"epoch": 1.55,
"eval_loss": 0.07143695652484894,
"eval_runtime": 4576.9308,
"eval_samples_per_second": 2.383,
"eval_steps_per_second": 0.298,
"eval_wer": 20.996075303796673,
"step": 6000
},
{
"epoch": 1.56,
"learning_rate": 4.018031013342949e-06,
"loss": 0.047,
"step": 6025
},
{
"epoch": 1.57,
"learning_rate": 4e-06,
"loss": 0.0445,
"step": 6050
},
{
"epoch": 1.57,
"learning_rate": 3.9819689866570495e-06,
"loss": 0.0459,
"step": 6075
},
{
"epoch": 1.58,
"learning_rate": 3.9639379733141e-06,
"loss": 0.042,
"step": 6100
},
{
"epoch": 1.59,
"learning_rate": 3.9459069599711505e-06,
"loss": 0.0513,
"step": 6125
},
{
"epoch": 1.59,
"learning_rate": 3.9278759466282e-06,
"loss": 0.055,
"step": 6150
},
{
"epoch": 1.6,
"learning_rate": 3.909844933285251e-06,
"loss": 0.0557,
"step": 6175
},
{
"epoch": 1.6,
"learning_rate": 3.8918139199423e-06,
"loss": 0.0646,
"step": 6200
},
{
"epoch": 1.61,
"learning_rate": 3.873782906599351e-06,
"loss": 0.0493,
"step": 6225
},
{
"epoch": 1.62,
"learning_rate": 3.8557518932564006e-06,
"loss": 0.0523,
"step": 6250
},
{
"epoch": 1.62,
"learning_rate": 3.837720879913451e-06,
"loss": 0.0515,
"step": 6275
},
{
"epoch": 1.63,
"learning_rate": 3.819689866570502e-06,
"loss": 0.054,
"step": 6300
},
{
"epoch": 1.64,
"learning_rate": 3.8016588532275513e-06,
"loss": 0.0643,
"step": 6325
},
{
"epoch": 1.64,
"learning_rate": 3.7836278398846013e-06,
"loss": 0.0699,
"step": 6350
},
{
"epoch": 1.65,
"learning_rate": 3.7655968265416514e-06,
"loss": 0.0474,
"step": 6375
},
{
"epoch": 1.66,
"learning_rate": 3.747565813198702e-06,
"loss": 0.0359,
"step": 6400
},
{
"epoch": 1.66,
"learning_rate": 3.7295347998557516e-06,
"loss": 0.0587,
"step": 6425
},
{
"epoch": 1.67,
"learning_rate": 3.7115037865128017e-06,
"loss": 0.0615,
"step": 6450
},
{
"epoch": 1.68,
"learning_rate": 3.693472773169852e-06,
"loss": 0.0482,
"step": 6475
},
{
"epoch": 1.68,
"learning_rate": 3.6754417598269023e-06,
"loss": 0.0526,
"step": 6500
},
{
"epoch": 1.68,
"eval_loss": 0.06842584162950516,
"eval_runtime": 4579.0646,
"eval_samples_per_second": 2.382,
"eval_steps_per_second": 0.298,
"eval_wer": 20.52385249858333,
"step": 6500
},
{
"epoch": 1.69,
"learning_rate": 3.6574107464839524e-06,
"loss": 0.0636,
"step": 6525
},
{
"epoch": 1.7,
"learning_rate": 3.6393797331410025e-06,
"loss": 0.0671,
"step": 6550
},
{
"epoch": 1.7,
"learning_rate": 3.621348719798052e-06,
"loss": 0.0443,
"step": 6575
},
{
"epoch": 1.71,
"learning_rate": 3.6033177064551027e-06,
"loss": 0.0484,
"step": 6600
},
{
"epoch": 1.71,
"learning_rate": 3.5852866931121528e-06,
"loss": 0.0734,
"step": 6625
},
{
"epoch": 1.72,
"learning_rate": 3.567255679769203e-06,
"loss": 0.0705,
"step": 6650
},
{
"epoch": 1.73,
"learning_rate": 3.549224666426253e-06,
"loss": 0.0559,
"step": 6675
},
{
"epoch": 1.73,
"learning_rate": 3.5311936530833035e-06,
"loss": 0.0569,
"step": 6700
},
{
"epoch": 1.74,
"learning_rate": 3.513162639740353e-06,
"loss": 0.0457,
"step": 6725
},
{
"epoch": 1.75,
"learning_rate": 3.4951316263974032e-06,
"loss": 0.0558,
"step": 6750
},
{
"epoch": 1.75,
"learning_rate": 3.4771006130544533e-06,
"loss": 0.0449,
"step": 6775
},
{
"epoch": 1.76,
"learning_rate": 3.459069599711504e-06,
"loss": 0.0493,
"step": 6800
},
{
"epoch": 1.77,
"learning_rate": 3.441038586368554e-06,
"loss": 0.0608,
"step": 6825
},
{
"epoch": 1.77,
"learning_rate": 3.423007573025604e-06,
"loss": 0.0444,
"step": 6850
},
{
"epoch": 1.78,
"learning_rate": 3.4049765596826537e-06,
"loss": 0.0569,
"step": 6875
},
{
"epoch": 1.79,
"learning_rate": 3.3869455463397042e-06,
"loss": 0.0657,
"step": 6900
},
{
"epoch": 1.79,
"learning_rate": 3.3689145329967543e-06,
"loss": 0.0662,
"step": 6925
},
{
"epoch": 1.8,
"learning_rate": 3.3508835196538044e-06,
"loss": 0.0561,
"step": 6950
},
{
"epoch": 1.81,
"learning_rate": 3.3328525063108545e-06,
"loss": 0.0518,
"step": 6975
},
{
"epoch": 1.81,
"learning_rate": 3.314821492967905e-06,
"loss": 0.0505,
"step": 7000
},
{
"epoch": 1.81,
"eval_loss": 0.06526884436607361,
"eval_runtime": 4621.9984,
"eval_samples_per_second": 2.36,
"eval_steps_per_second": 0.295,
"eval_wer": 19.052616114340882,
"step": 7000
},
{
"epoch": 1.82,
"learning_rate": 3.2967904796249547e-06,
"loss": 0.0534,
"step": 7025
},
{
"epoch": 1.82,
"learning_rate": 3.2787594662820048e-06,
"loss": 0.0494,
"step": 7050
},
{
"epoch": 1.83,
"learning_rate": 3.260728452939055e-06,
"loss": 0.036,
"step": 7075
},
{
"epoch": 1.84,
"learning_rate": 3.2426974395961054e-06,
"loss": 0.0541,
"step": 7100
},
{
"epoch": 1.84,
"learning_rate": 3.2246664262531555e-06,
"loss": 0.0473,
"step": 7125
},
{
"epoch": 1.85,
"learning_rate": 3.206635412910205e-06,
"loss": 0.0508,
"step": 7150
},
{
"epoch": 1.86,
"learning_rate": 3.1886043995672552e-06,
"loss": 0.0481,
"step": 7175
},
{
"epoch": 1.86,
"learning_rate": 3.1705733862243058e-06,
"loss": 0.057,
"step": 7200
},
{
"epoch": 1.87,
"learning_rate": 3.152542372881356e-06,
"loss": 0.0283,
"step": 7225
},
{
"epoch": 1.88,
"learning_rate": 3.134511359538406e-06,
"loss": 0.0436,
"step": 7250
},
{
"epoch": 1.88,
"learning_rate": 3.116480346195456e-06,
"loss": 0.0687,
"step": 7275
},
{
"epoch": 1.89,
"learning_rate": 3.098449332852506e-06,
"loss": 0.0441,
"step": 7300
},
{
"epoch": 1.9,
"learning_rate": 3.0804183195095562e-06,
"loss": 0.0504,
"step": 7325
},
{
"epoch": 1.9,
"learning_rate": 3.0623873061666063e-06,
"loss": 0.0384,
"step": 7350
},
{
"epoch": 1.91,
"learning_rate": 3.0443562928236564e-06,
"loss": 0.04,
"step": 7375
},
{
"epoch": 1.92,
"learning_rate": 3.026325279480707e-06,
"loss": 0.0482,
"step": 7400
},
{
"epoch": 1.92,
"learning_rate": 3.008294266137757e-06,
"loss": 0.0553,
"step": 7425
},
{
"epoch": 1.93,
"learning_rate": 2.9902632527948067e-06,
"loss": 0.0712,
"step": 7450
},
{
"epoch": 1.93,
"learning_rate": 2.9722322394518568e-06,
"loss": 0.0648,
"step": 7475
},
{
"epoch": 1.94,
"learning_rate": 2.9542012261089073e-06,
"loss": 0.053,
"step": 7500
},
{
"epoch": 1.94,
"eval_loss": 0.06240825355052948,
"eval_runtime": 4653.2425,
"eval_samples_per_second": 2.344,
"eval_steps_per_second": 0.293,
"eval_wer": 18.697924318425084,
"step": 7500
},
{
"epoch": 1.95,
"learning_rate": 2.9361702127659574e-06,
"loss": 0.0613,
"step": 7525
},
{
"epoch": 1.95,
"learning_rate": 2.9181391994230075e-06,
"loss": 0.0492,
"step": 7550
},
{
"epoch": 1.96,
"learning_rate": 2.9001081860800576e-06,
"loss": 0.0372,
"step": 7575
},
{
"epoch": 1.97,
"learning_rate": 2.8820771727371077e-06,
"loss": 0.0474,
"step": 7600
},
{
"epoch": 1.97,
"learning_rate": 2.8640461593941577e-06,
"loss": 0.0472,
"step": 7625
},
{
"epoch": 1.98,
"learning_rate": 2.846015146051208e-06,
"loss": 0.0456,
"step": 7650
},
{
"epoch": 1.99,
"learning_rate": 2.827984132708258e-06,
"loss": 0.0496,
"step": 7675
},
{
"epoch": 1.99,
"learning_rate": 2.8099531193653084e-06,
"loss": 0.0517,
"step": 7700
},
{
"epoch": 2.0,
"learning_rate": 2.7919221060223585e-06,
"loss": 0.048,
"step": 7725
},
{
"epoch": 2.01,
"learning_rate": 2.773891092679408e-06,
"loss": 0.0342,
"step": 7750
},
{
"epoch": 2.01,
"learning_rate": 2.7558600793364583e-06,
"loss": 0.0259,
"step": 7775
},
{
"epoch": 2.02,
"learning_rate": 2.737829065993509e-06,
"loss": 0.0323,
"step": 7800
},
{
"epoch": 2.03,
"learning_rate": 2.719798052650559e-06,
"loss": 0.0338,
"step": 7825
},
{
"epoch": 2.03,
"learning_rate": 2.701767039307609e-06,
"loss": 0.0356,
"step": 7850
},
{
"epoch": 2.04,
"learning_rate": 2.6837360259646595e-06,
"loss": 0.0241,
"step": 7875
},
{
"epoch": 2.04,
"learning_rate": 2.665705012621709e-06,
"loss": 0.0359,
"step": 7900
},
{
"epoch": 2.05,
"learning_rate": 2.6476739992787593e-06,
"loss": 0.0336,
"step": 7925
},
{
"epoch": 2.06,
"learning_rate": 2.6296429859358094e-06,
"loss": 0.0395,
"step": 7950
},
{
"epoch": 2.06,
"learning_rate": 2.61161197259286e-06,
"loss": 0.0319,
"step": 7975
},
{
"epoch": 2.07,
"learning_rate": 2.59358095924991e-06,
"loss": 0.0234,
"step": 8000
},
{
"epoch": 2.07,
"eval_loss": 0.0625641718506813,
"eval_runtime": 4589.637,
"eval_samples_per_second": 2.376,
"eval_steps_per_second": 0.297,
"eval_wer": 18.46076353180683,
"step": 8000
},
{
"epoch": 2.08,
"learning_rate": 2.5755499459069596e-06,
"loss": 0.0411,
"step": 8025
},
{
"epoch": 2.08,
"learning_rate": 2.5575189325640097e-06,
"loss": 0.0183,
"step": 8050
},
{
"epoch": 2.09,
"learning_rate": 2.5394879192210603e-06,
"loss": 0.0457,
"step": 8075
},
{
"epoch": 2.1,
"learning_rate": 2.5214569058781103e-06,
"loss": 0.0464,
"step": 8100
},
{
"epoch": 2.1,
"learning_rate": 2.5034258925351604e-06,
"loss": 0.0334,
"step": 8125
},
{
"epoch": 2.11,
"learning_rate": 2.4853948791922105e-06,
"loss": 0.0239,
"step": 8150
},
{
"epoch": 2.12,
"learning_rate": 2.4673638658492606e-06,
"loss": 0.0329,
"step": 8175
},
{
"epoch": 2.12,
"learning_rate": 2.4493328525063107e-06,
"loss": 0.0261,
"step": 8200
},
{
"epoch": 2.13,
"learning_rate": 2.431301839163361e-06,
"loss": 0.0294,
"step": 8225
},
{
"epoch": 2.14,
"learning_rate": 2.413270825820411e-06,
"loss": 0.0277,
"step": 8250
},
{
"epoch": 2.14,
"learning_rate": 2.3952398124774614e-06,
"loss": 0.037,
"step": 8275
},
{
"epoch": 2.15,
"learning_rate": 2.3772087991345115e-06,
"loss": 0.0258,
"step": 8300
},
{
"epoch": 2.15,
"learning_rate": 2.359177785791561e-06,
"loss": 0.0354,
"step": 8325
},
{
"epoch": 2.16,
"learning_rate": 2.3411467724486113e-06,
"loss": 0.0234,
"step": 8350
},
{
"epoch": 2.17,
"learning_rate": 2.3231157591056618e-06,
"loss": 0.0316,
"step": 8375
},
{
"epoch": 2.17,
"learning_rate": 2.305084745762712e-06,
"loss": 0.0358,
"step": 8400
},
{
"epoch": 2.18,
"learning_rate": 2.287053732419762e-06,
"loss": 0.0226,
"step": 8425
},
{
"epoch": 2.19,
"learning_rate": 2.269022719076812e-06,
"loss": 0.0265,
"step": 8450
},
{
"epoch": 2.19,
"learning_rate": 2.25171294626758e-06,
"loss": 0.0345,
"step": 8475
},
{
"epoch": 2.2,
"learning_rate": 2.23368193292463e-06,
"loss": 0.0268,
"step": 8500
},
{
"epoch": 2.2,
"eval_loss": 0.0614020936191082,
"eval_runtime": 4575.3897,
"eval_samples_per_second": 2.384,
"eval_steps_per_second": 0.298,
"eval_wer": 19.62138224862006,
"step": 8500
},
{
"epoch": 2.21,
"learning_rate": 2.2156509195816806e-06,
"loss": 0.0329,
"step": 8525
},
{
"epoch": 2.21,
"learning_rate": 2.1976199062387307e-06,
"loss": 0.0471,
"step": 8550
},
{
"epoch": 2.22,
"learning_rate": 2.1795888928957808e-06,
"loss": 0.0366,
"step": 8575
},
{
"epoch": 2.23,
"learning_rate": 2.161557879552831e-06,
"loss": 0.0247,
"step": 8600
},
{
"epoch": 2.23,
"learning_rate": 2.143526866209881e-06,
"loss": 0.0257,
"step": 8625
},
{
"epoch": 2.24,
"learning_rate": 2.125495852866931e-06,
"loss": 0.0532,
"step": 8650
},
{
"epoch": 2.25,
"learning_rate": 2.107464839523981e-06,
"loss": 0.0343,
"step": 8675
},
{
"epoch": 2.25,
"learning_rate": 2.0894338261810312e-06,
"loss": 0.027,
"step": 8700
},
{
"epoch": 2.26,
"learning_rate": 2.0714028128380817e-06,
"loss": 0.031,
"step": 8725
},
{
"epoch": 2.26,
"learning_rate": 2.053371799495132e-06,
"loss": 0.0467,
"step": 8750
},
{
"epoch": 2.27,
"learning_rate": 2.0353407861521815e-06,
"loss": 0.029,
"step": 8775
},
{
"epoch": 2.28,
"learning_rate": 2.0173097728092316e-06,
"loss": 0.0422,
"step": 8800
},
{
"epoch": 2.28,
"learning_rate": 1.9992787594662817e-06,
"loss": 0.0328,
"step": 8825
},
{
"epoch": 2.29,
"learning_rate": 1.981247746123332e-06,
"loss": 0.0363,
"step": 8850
},
{
"epoch": 2.3,
"learning_rate": 1.9632167327803823e-06,
"loss": 0.031,
"step": 8875
},
{
"epoch": 2.3,
"learning_rate": 1.9451857194374324e-06,
"loss": 0.0302,
"step": 8900
},
{
"epoch": 2.31,
"learning_rate": 1.9271547060944825e-06,
"loss": 0.0248,
"step": 8925
},
{
"epoch": 2.32,
"learning_rate": 1.9091236927515326e-06,
"loss": 0.0307,
"step": 8950
},
{
"epoch": 2.32,
"learning_rate": 1.8910926794085827e-06,
"loss": 0.0344,
"step": 8975
},
{
"epoch": 2.33,
"learning_rate": 1.873061666065633e-06,
"loss": 0.0403,
"step": 9000
},
{
"epoch": 2.33,
"eval_loss": 0.05997191742062569,
"eval_runtime": 4588.7385,
"eval_samples_per_second": 2.377,
"eval_steps_per_second": 0.297,
"eval_wer": 17.98014565450081,
"step": 9000
},
{
"epoch": 2.34,
"learning_rate": 1.8550306527226828e-06,
"loss": 0.0326,
"step": 9025
},
{
"epoch": 2.34,
"learning_rate": 1.8369996393797331e-06,
"loss": 0.0307,
"step": 9050
},
{
"epoch": 2.35,
"learning_rate": 1.8189686260367832e-06,
"loss": 0.0396,
"step": 9075
},
{
"epoch": 2.36,
"learning_rate": 1.8009376126938333e-06,
"loss": 0.044,
"step": 9100
},
{
"epoch": 2.36,
"learning_rate": 1.7829065993508834e-06,
"loss": 0.0292,
"step": 9125
},
{
"epoch": 2.37,
"learning_rate": 1.7648755860079337e-06,
"loss": 0.0436,
"step": 9150
},
{
"epoch": 2.37,
"learning_rate": 1.7468445726649836e-06,
"loss": 0.0318,
"step": 9175
},
{
"epoch": 2.38,
"learning_rate": 1.728813559322034e-06,
"loss": 0.0347,
"step": 9200
},
{
"epoch": 2.39,
"learning_rate": 1.7107825459790838e-06,
"loss": 0.0169,
"step": 9225
},
{
"epoch": 2.39,
"learning_rate": 1.692751532636134e-06,
"loss": 0.0287,
"step": 9250
},
{
"epoch": 2.4,
"learning_rate": 1.6747205192931842e-06,
"loss": 0.0552,
"step": 9275
},
{
"epoch": 2.41,
"learning_rate": 1.6566895059502343e-06,
"loss": 0.0326,
"step": 9300
},
{
"epoch": 2.41,
"learning_rate": 1.6386584926072844e-06,
"loss": 0.0239,
"step": 9325
},
{
"epoch": 2.42,
"learning_rate": 1.6206274792643347e-06,
"loss": 0.0329,
"step": 9350
},
{
"epoch": 2.43,
"learning_rate": 1.6025964659213846e-06,
"loss": 0.0286,
"step": 9375
},
{
"epoch": 2.43,
"learning_rate": 1.5845654525784349e-06,
"loss": 0.0298,
"step": 9400
},
{
"epoch": 2.44,
"learning_rate": 1.566534439235485e-06,
"loss": 0.0391,
"step": 9425
},
{
"epoch": 2.45,
"learning_rate": 1.548503425892535e-06,
"loss": 0.0173,
"step": 9450
},
{
"epoch": 2.45,
"learning_rate": 1.5304724125495851e-06,
"loss": 0.0329,
"step": 9475
},
{
"epoch": 2.46,
"learning_rate": 1.5124413992066354e-06,
"loss": 0.0419,
"step": 9500
},
{
"epoch": 2.46,
"eval_loss": 0.05825132131576538,
"eval_runtime": 4572.6194,
"eval_samples_per_second": 2.385,
"eval_steps_per_second": 0.298,
"eval_wer": 17.92767645392155,
"step": 9500
},
{
"epoch": 2.47,
"learning_rate": 1.4944103858636853e-06,
"loss": 0.0519,
"step": 9525
},
{
"epoch": 2.47,
"learning_rate": 1.4763793725207356e-06,
"loss": 0.0319,
"step": 9550
},
{
"epoch": 2.48,
"learning_rate": 1.4583483591777857e-06,
"loss": 0.0206,
"step": 9575
},
{
"epoch": 2.48,
"learning_rate": 1.4403173458348358e-06,
"loss": 0.0339,
"step": 9600
},
{
"epoch": 2.49,
"learning_rate": 1.422286332491886e-06,
"loss": 0.0218,
"step": 9625
},
{
"epoch": 2.5,
"learning_rate": 1.4042553191489362e-06,
"loss": 0.0353,
"step": 9650
},
{
"epoch": 2.5,
"learning_rate": 1.386224305805986e-06,
"loss": 0.022,
"step": 9675
},
{
"epoch": 2.51,
"learning_rate": 1.3681932924630364e-06,
"loss": 0.0531,
"step": 9700
},
{
"epoch": 2.52,
"learning_rate": 1.3501622791200865e-06,
"loss": 0.0302,
"step": 9725
},
{
"epoch": 2.52,
"learning_rate": 1.3321312657771366e-06,
"loss": 0.0239,
"step": 9750
},
{
"epoch": 2.53,
"learning_rate": 1.3141002524341867e-06,
"loss": 0.0207,
"step": 9775
},
{
"epoch": 2.54,
"learning_rate": 1.296069239091237e-06,
"loss": 0.0271,
"step": 9800
},
{
"epoch": 2.54,
"learning_rate": 1.2780382257482868e-06,
"loss": 0.0245,
"step": 9825
},
{
"epoch": 2.55,
"learning_rate": 1.2600072124053372e-06,
"loss": 0.0217,
"step": 9850
},
{
"epoch": 2.56,
"learning_rate": 1.2419761990623872e-06,
"loss": 0.0297,
"step": 9875
},
{
"epoch": 2.56,
"learning_rate": 1.2239451857194373e-06,
"loss": 0.0313,
"step": 9900
},
{
"epoch": 2.57,
"learning_rate": 1.2059141723764874e-06,
"loss": 0.0269,
"step": 9925
},
{
"epoch": 2.58,
"learning_rate": 1.1878831590335377e-06,
"loss": 0.0273,
"step": 9950
},
{
"epoch": 2.58,
"learning_rate": 1.1698521456905876e-06,
"loss": 0.033,
"step": 9975
},
{
"epoch": 2.59,
"learning_rate": 1.151821132347638e-06,
"loss": 0.0333,
"step": 10000
},
{
"epoch": 2.59,
"eval_loss": 0.057692721486091614,
"eval_runtime": 4599.0725,
"eval_samples_per_second": 2.372,
"eval_steps_per_second": 0.297,
"eval_wer": 17.71779965160451,
"step": 10000
},
{
"epoch": 2.59,
"learning_rate": 1.133790119004688e-06,
"loss": 0.0359,
"step": 10025
},
{
"epoch": 2.6,
"learning_rate": 1.115759105661738e-06,
"loss": 0.0413,
"step": 10050
},
{
"epoch": 2.61,
"learning_rate": 1.0977280923187882e-06,
"loss": 0.0304,
"step": 10075
},
{
"epoch": 2.61,
"learning_rate": 1.0796970789758385e-06,
"loss": 0.0297,
"step": 10100
},
{
"epoch": 2.62,
"learning_rate": 1.0616660656328886e-06,
"loss": 0.0365,
"step": 10125
},
{
"epoch": 2.63,
"learning_rate": 1.0436350522899387e-06,
"loss": 0.041,
"step": 10150
},
{
"epoch": 2.63,
"learning_rate": 1.025604038946989e-06,
"loss": 0.0273,
"step": 10175
},
{
"epoch": 2.64,
"learning_rate": 1.0075730256040389e-06,
"loss": 0.0293,
"step": 10200
},
{
"epoch": 2.65,
"learning_rate": 9.89542012261089e-07,
"loss": 0.0254,
"step": 10225
},
{
"epoch": 2.65,
"learning_rate": 9.71510998918139e-07,
"loss": 0.0233,
"step": 10250
},
{
"epoch": 2.66,
"learning_rate": 9.534799855751892e-07,
"loss": 0.0252,
"step": 10275
},
{
"epoch": 2.67,
"learning_rate": 9.354489722322394e-07,
"loss": 0.0246,
"step": 10300
},
{
"epoch": 2.67,
"learning_rate": 9.174179588892895e-07,
"loss": 0.0196,
"step": 10325
},
{
"epoch": 2.68,
"learning_rate": 8.993869455463396e-07,
"loss": 0.0354,
"step": 10350
},
{
"epoch": 2.69,
"learning_rate": 8.813559322033897e-07,
"loss": 0.0352,
"step": 10375
},
{
"epoch": 2.69,
"learning_rate": 8.633249188604399e-07,
"loss": 0.0279,
"step": 10400
},
{
"epoch": 2.7,
"learning_rate": 8.4529390551749e-07,
"loss": 0.0329,
"step": 10425
},
{
"epoch": 2.7,
"learning_rate": 8.272628921745401e-07,
"loss": 0.0247,
"step": 10450
},
{
"epoch": 2.71,
"learning_rate": 8.092318788315903e-07,
"loss": 0.0222,
"step": 10475
},
{
"epoch": 2.72,
"learning_rate": 7.912008654886404e-07,
"loss": 0.0264,
"step": 10500
},
{
"epoch": 2.72,
"eval_loss": 0.05672454833984375,
"eval_runtime": 4593.3082,
"eval_samples_per_second": 2.375,
"eval_steps_per_second": 0.297,
"eval_wer": 17.501626545217956,
"step": 10500
},
{
"epoch": 2.72,
"learning_rate": 7.731698521456905e-07,
"loss": 0.0232,
"step": 10525
},
{
"epoch": 2.73,
"learning_rate": 7.551388388027407e-07,
"loss": 0.0227,
"step": 10550
},
{
"epoch": 2.74,
"learning_rate": 7.371078254597908e-07,
"loss": 0.0385,
"step": 10575
},
{
"epoch": 2.74,
"learning_rate": 7.190768121168409e-07,
"loss": 0.0323,
"step": 10600
},
{
"epoch": 2.75,
"learning_rate": 7.010457987738911e-07,
"loss": 0.0201,
"step": 10625
},
{
"epoch": 2.76,
"learning_rate": 6.830147854309412e-07,
"loss": 0.0336,
"step": 10650
},
{
"epoch": 2.76,
"learning_rate": 6.649837720879913e-07,
"loss": 0.0348,
"step": 10675
},
{
"epoch": 2.77,
"learning_rate": 6.469527587450415e-07,
"loss": 0.0238,
"step": 10700
},
{
"epoch": 2.78,
"learning_rate": 6.289217454020915e-07,
"loss": 0.0223,
"step": 10725
},
{
"epoch": 2.78,
"learning_rate": 6.108907320591416e-07,
"loss": 0.0294,
"step": 10750
},
{
"epoch": 2.79,
"learning_rate": 5.928597187161918e-07,
"loss": 0.0273,
"step": 10775
},
{
"epoch": 2.8,
"learning_rate": 5.748287053732419e-07,
"loss": 0.035,
"step": 10800
},
{
"epoch": 2.8,
"learning_rate": 5.56797692030292e-07,
"loss": 0.0388,
"step": 10825
},
{
"epoch": 2.81,
"learning_rate": 5.387666786873421e-07,
"loss": 0.0218,
"step": 10850
},
{
"epoch": 2.81,
"learning_rate": 5.207356653443923e-07,
"loss": 0.0347,
"step": 10875
},
{
"epoch": 2.82,
"learning_rate": 5.027046520014424e-07,
"loss": 0.0211,
"step": 10900
},
{
"epoch": 2.83,
"learning_rate": 4.846736386584926e-07,
"loss": 0.0265,
"step": 10925
},
{
"epoch": 2.83,
"learning_rate": 4.666426253155427e-07,
"loss": 0.0362,
"step": 10950
},
{
"epoch": 2.84,
"learning_rate": 4.4861161197259284e-07,
"loss": 0.0253,
"step": 10975
},
{
"epoch": 2.85,
"learning_rate": 4.30580598629643e-07,
"loss": 0.0274,
"step": 11000
},
{
"epoch": 2.85,
"eval_loss": 0.055780503898859024,
"eval_runtime": 4593.0706,
"eval_samples_per_second": 2.375,
"eval_steps_per_second": 0.297,
"eval_wer": 17.449157344638696,
"step": 11000
}
],
"max_steps": 11592,
"num_train_epochs": 3,
"total_flos": 5.078872177311744e+19,
"trial_name": null,
"trial_params": null
}