|
{ |
|
"best_metric": 17.189821693907874, |
|
"best_model_checkpoint": "./checkpoint-5000", |
|
"epoch": 46.728971962616825, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5136083400296205e-06, |
|
"loss": 0.0026, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8687587131475301e-06, |
|
"loss": 0.0022, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.0711488350670174e-06, |
|
"loss": 0.0021, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.213317753617305e-06, |
|
"loss": 0.0021, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.3230029693718747e-06, |
|
"loss": 0.0016, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.412322158351148e-06, |
|
"loss": 0.0017, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.4876668872198717e-06, |
|
"loss": 0.0017, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.552824062407326e-06, |
|
"loss": 0.0014, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.610223373296667e-06, |
|
"loss": 0.0015, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.661517182828361e-06, |
|
"loss": 0.0017, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.7078803874740543e-06, |
|
"loss": 0.002, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.750178319990197e-06, |
|
"loss": 0.0021, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 2.7890667754365044e-06, |
|
"loss": 0.0016, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 2.8250546392106077e-06, |
|
"loss": 0.0013, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.8585447348549113e-06, |
|
"loss": 0.0017, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 2.889861392935294e-06, |
|
"loss": 0.0014, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 2.9192696063561725e-06, |
|
"loss": 0.0015, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.946988676871634e-06, |
|
"loss": 0.0013, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 2.973202150939645e-06, |
|
"loss": 0.0014, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.998065193492142e-06, |
|
"loss": 0.0013, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0018, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 3e-06, |
|
"loss": 0.002, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"eval_loss": 0.54052734375, |
|
"eval_runtime": 168.3935, |
|
"eval_samples_per_second": 10.072, |
|
"eval_steps_per_second": 0.629, |
|
"eval_wer": 17.691307578008917, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0027, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.05, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 10.28, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.51, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0018, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0021, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.38, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 12.85, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 13.08, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.32, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 13.55, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 14.72, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 14.95, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 15.42, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 16.59, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.06, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 0.53955078125, |
|
"eval_runtime": 168.0749, |
|
"eval_samples_per_second": 10.091, |
|
"eval_steps_per_second": 0.631, |
|
"eval_wer": 17.561292719167902, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 19.63, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.86, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 20.33, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.79, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 21.03, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 21.26, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 21.5, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.73, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 21.96, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 22.2, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 22.43, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.66, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 22.9, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 23.83, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 24.77, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 25.7, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 25.93, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 26.17, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.4, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0007, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.34, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 27.57, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 27.8, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.04, |
|
"eval_loss": 0.56201171875, |
|
"eval_runtime": 168.2778, |
|
"eval_samples_per_second": 10.079, |
|
"eval_steps_per_second": 0.63, |
|
"eval_wer": 17.496285289747398, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 29.21, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 29.91, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 30.14, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 30.37, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 30.61, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 30.84, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 31.07, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 31.31, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 31.54, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 31.78, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 32.01, |
|
"learning_rate": 3e-06, |
|
"loss": 0.002, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 32.48, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0014, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 32.71, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 32.94, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 33.18, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 33.41, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 33.64, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 33.88, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0011, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 34.11, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 34.35, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 34.58, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 34.81, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0018, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 35.05, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0008, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 35.28, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 35.51, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0026, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 35.75, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 35.98, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 36.21, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0012, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 36.45, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 36.68, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0009, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 36.92, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 37.15, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0007, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 37.38, |
|
"learning_rate": 3e-06, |
|
"loss": 0.001, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 37.38, |
|
"eval_loss": 0.54541015625, |
|
"eval_runtime": 168.5354, |
|
"eval_samples_per_second": 10.063, |
|
"eval_steps_per_second": 0.629, |
|
"eval_wer": 17.459138187221395, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 37.62, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0013, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 37.85, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 38.08, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0018, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 38.55, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 38.79, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 39.02, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0026, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 39.25, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0025, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 39.49, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 39.72, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0018, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 39.95, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0024, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 40.19, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 40.42, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 40.65, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 40.89, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0029, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 41.12, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0024, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 41.36, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 41.59, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 41.82, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0021, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 42.06, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0026, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 42.29, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 42.52, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 42.76, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0025, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 43.22, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 43.46, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 43.69, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0032, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 43.93, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0021, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 44.16, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0017, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 44.39, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 44.63, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0026, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 44.86, |
|
"learning_rate": 3e-06, |
|
"loss": 0.002, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 45.09, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0022, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 45.56, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0016, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 45.79, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0029, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 46.03, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 46.26, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0023, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 46.5, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0019, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"learning_rate": 3e-06, |
|
"loss": 0.0015, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"eval_loss": 0.54296875, |
|
"eval_runtime": 168.1004, |
|
"eval_samples_per_second": 10.089, |
|
"eval_steps_per_second": 0.631, |
|
"eval_wer": 17.189821693907874, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 46.73, |
|
"step": 5000, |
|
"total_flos": 4.5881616269897105e+19, |
|
"train_loss": 0.0014968906164169312, |
|
"train_runtime": 9420.014, |
|
"train_samples_per_second": 16.985, |
|
"train_steps_per_second": 0.531 |
|
} |
|
], |
|
"max_steps": 5000, |
|
"num_train_epochs": 47, |
|
"total_flos": 4.5881616269897105e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|