|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.997830802603037, |
|
"global_step": 6900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 3.3600032329559326, |
|
"eval_runtime": 177.4912, |
|
"eval_samples_per_second": 27.601, |
|
"eval_steps_per_second": 0.868, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 3.088737726211548, |
|
"eval_runtime": 176.8112, |
|
"eval_samples_per_second": 27.708, |
|
"eval_steps_per_second": 0.871, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 3.0779149532318115, |
|
"eval_runtime": 176.3294, |
|
"eval_samples_per_second": 27.783, |
|
"eval_steps_per_second": 0.873, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 3.05513334274292, |
|
"eval_runtime": 176.7029, |
|
"eval_samples_per_second": 27.724, |
|
"eval_steps_per_second": 0.872, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 4.8553, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 3.052617311477661, |
|
"eval_runtime": 176.6413, |
|
"eval_samples_per_second": 27.734, |
|
"eval_steps_per_second": 0.872, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_loss": 3.0559935569763184, |
|
"eval_runtime": 175.4042, |
|
"eval_samples_per_second": 27.93, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 3.125081777572632, |
|
"eval_runtime": 174.6598, |
|
"eval_samples_per_second": 28.049, |
|
"eval_steps_per_second": 0.882, |
|
"eval_wer": 1.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_loss": 3.087021589279175, |
|
"eval_runtime": 177.8472, |
|
"eval_samples_per_second": 27.546, |
|
"eval_steps_per_second": 0.866, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 3.08219575881958, |
|
"eval_runtime": 180.0973, |
|
"eval_samples_per_second": 27.202, |
|
"eval_steps_per_second": 0.855, |
|
"eval_wer": 1.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00027679687499999997, |
|
"loss": 3.1133, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 3.048403739929199, |
|
"eval_runtime": 177.987, |
|
"eval_samples_per_second": 27.524, |
|
"eval_steps_per_second": 0.865, |
|
"eval_wer": 1.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 3.0558109283447266, |
|
"eval_runtime": 176.2514, |
|
"eval_samples_per_second": 27.796, |
|
"eval_steps_per_second": 0.874, |
|
"eval_wer": 1.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 3.1018614768981934, |
|
"eval_runtime": 174.5505, |
|
"eval_samples_per_second": 28.066, |
|
"eval_steps_per_second": 0.882, |
|
"eval_wer": 1.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_loss": 3.0914077758789062, |
|
"eval_runtime": 174.0307, |
|
"eval_samples_per_second": 28.15, |
|
"eval_steps_per_second": 0.885, |
|
"eval_wer": 1.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 6.09, |
|
"eval_loss": 3.069120168685913, |
|
"eval_runtime": 175.4381, |
|
"eval_samples_per_second": 27.924, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 1.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00025335937499999995, |
|
"loss": 3.109, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 3.0588901042938232, |
|
"eval_runtime": 175.6572, |
|
"eval_samples_per_second": 27.89, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_loss": 3.050849199295044, |
|
"eval_runtime": 175.685, |
|
"eval_samples_per_second": 27.885, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_loss": 3.054013252258301, |
|
"eval_runtime": 177.236, |
|
"eval_samples_per_second": 27.641, |
|
"eval_steps_per_second": 0.869, |
|
"eval_wer": 1.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"eval_loss": 3.0545613765716553, |
|
"eval_runtime": 177.8822, |
|
"eval_samples_per_second": 27.541, |
|
"eval_steps_per_second": 0.866, |
|
"eval_wer": 1.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 3.0523643493652344, |
|
"eval_runtime": 172.9222, |
|
"eval_samples_per_second": 28.331, |
|
"eval_steps_per_second": 0.891, |
|
"eval_wer": 1.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 0.00022992187499999996, |
|
"loss": 3.1106, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 3.056912422180176, |
|
"eval_runtime": 175.8694, |
|
"eval_samples_per_second": 27.856, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 3.0621554851531982, |
|
"eval_runtime": 175.7147, |
|
"eval_samples_per_second": 27.88, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"eval_loss": 3.0517823696136475, |
|
"eval_runtime": 174.9875, |
|
"eval_samples_per_second": 27.996, |
|
"eval_steps_per_second": 0.88, |
|
"eval_wer": 1.0, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 3.0749499797821045, |
|
"eval_runtime": 176.3933, |
|
"eval_samples_per_second": 27.773, |
|
"eval_steps_per_second": 0.873, |
|
"eval_wer": 1.0, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"eval_loss": 3.0697524547576904, |
|
"eval_runtime": 175.8486, |
|
"eval_samples_per_second": 27.859, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 0.00020648437499999996, |
|
"loss": 3.1058, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_loss": 3.0664749145507812, |
|
"eval_runtime": 176.8396, |
|
"eval_samples_per_second": 27.703, |
|
"eval_steps_per_second": 0.871, |
|
"eval_wer": 1.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"eval_loss": 3.055528402328491, |
|
"eval_runtime": 176.8954, |
|
"eval_samples_per_second": 27.694, |
|
"eval_steps_per_second": 0.871, |
|
"eval_wer": 1.0, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"eval_loss": 3.0589022636413574, |
|
"eval_runtime": 177.2054, |
|
"eval_samples_per_second": 27.646, |
|
"eval_steps_per_second": 0.869, |
|
"eval_wer": 1.0, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"eval_loss": 3.061063051223755, |
|
"eval_runtime": 176.5606, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 0.872, |
|
"eval_wer": 1.0, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"eval_loss": 3.056131601333618, |
|
"eval_runtime": 175.9193, |
|
"eval_samples_per_second": 27.848, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 0.00018304687499999997, |
|
"loss": 3.1071, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"eval_loss": 3.0480217933654785, |
|
"eval_runtime": 175.6518, |
|
"eval_samples_per_second": 27.89, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.48, |
|
"eval_loss": 3.0491693019866943, |
|
"eval_runtime": 173.0223, |
|
"eval_samples_per_second": 28.314, |
|
"eval_steps_per_second": 0.89, |
|
"eval_wer": 1.0, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"eval_loss": 3.057448387145996, |
|
"eval_runtime": 175.4684, |
|
"eval_samples_per_second": 27.92, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 1.0, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 14.35, |
|
"eval_loss": 3.053784132003784, |
|
"eval_runtime": 176.0074, |
|
"eval_samples_per_second": 27.834, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"eval_loss": 3.050539016723633, |
|
"eval_runtime": 175.3243, |
|
"eval_samples_per_second": 27.943, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 1.0, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 0.00015960937499999997, |
|
"loss": 3.1061, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"eval_loss": 3.059952735900879, |
|
"eval_runtime": 176.5589, |
|
"eval_samples_per_second": 27.747, |
|
"eval_steps_per_second": 0.872, |
|
"eval_wer": 1.0, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.65, |
|
"eval_loss": 3.0595669746398926, |
|
"eval_runtime": 177.5778, |
|
"eval_samples_per_second": 27.588, |
|
"eval_steps_per_second": 0.867, |
|
"eval_wer": 1.0, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"eval_loss": 3.0623462200164795, |
|
"eval_runtime": 174.5228, |
|
"eval_samples_per_second": 28.071, |
|
"eval_steps_per_second": 0.882, |
|
"eval_wer": 1.0, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 16.52, |
|
"eval_loss": 3.079986095428467, |
|
"eval_runtime": 175.7994, |
|
"eval_samples_per_second": 27.867, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"eval_loss": 3.0583465099334717, |
|
"eval_runtime": 176.6021, |
|
"eval_samples_per_second": 27.74, |
|
"eval_steps_per_second": 0.872, |
|
"eval_wer": 1.0, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 0.00013617187499999998, |
|
"loss": 3.1036, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"eval_loss": 3.053365468978882, |
|
"eval_runtime": 175.9714, |
|
"eval_samples_per_second": 27.84, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"eval_loss": 3.0563225746154785, |
|
"eval_runtime": 175.7204, |
|
"eval_samples_per_second": 27.88, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"eval_loss": 3.0481250286102295, |
|
"eval_runtime": 175.8171, |
|
"eval_samples_per_second": 27.864, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 18.69, |
|
"eval_loss": 3.0476744174957275, |
|
"eval_runtime": 175.2697, |
|
"eval_samples_per_second": 27.951, |
|
"eval_steps_per_second": 0.879, |
|
"eval_wer": 1.0, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 19.13, |
|
"eval_loss": 3.0504729747772217, |
|
"eval_runtime": 175.8225, |
|
"eval_samples_per_second": 27.863, |
|
"eval_steps_per_second": 0.876, |
|
"eval_wer": 1.0, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"learning_rate": 0.00011273437499999999, |
|
"loss": 3.1086, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.56, |
|
"eval_loss": 3.0484793186187744, |
|
"eval_runtime": 176.184, |
|
"eval_samples_per_second": 27.806, |
|
"eval_steps_per_second": 0.874, |
|
"eval_wer": 1.0, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 3.0480639934539795, |
|
"eval_runtime": 175.5218, |
|
"eval_samples_per_second": 27.911, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"eval_loss": 3.061495780944824, |
|
"eval_runtime": 176.4313, |
|
"eval_samples_per_second": 27.767, |
|
"eval_steps_per_second": 0.873, |
|
"eval_wer": 1.0, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"eval_loss": 3.0657691955566406, |
|
"eval_runtime": 175.5853, |
|
"eval_samples_per_second": 27.901, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 21.3, |
|
"eval_loss": 3.050532341003418, |
|
"eval_runtime": 176.2169, |
|
"eval_samples_per_second": 27.801, |
|
"eval_steps_per_second": 0.874, |
|
"eval_wer": 1.0, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 8.9296875e-05, |
|
"loss": 3.1028, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"eval_loss": 3.0491702556610107, |
|
"eval_runtime": 175.9502, |
|
"eval_samples_per_second": 27.843, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 22.17, |
|
"eval_loss": 3.048527479171753, |
|
"eval_runtime": 174.9586, |
|
"eval_samples_per_second": 28.001, |
|
"eval_steps_per_second": 0.88, |
|
"eval_wer": 1.0, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"eval_loss": 3.0482711791992188, |
|
"eval_runtime": 176.76, |
|
"eval_samples_per_second": 27.716, |
|
"eval_steps_per_second": 0.871, |
|
"eval_wer": 1.0, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 23.04, |
|
"eval_loss": 3.0478527545928955, |
|
"eval_runtime": 174.8893, |
|
"eval_samples_per_second": 28.012, |
|
"eval_steps_per_second": 0.881, |
|
"eval_wer": 1.0, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 23.48, |
|
"eval_loss": 3.05094313621521, |
|
"eval_runtime": 175.0794, |
|
"eval_samples_per_second": 27.982, |
|
"eval_steps_per_second": 0.88, |
|
"eval_wer": 1.0, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 6.5859375e-05, |
|
"loss": 3.1087, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"eval_loss": 3.0529990196228027, |
|
"eval_runtime": 176.1904, |
|
"eval_samples_per_second": 27.805, |
|
"eval_steps_per_second": 0.874, |
|
"eval_wer": 1.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"eval_loss": 3.048621654510498, |
|
"eval_runtime": 175.2104, |
|
"eval_samples_per_second": 27.961, |
|
"eval_steps_per_second": 0.879, |
|
"eval_wer": 1.0, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"eval_loss": 3.051391124725342, |
|
"eval_runtime": 175.9351, |
|
"eval_samples_per_second": 27.845, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"eval_loss": 3.050508499145508, |
|
"eval_runtime": 175.1722, |
|
"eval_samples_per_second": 27.967, |
|
"eval_steps_per_second": 0.879, |
|
"eval_wer": 1.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 25.65, |
|
"eval_loss": 3.050753355026245, |
|
"eval_runtime": 175.581, |
|
"eval_samples_per_second": 27.902, |
|
"eval_steps_per_second": 0.877, |
|
"eval_wer": 1.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 4.2421875e-05, |
|
"loss": 3.1043, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"eval_loss": 3.050074815750122, |
|
"eval_runtime": 175.9337, |
|
"eval_samples_per_second": 27.846, |
|
"eval_steps_per_second": 0.875, |
|
"eval_wer": 1.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"eval_loss": 3.046748638153076, |
|
"eval_runtime": 176.2651, |
|
"eval_samples_per_second": 27.793, |
|
"eval_steps_per_second": 0.874, |
|
"eval_wer": 1.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 26.95, |
|
"eval_loss": 3.046581268310547, |
|
"eval_runtime": 175.4906, |
|
"eval_samples_per_second": 27.916, |
|
"eval_steps_per_second": 0.878, |
|
"eval_wer": 1.0, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 27.39, |
|
"eval_loss": 3.0465457439422607, |
|
"eval_runtime": 174.3257, |
|
"eval_samples_per_second": 28.103, |
|
"eval_steps_per_second": 0.883, |
|
"eval_wer": 1.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"eval_loss": 3.0464954376220703, |
|
"eval_runtime": 174.3395, |
|
"eval_samples_per_second": 28.1, |
|
"eval_steps_per_second": 0.883, |
|
"eval_wer": 1.0, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"learning_rate": 1.8984375e-05, |
|
"loss": 3.1175, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"eval_loss": 3.046614170074463, |
|
"eval_runtime": 174.756, |
|
"eval_samples_per_second": 28.033, |
|
"eval_steps_per_second": 0.881, |
|
"eval_wer": 1.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 28.69, |
|
"eval_loss": 3.046605110168457, |
|
"eval_runtime": 174.8316, |
|
"eval_samples_per_second": 28.021, |
|
"eval_steps_per_second": 0.881, |
|
"eval_wer": 1.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 29.13, |
|
"eval_loss": 3.0464911460876465, |
|
"eval_runtime": 174.5543, |
|
"eval_samples_per_second": 28.066, |
|
"eval_steps_per_second": 0.882, |
|
"eval_wer": 1.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 29.56, |
|
"eval_loss": 3.046463966369629, |
|
"eval_runtime": 175.0973, |
|
"eval_samples_per_second": 27.979, |
|
"eval_steps_per_second": 0.88, |
|
"eval_wer": 1.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 3.046398878097534, |
|
"eval_runtime": 185.0373, |
|
"eval_samples_per_second": 26.476, |
|
"eval_steps_per_second": 0.832, |
|
"eval_wer": 1.0, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 6900, |
|
"total_flos": 1.7654566052477592e+19, |
|
"train_loss": 0.04483215774314991, |
|
"train_runtime": 852.37, |
|
"train_samples_per_second": 518.437, |
|
"train_steps_per_second": 8.095 |
|
} |
|
], |
|
"max_steps": 6900, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.7654566052477592e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|