{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.99760191846522,
  "global_step": 10400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.96,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 14.732,
      "step": 100
    },
    {
      "epoch": 1.92,
      "learning_rate": 9.95e-05,
      "loss": 4.5706,
      "step": 200
    },
    {
      "epoch": 2.88,
      "learning_rate": 0.0001495,
      "loss": 3.4967,
      "step": 300
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.00019950000000000002,
      "loss": 3.3326,
      "step": 400
    },
    {
      "epoch": 4.81,
      "learning_rate": 0.0002495,
      "loss": 3.0304,
      "step": 500
    },
    {
      "epoch": 4.81,
      "eval_loss": 1.5675914287567139,
      "eval_runtime": 78.6195,
      "eval_samples_per_second": 14.144,
      "eval_steps_per_second": 3.536,
      "eval_wer": 1.0554260089686098,
      "step": 500
    },
    {
      "epoch": 5.77,
      "learning_rate": 0.0002995,
      "loss": 2.0254,
      "step": 600
    },
    {
      "epoch": 6.73,
      "learning_rate": 0.0003495,
      "loss": 1.7226,
      "step": 700
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.0003995,
      "loss": 1.6212,
      "step": 800
    },
    {
      "epoch": 8.65,
      "learning_rate": 0.00044950000000000003,
      "loss": 1.5773,
      "step": 900
    },
    {
      "epoch": 9.61,
      "learning_rate": 0.0004995,
      "loss": 1.5263,
      "step": 1000
    },
    {
      "epoch": 9.61,
      "eval_loss": 0.46925902366638184,
      "eval_runtime": 52.7689,
      "eval_samples_per_second": 21.073,
      "eval_steps_per_second": 5.268,
      "eval_wer": 0.8023318385650224,
      "step": 1000
    },
    {
      "epoch": 10.58,
      "learning_rate": 0.0005495,
      "loss": 1.5324,
      "step": 1100
    },
    {
      "epoch": 11.54,
      "learning_rate": 0.0005995000000000001,
      "loss": 1.5058,
      "step": 1200
    },
    {
      "epoch": 12.5,
      "learning_rate": 0.0006495,
      "loss": 1.512,
      "step": 1300
    },
    {
      "epoch": 13.46,
      "learning_rate": 0.0006995,
      "loss": 1.5222,
      "step": 1400
    },
    {
      "epoch": 14.42,
      "learning_rate": 0.0007495000000000001,
      "loss": 1.5299,
      "step": 1500
    },
    {
      "epoch": 14.42,
      "eval_loss": 0.43676647543907166,
      "eval_runtime": 52.3008,
      "eval_samples_per_second": 21.262,
      "eval_steps_per_second": 5.315,
      "eval_wer": 0.7311210762331839,
      "step": 1500
    },
    {
      "epoch": 15.38,
      "learning_rate": 0.0007995,
      "loss": 1.5168,
      "step": 1600
    },
    {
      "epoch": 16.35,
      "learning_rate": 0.0008495000000000001,
      "loss": 1.5392,
      "step": 1700
    },
    {
      "epoch": 17.31,
      "learning_rate": 0.0008995,
      "loss": 1.5365,
      "step": 1800
    },
    {
      "epoch": 18.27,
      "learning_rate": 0.0009495,
      "loss": 1.4991,
      "step": 1900
    },
    {
      "epoch": 19.23,
      "learning_rate": 0.0009995000000000002,
      "loss": 1.5063,
      "step": 2000
    },
    {
      "epoch": 19.23,
      "eval_loss": 0.43596330285072327,
      "eval_runtime": 50.4313,
      "eval_samples_per_second": 22.05,
      "eval_steps_per_second": 5.512,
      "eval_wer": 0.7302242152466367,
      "step": 2000
    },
    {
      "epoch": 20.19,
      "learning_rate": 0.0009882142857142856,
      "loss": 1.5283,
      "step": 2100
    },
    {
      "epoch": 21.15,
      "learning_rate": 0.0009763095238095238,
      "loss": 1.5146,
      "step": 2200
    },
    {
      "epoch": 22.12,
      "learning_rate": 0.0009644047619047619,
      "loss": 1.5236,
      "step": 2300
    },
    {
      "epoch": 23.08,
      "learning_rate": 0.0009525,
      "loss": 1.4972,
      "step": 2400
    },
    {
      "epoch": 24.04,
      "learning_rate": 0.0009405952380952381,
      "loss": 1.455,
      "step": 2500
    },
    {
      "epoch": 24.04,
      "eval_loss": 0.4213278293609619,
      "eval_runtime": 50.1354,
      "eval_samples_per_second": 22.18,
      "eval_steps_per_second": 5.545,
      "eval_wer": 0.6692376681614349,
      "step": 2500
    },
    {
      "epoch": 25.0,
      "learning_rate": 0.0009286904761904762,
      "loss": 1.4424,
      "step": 2600
    },
    {
      "epoch": 25.96,
      "learning_rate": 0.0009167857142857144,
      "loss": 1.4434,
      "step": 2700
    },
    {
      "epoch": 26.92,
      "learning_rate": 0.0009048809523809524,
      "loss": 1.4513,
      "step": 2800
    },
    {
      "epoch": 27.88,
      "learning_rate": 0.0008929761904761905,
      "loss": 1.4328,
      "step": 2900
    },
    {
      "epoch": 28.84,
      "learning_rate": 0.0008811904761904762,
      "loss": 1.4755,
      "step": 3000
    },
    {
      "epoch": 28.84,
      "eval_loss": 0.4329236149787903,
      "eval_runtime": 50.0668,
      "eval_samples_per_second": 22.21,
      "eval_steps_per_second": 5.553,
      "eval_wer": 0.5942600896860987,
      "step": 3000
    },
    {
      "epoch": 29.81,
      "learning_rate": 0.0008692857142857144,
      "loss": 1.4344,
      "step": 3100
    },
    {
      "epoch": 30.77,
      "learning_rate": 0.0008573809523809523,
      "loss": 1.4326,
      "step": 3200
    },
    {
      "epoch": 31.73,
      "learning_rate": 0.0008454761904761905,
      "loss": 1.4016,
      "step": 3300
    },
    {
      "epoch": 32.69,
      "learning_rate": 0.0008335714285714285,
      "loss": 1.3705,
      "step": 3400
    },
    {
      "epoch": 33.65,
      "learning_rate": 0.0008216666666666667,
      "loss": 1.352,
      "step": 3500
    },
    {
      "epoch": 33.65,
      "eval_loss": 0.40741658210754395,
      "eval_runtime": 76.8047,
      "eval_samples_per_second": 14.478,
      "eval_steps_per_second": 3.62,
      "eval_wer": 0.5765022421524664,
      "step": 3500
    },
    {
      "epoch": 34.61,
      "learning_rate": 0.0008097619047619048,
      "loss": 1.3511,
      "step": 3600
    },
    {
      "epoch": 35.58,
      "learning_rate": 0.0007978571428571428,
      "loss": 1.3237,
      "step": 3700
    },
    {
      "epoch": 36.54,
      "learning_rate": 0.000785952380952381,
      "loss": 1.3165,
      "step": 3800
    },
    {
      "epoch": 37.5,
      "learning_rate": 0.0007740476190476191,
      "loss": 1.3342,
      "step": 3900
    },
    {
      "epoch": 38.46,
      "learning_rate": 0.0007621428571428572,
      "loss": 1.3122,
      "step": 4000
    },
    {
      "epoch": 38.46,
      "eval_loss": 0.38659143447875977,
      "eval_runtime": 80.318,
      "eval_samples_per_second": 13.845,
      "eval_steps_per_second": 3.461,
      "eval_wer": 0.56304932735426,
      "step": 4000
    },
    {
      "epoch": 39.42,
      "learning_rate": 0.0007502380952380953,
      "loss": 1.3098,
      "step": 4100
    },
    {
      "epoch": 40.38,
      "learning_rate": 0.0007383333333333334,
      "loss": 1.3253,
      "step": 4200
    },
    {
      "epoch": 41.35,
      "learning_rate": 0.0007264285714285714,
      "loss": 1.2787,
      "step": 4300
    },
    {
      "epoch": 42.31,
      "learning_rate": 0.0007146428571428572,
      "loss": 1.274,
      "step": 4400
    },
    {
      "epoch": 43.27,
      "learning_rate": 0.0007027380952380952,
      "loss": 1.2799,
      "step": 4500
    },
    {
      "epoch": 43.27,
      "eval_loss": 0.3859865069389343,
      "eval_runtime": 77.9764,
      "eval_samples_per_second": 14.261,
      "eval_steps_per_second": 3.565,
      "eval_wer": 0.5479820627802691,
      "step": 4500
    },
    {
      "epoch": 44.23,
      "learning_rate": 0.0006908333333333333,
      "loss": 1.2525,
      "step": 4600
    },
    {
      "epoch": 45.19,
      "learning_rate": 0.0006789285714285714,
      "loss": 1.2245,
      "step": 4700
    },
    {
      "epoch": 46.15,
      "learning_rate": 0.0006670238095238096,
      "loss": 1.2158,
      "step": 4800
    },
    {
      "epoch": 47.12,
      "learning_rate": 0.0006551190476190476,
      "loss": 1.2028,
      "step": 4900
    },
    {
      "epoch": 48.08,
      "learning_rate": 0.0006432142857142857,
      "loss": 1.212,
      "step": 5000
    },
    {
      "epoch": 48.08,
      "eval_loss": 0.358958899974823,
      "eval_runtime": 78.2642,
      "eval_samples_per_second": 14.208,
      "eval_steps_per_second": 3.552,
      "eval_wer": 0.5316591928251121,
      "step": 5000
    },
    {
      "epoch": 49.04,
      "learning_rate": 0.0006313095238095238,
      "loss": 1.1808,
      "step": 5100
    },
    {
      "epoch": 50.0,
      "learning_rate": 0.0006194047619047619,
      "loss": 1.1783,
      "step": 5200
    },
    {
      "epoch": 50.96,
      "learning_rate": 0.0006075000000000001,
      "loss": 1.1675,
      "step": 5300
    },
    {
      "epoch": 51.92,
      "learning_rate": 0.0005955952380952381,
      "loss": 1.1867,
      "step": 5400
    },
    {
      "epoch": 52.88,
      "learning_rate": 0.0005836904761904763,
      "loss": 1.1645,
      "step": 5500
    },
    {
      "epoch": 52.88,
      "eval_loss": 0.328298419713974,
      "eval_runtime": 77.7284,
      "eval_samples_per_second": 14.306,
      "eval_steps_per_second": 3.577,
      "eval_wer": 0.475695067264574,
      "step": 5500
    },
    {
      "epoch": 53.84,
      "learning_rate": 0.0005717857142857142,
      "loss": 1.1407,
      "step": 5600
    },
    {
      "epoch": 54.81,
      "learning_rate": 0.0005598809523809523,
      "loss": 1.1217,
      "step": 5700
    },
    {
      "epoch": 55.77,
      "learning_rate": 0.0005479761904761905,
      "loss": 1.118,
      "step": 5800
    },
    {
      "epoch": 56.73,
      "learning_rate": 0.0005360714285714285,
      "loss": 1.0727,
      "step": 5900
    },
    {
      "epoch": 57.69,
      "learning_rate": 0.0005241666666666667,
      "loss": 1.0854,
      "step": 6000
    },
    {
      "epoch": 57.69,
      "eval_loss": 0.3161650598049164,
      "eval_runtime": 82.3243,
      "eval_samples_per_second": 13.508,
      "eval_steps_per_second": 3.377,
      "eval_wer": 0.4686995515695067,
      "step": 6000
    },
    {
      "epoch": 58.65,
      "learning_rate": 0.0005122619047619048,
      "loss": 1.0673,
      "step": 6100
    },
    {
      "epoch": 59.61,
      "learning_rate": 0.0005003571428571429,
      "loss": 1.0595,
      "step": 6200
    },
    {
      "epoch": 60.58,
      "learning_rate": 0.000488452380952381,
      "loss": 1.0474,
      "step": 6300
    },
    {
      "epoch": 61.54,
      "learning_rate": 0.00047654761904761906,
      "loss": 1.0424,
      "step": 6400
    },
    {
      "epoch": 62.5,
      "learning_rate": 0.00046464285714285715,
      "loss": 1.0292,
      "step": 6500
    },
    {
      "epoch": 62.5,
      "eval_loss": 0.3125934600830078,
      "eval_runtime": 78.0974,
      "eval_samples_per_second": 14.239,
      "eval_steps_per_second": 3.56,
      "eval_wer": 0.44161434977578473,
      "step": 6500
    },
    {
      "epoch": 63.46,
      "learning_rate": 0.00045285714285714287,
      "loss": 1.0259,
      "step": 6600
    },
    {
      "epoch": 64.42,
      "learning_rate": 0.00044095238095238096,
      "loss": 1.0141,
      "step": 6700
    },
    {
      "epoch": 65.38,
      "learning_rate": 0.00042916666666666667,
      "loss": 0.9839,
      "step": 6800
    },
    {
      "epoch": 66.35,
      "learning_rate": 0.00041726190476190476,
      "loss": 0.9553,
      "step": 6900
    },
    {
      "epoch": 67.31,
      "learning_rate": 0.00040535714285714285,
      "loss": 0.9607,
      "step": 7000
    },
    {
      "epoch": 67.31,
      "eval_loss": 0.2990323007106781,
      "eval_runtime": 78.5216,
      "eval_samples_per_second": 14.162,
      "eval_steps_per_second": 3.54,
      "eval_wer": 0.40663677130044845,
      "step": 7000
    },
    {
      "epoch": 68.27,
      "learning_rate": 0.00039345238095238094,
      "loss": 0.944,
      "step": 7100
    },
    {
      "epoch": 69.23,
      "learning_rate": 0.00038154761904761903,
      "loss": 0.9203,
      "step": 7200
    },
    {
      "epoch": 70.19,
      "learning_rate": 0.0003696428571428572,
      "loss": 0.9164,
      "step": 7300
    },
    {
      "epoch": 71.15,
      "learning_rate": 0.00035773809523809527,
      "loss": 0.8996,
      "step": 7400
    },
    {
      "epoch": 72.12,
      "learning_rate": 0.00034583333333333335,
      "loss": 0.9156,
      "step": 7500
    },
    {
      "epoch": 72.12,
      "eval_loss": 0.28695425391197205,
      "eval_runtime": 77.1644,
      "eval_samples_per_second": 14.411,
      "eval_steps_per_second": 3.603,
      "eval_wer": 0.4008968609865471,
      "step": 7500
    },
    {
      "epoch": 73.08,
      "learning_rate": 0.0003339285714285714,
      "loss": 0.8916,
      "step": 7600
    },
    {
      "epoch": 74.04,
      "learning_rate": 0.00032202380952380953,
      "loss": 0.8818,
      "step": 7700
    },
    {
      "epoch": 75.0,
      "learning_rate": 0.0003101190476190476,
      "loss": 0.8714,
      "step": 7800
    },
    {
      "epoch": 75.96,
      "learning_rate": 0.0002982142857142857,
      "loss": 0.8529,
      "step": 7900
    },
    {
      "epoch": 76.92,
      "learning_rate": 0.0002863095238095238,
      "loss": 0.8329,
      "step": 8000
    },
    {
      "epoch": 76.92,
      "eval_loss": 0.27907443046569824,
      "eval_runtime": 76.6666,
      "eval_samples_per_second": 14.504,
      "eval_steps_per_second": 3.626,
      "eval_wer": 0.39085201793721974,
      "step": 8000
    },
    {
      "epoch": 77.88,
      "learning_rate": 0.00027440476190476195,
      "loss": 0.822,
      "step": 8100
    },
    {
      "epoch": 78.84,
      "learning_rate": 0.00026250000000000004,
      "loss": 0.8164,
      "step": 8200
    },
    {
      "epoch": 79.81,
      "learning_rate": 0.0002505952380952381,
      "loss": 0.8086,
      "step": 8300
    },
    {
      "epoch": 80.77,
      "learning_rate": 0.00023869047619047622,
      "loss": 0.7969,
      "step": 8400
    },
    {
      "epoch": 81.73,
      "learning_rate": 0.00022678571428571428,
      "loss": 0.7979,
      "step": 8500
    },
    {
      "epoch": 81.73,
      "eval_loss": 0.27704960107803345,
      "eval_runtime": 76.769,
      "eval_samples_per_second": 14.485,
      "eval_steps_per_second": 3.621,
      "eval_wer": 0.3669955156950673,
      "step": 8500
    },
    {
      "epoch": 82.69,
      "learning_rate": 0.0002148809523809524,
      "loss": 0.7862,
      "step": 8600
    },
    {
      "epoch": 83.65,
      "learning_rate": 0.0002029761904761905,
      "loss": 0.7707,
      "step": 8700
    },
    {
      "epoch": 84.61,
      "learning_rate": 0.00019107142857142855,
      "loss": 0.7472,
      "step": 8800
    },
    {
      "epoch": 85.58,
      "learning_rate": 0.00017916666666666667,
      "loss": 0.7583,
      "step": 8900
    },
    {
      "epoch": 86.54,
      "learning_rate": 0.00016726190476190476,
      "loss": 0.7144,
      "step": 9000
    },
    {
      "epoch": 86.54,
      "eval_loss": 0.2840667963027954,
      "eval_runtime": 77.8679,
      "eval_samples_per_second": 14.281,
      "eval_steps_per_second": 3.57,
      "eval_wer": 0.36609865470852015,
      "step": 9000
    },
    {
      "epoch": 87.5,
      "learning_rate": 0.00015535714285714287,
      "loss": 0.7302,
      "step": 9100
    },
    {
      "epoch": 88.46,
      "learning_rate": 0.0001435714285714286,
      "loss": 0.7214,
      "step": 9200
    },
    {
      "epoch": 89.42,
      "learning_rate": 0.00013166666666666665,
      "loss": 0.7179,
      "step": 9300
    },
    {
      "epoch": 90.38,
      "learning_rate": 0.00011976190476190477,
      "loss": 0.7037,
      "step": 9400
    },
    {
      "epoch": 91.35,
      "learning_rate": 0.00010785714285714286,
      "loss": 0.6997,
      "step": 9500
    },
    {
      "epoch": 91.35,
      "eval_loss": 0.2721162438392639,
      "eval_runtime": 77.3441,
      "eval_samples_per_second": 14.377,
      "eval_steps_per_second": 3.594,
      "eval_wer": 0.3485201793721973,
      "step": 9500
    },
    {
      "epoch": 92.31,
      "learning_rate": 9.595238095238096e-05,
      "loss": 0.676,
      "step": 9600
    },
    {
      "epoch": 93.27,
      "learning_rate": 8.404761904761905e-05,
      "loss": 0.6748,
      "step": 9700
    },
    {
      "epoch": 94.23,
      "learning_rate": 7.214285714285715e-05,
      "loss": 0.6668,
      "step": 9800
    },
    {
      "epoch": 95.19,
      "learning_rate": 6.023809523809524e-05,
      "loss": 0.662,
      "step": 9900
    },
    {
      "epoch": 96.15,
      "learning_rate": 4.8333333333333334e-05,
      "loss": 0.6568,
      "step": 10000
    },
    {
      "epoch": 96.15,
      "eval_loss": 0.2680588364601135,
      "eval_runtime": 76.864,
      "eval_samples_per_second": 14.467,
      "eval_steps_per_second": 3.617,
      "eval_wer": 0.34367713004484307,
      "step": 10000
    },
    {
      "epoch": 97.12,
      "learning_rate": 3.642857142857143e-05,
      "loss": 0.6517,
      "step": 10100
    },
    {
      "epoch": 98.08,
      "learning_rate": 2.4523809523809523e-05,
      "loss": 0.6518,
      "step": 10200
    },
    {
      "epoch": 99.04,
      "learning_rate": 1.2619047619047618e-05,
      "loss": 0.6285,
      "step": 10300
    },
    {
      "epoch": 100.0,
      "learning_rate": 7.142857142857143e-07,
      "loss": 0.6282,
      "step": 10400
    },
    {
      "epoch": 100.0,
      "step": 10400,
      "total_flos": 4.70570622386933e+19,
      "train_loss": 1.3617363874728863,
      "train_runtime": 41188.5513,
      "train_samples_per_second": 8.099,
      "train_steps_per_second": 0.252
    }
  ],
  "max_steps": 10400,
  "num_train_epochs": 100,
  "total_flos": 4.70570622386933e+19,
  "trial_name": null,
  "trial_params": null
}