csikasote's picture
End of training
01fb589 verified
{
"best_metric": 0.702553391456604,
"best_model_checkpoint": "/scratch/skscla001/results/xls-r-1b-bem-natbed-combined-model/checkpoint-1800",
"epoch": 5.256570713391739,
"eval_steps": 100,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2503128911138924,
"grad_norm": 2.982633352279663,
"learning_rate": 0.00029099999999999997,
"loss": 5.0296,
"step": 100
},
{
"epoch": 0.2503128911138924,
"eval_loss": 2.807065010070801,
"eval_runtime": 99.4276,
"eval_samples_per_second": 13.658,
"eval_steps_per_second": 1.71,
"eval_wer": 1.0,
"step": 100
},
{
"epoch": 0.5006257822277848,
"grad_norm": 2.2083914279937744,
"learning_rate": 0.00029754844144903116,
"loss": 1.5792,
"step": 200
},
{
"epoch": 0.5006257822277848,
"eval_loss": 1.1184355020523071,
"eval_runtime": 98.2757,
"eval_samples_per_second": 13.818,
"eval_steps_per_second": 1.73,
"eval_wer": 0.9645076872751063,
"step": 200
},
{
"epoch": 0.7509386733416771,
"grad_norm": 3.159029483795166,
"learning_rate": 0.0002950210614995788,
"loss": 1.1422,
"step": 300
},
{
"epoch": 0.7509386733416771,
"eval_loss": 1.0389240980148315,
"eval_runtime": 98.789,
"eval_samples_per_second": 13.746,
"eval_steps_per_second": 1.721,
"eval_wer": 0.960582270199542,
"step": 300
},
{
"epoch": 1.0012515644555695,
"grad_norm": 6.720183372497559,
"learning_rate": 0.00029249368155012635,
"loss": 0.9883,
"step": 400
},
{
"epoch": 1.0012515644555695,
"eval_loss": 1.0493754148483276,
"eval_runtime": 97.5764,
"eval_samples_per_second": 13.917,
"eval_steps_per_second": 1.742,
"eval_wer": 0.9989368662087014,
"step": 400
},
{
"epoch": 1.2515644555694618,
"grad_norm": 3.1551387310028076,
"learning_rate": 0.00028996630160067397,
"loss": 0.8999,
"step": 500
},
{
"epoch": 1.2515644555694618,
"eval_loss": 0.86915123462677,
"eval_runtime": 98.5884,
"eval_samples_per_second": 13.774,
"eval_steps_per_second": 1.724,
"eval_wer": 0.8682531894013739,
"step": 500
},
{
"epoch": 1.5018773466833542,
"grad_norm": 2.8559296131134033,
"learning_rate": 0.00028743892165122154,
"loss": 0.9135,
"step": 600
},
{
"epoch": 1.5018773466833542,
"eval_loss": 0.8564479351043701,
"eval_runtime": 98.2671,
"eval_samples_per_second": 13.819,
"eval_steps_per_second": 1.73,
"eval_wer": 0.8429833169774289,
"step": 600
},
{
"epoch": 1.7521902377972465,
"grad_norm": 2.4900286197662354,
"learning_rate": 0.00028491154170176916,
"loss": 0.8898,
"step": 700
},
{
"epoch": 1.7521902377972465,
"eval_loss": 0.8450906872749329,
"eval_runtime": 98.3583,
"eval_samples_per_second": 13.807,
"eval_steps_per_second": 1.728,
"eval_wer": 0.8522244030094864,
"step": 700
},
{
"epoch": 2.002503128911139,
"grad_norm": 2.2157928943634033,
"learning_rate": 0.0002823841617523167,
"loss": 0.9089,
"step": 800
},
{
"epoch": 2.002503128911139,
"eval_loss": 0.8857161402702332,
"eval_runtime": 98.023,
"eval_samples_per_second": 13.854,
"eval_steps_per_second": 1.734,
"eval_wer": 0.8484625449787373,
"step": 800
},
{
"epoch": 2.252816020025031,
"grad_norm": 10.16332721710205,
"learning_rate": 0.00027985678180286435,
"loss": 0.8292,
"step": 900
},
{
"epoch": 2.252816020025031,
"eval_loss": 0.8661824464797974,
"eval_runtime": 98.5047,
"eval_samples_per_second": 13.786,
"eval_steps_per_second": 1.726,
"eval_wer": 0.8580307491004252,
"step": 900
},
{
"epoch": 2.5031289111389237,
"grad_norm": 1.2531850337982178,
"learning_rate": 0.00027732940185341197,
"loss": 0.7921,
"step": 1000
},
{
"epoch": 2.5031289111389237,
"eval_loss": 0.796419084072113,
"eval_runtime": 98.0484,
"eval_samples_per_second": 13.85,
"eval_steps_per_second": 1.734,
"eval_wer": 0.7968596663395485,
"step": 1000
},
{
"epoch": 2.7534418022528158,
"grad_norm": 6.90547513961792,
"learning_rate": 0.00027480202190395954,
"loss": 0.7983,
"step": 1100
},
{
"epoch": 2.7534418022528158,
"eval_loss": 0.7895970940589905,
"eval_runtime": 98.3739,
"eval_samples_per_second": 13.804,
"eval_steps_per_second": 1.728,
"eval_wer": 0.7951422963689893,
"step": 1100
},
{
"epoch": 3.0037546933667083,
"grad_norm": 1.1230757236480713,
"learning_rate": 0.00027227464195450716,
"loss": 0.7946,
"step": 1200
},
{
"epoch": 3.0037546933667083,
"eval_loss": 0.7666952013969421,
"eval_runtime": 97.1256,
"eval_samples_per_second": 13.982,
"eval_steps_per_second": 1.75,
"eval_wer": 0.7946516192345436,
"step": 1200
},
{
"epoch": 3.254067584480601,
"grad_norm": 2.657978057861328,
"learning_rate": 0.00026974726200505473,
"loss": 0.7488,
"step": 1300
},
{
"epoch": 3.254067584480601,
"eval_loss": 0.8179810047149658,
"eval_runtime": 98.614,
"eval_samples_per_second": 13.771,
"eval_steps_per_second": 1.724,
"eval_wer": 0.8495256787700359,
"step": 1300
},
{
"epoch": 3.504380475594493,
"grad_norm": 1.1082779169082642,
"learning_rate": 0.00026721988205560235,
"loss": 0.7428,
"step": 1400
},
{
"epoch": 3.504380475594493,
"eval_loss": 0.7548468708992004,
"eval_runtime": 98.2436,
"eval_samples_per_second": 13.823,
"eval_steps_per_second": 1.73,
"eval_wer": 0.7688092901537456,
"step": 1400
},
{
"epoch": 3.7546933667083855,
"grad_norm": 0.6124172210693359,
"learning_rate": 0.0002646925021061499,
"loss": 0.7256,
"step": 1500
},
{
"epoch": 3.7546933667083855,
"eval_loss": 0.7258334755897522,
"eval_runtime": 98.5216,
"eval_samples_per_second": 13.784,
"eval_steps_per_second": 1.726,
"eval_wer": 0.7595682041216879,
"step": 1500
},
{
"epoch": 4.005006257822278,
"grad_norm": 1.6707249879837036,
"learning_rate": 0.00026216512215669754,
"loss": 0.741,
"step": 1600
},
{
"epoch": 4.005006257822278,
"eval_loss": 0.7665364146232605,
"eval_runtime": 98.0591,
"eval_samples_per_second": 13.849,
"eval_steps_per_second": 1.734,
"eval_wer": 0.7717533529604187,
"step": 1600
},
{
"epoch": 4.25531914893617,
"grad_norm": 1.7184687852859497,
"learning_rate": 0.00025963774220724516,
"loss": 0.6775,
"step": 1700
},
{
"epoch": 4.25531914893617,
"eval_loss": 0.7922295928001404,
"eval_runtime": 97.9024,
"eval_samples_per_second": 13.871,
"eval_steps_per_second": 1.736,
"eval_wer": 0.77747791952895,
"step": 1700
},
{
"epoch": 4.505632040050062,
"grad_norm": 0.8240795135498047,
"learning_rate": 0.00025711036225779273,
"loss": 0.6795,
"step": 1800
},
{
"epoch": 4.505632040050062,
"eval_loss": 0.702553391456604,
"eval_runtime": 96.6836,
"eval_samples_per_second": 14.046,
"eval_steps_per_second": 1.758,
"eval_wer": 0.7512266928361139,
"step": 1800
},
{
"epoch": 4.755944931163955,
"grad_norm": 0.8530161380767822,
"learning_rate": 0.00025458298230834035,
"loss": 0.683,
"step": 1900
},
{
"epoch": 4.755944931163955,
"eval_loss": 0.7051005363464355,
"eval_runtime": 98.456,
"eval_samples_per_second": 13.793,
"eval_steps_per_second": 1.727,
"eval_wer": 0.72252208047105,
"step": 1900
},
{
"epoch": 5.006257822277847,
"grad_norm": 1.205073595046997,
"learning_rate": 0.0002520556023588879,
"loss": 0.6838,
"step": 2000
},
{
"epoch": 5.006257822277847,
"eval_loss": 0.7196346521377563,
"eval_runtime": 98.5975,
"eval_samples_per_second": 13.773,
"eval_steps_per_second": 1.724,
"eval_wer": 0.7503271180896304,
"step": 2000
},
{
"epoch": 5.256570713391739,
"grad_norm": 0.9071811437606812,
"learning_rate": 0.00024952822240943554,
"loss": 0.6005,
"step": 2100
},
{
"epoch": 5.256570713391739,
"eval_loss": 0.7032491564750671,
"eval_runtime": 98.4687,
"eval_samples_per_second": 13.791,
"eval_steps_per_second": 1.726,
"eval_wer": 0.7423945044160942,
"step": 2100
},
{
"epoch": 5.256570713391739,
"step": 2100,
"total_flos": 2.148232424796796e+19,
"train_loss": 1.0403752663021997,
"train_runtime": 6352.2325,
"train_samples_per_second": 30.164,
"train_steps_per_second": 1.884
}
],
"logging_steps": 100,
"max_steps": 11970,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.148232424796796e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}