mf-doom / trainer_state.json
AlekseyKorshuk's picture
huggingartists
f51c1b1
{
"best_metric": 4.393705368041992,
"best_model_checkpoint": "output/mf-doom/checkpoint-193",
"epoch": 1.0,
"global_step": 193,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.00013697291967368176,
"loss": 4.7696,
"step": 5
},
{
"epoch": 0.05,
"learning_rate": 0.0001362931820613334,
"loss": 4.9375,
"step": 10
},
{
"epoch": 0.08,
"learning_rate": 0.00013516528730985944,
"loss": 4.6377,
"step": 15
},
{
"epoch": 0.1,
"learning_rate": 0.00013359670255361022,
"loss": 4.7907,
"step": 20
},
{
"epoch": 0.13,
"learning_rate": 0.00013159781247883373,
"loss": 4.8492,
"step": 25
},
{
"epoch": 0.16,
"learning_rate": 0.00012918185057271526,
"loss": 4.698,
"step": 30
},
{
"epoch": 0.18,
"learning_rate": 0.00012636481151216485,
"loss": 4.6354,
"step": 35
},
{
"epoch": 0.21,
"learning_rate": 0.0001231653452723755,
"loss": 4.7479,
"step": 40
},
{
"epoch": 0.23,
"learning_rate": 0.00011960463365619783,
"loss": 4.6099,
"step": 45
},
{
"epoch": 0.26,
"learning_rate": 0.000115706250061758,
"loss": 4.4771,
"step": 50
},
{
"epoch": 0.28,
"learning_rate": 0.000111496003416716,
"loss": 4.6176,
"step": 55
},
{
"epoch": 0.31,
"learning_rate": 0.00010700176731238488,
"loss": 4.5517,
"step": 60
},
{
"epoch": 0.34,
"learning_rate": 0.00010225329546891398,
"loss": 4.6173,
"step": 65
},
{
"epoch": 0.36,
"learning_rate": 9.728202475323418e-05,
"loss": 4.5918,
"step": 70
},
{
"epoch": 0.39,
"learning_rate": 9.21208670538683e-05,
"loss": 4.4796,
"step": 75
},
{
"epoch": 0.41,
"learning_rate": 8.680399139048255e-05,
"loss": 4.5046,
"step": 80
},
{
"epoch": 0.44,
"learning_rate": 8.136659770070481e-05,
"loss": 4.5786,
"step": 85
},
{
"epoch": 0.47,
"learning_rate": 7.584468380183525e-05,
"loss": 4.5974,
"step": 90
},
{
"epoch": 0.49,
"learning_rate": 7.027480707026031e-05,
"loss": 4.4949,
"step": 95
},
{
"epoch": 0.52,
"learning_rate": 6.469384241635202e-05,
"loss": 4.5468,
"step": 100
},
{
"epoch": 0.54,
"learning_rate": 5.9138738157159314e-05,
"loss": 4.2671,
"step": 105
},
{
"epoch": 0.57,
"learning_rate": 5.364627140311692e-05,
"loss": 4.5739,
"step": 110
},
{
"epoch": 0.6,
"learning_rate": 4.825280457821314e-05,
"loss": 4.4114,
"step": 115
},
{
"epoch": 0.62,
"learning_rate": 4.299404468555545e-05,
"loss": 4.403,
"step": 120
},
{
"epoch": 0.65,
"learning_rate": 3.7904806912095865e-05,
"loss": 4.5395,
"step": 125
},
{
"epoch": 0.67,
"learning_rate": 3.30187841375518e-05,
"loss": 4.478,
"step": 130
},
{
"epoch": 0.7,
"learning_rate": 2.8368323873469696e-05,
"loss": 4.5482,
"step": 135
},
{
"epoch": 0.73,
"learning_rate": 2.39842141091877e-05,
"loss": 4.5102,
"step": 140
},
{
"epoch": 0.75,
"learning_rate": 1.9895479482486794e-05,
"loss": 4.3508,
"step": 145
},
{
"epoch": 0.78,
"learning_rate": 1.61291891243658e-05,
"loss": 4.3784,
"step": 150
},
{
"epoch": 0.8,
"learning_rate": 1.2710277450088041e-05,
"loss": 4.5042,
"step": 155
},
{
"epoch": 0.83,
"learning_rate": 9.661379082937688e-06,
"loss": 4.3445,
"step": 160
},
{
"epoch": 0.85,
"learning_rate": 7.002679003559785e-06,
"loss": 4.5091,
"step": 165
},
{
"epoch": 0.88,
"learning_rate": 4.7517789169574735e-06,
"loss": 4.4888,
"step": 170
},
{
"epoch": 0.91,
"learning_rate": 2.9235807218530457e-06,
"loss": 4.354,
"step": 175
},
{
"epoch": 0.93,
"learning_rate": 1.5301878538942795e-06,
"loss": 4.3435,
"step": 180
},
{
"epoch": 0.96,
"learning_rate": 5.808251558551173e-07,
"loss": 4.3824,
"step": 185
},
{
"epoch": 0.98,
"learning_rate": 8.177780532344056e-08,
"loss": 4.4941,
"step": 190
},
{
"epoch": 1.0,
"eval_loss": 4.393705368041992,
"eval_runtime": 13.4209,
"eval_samples_per_second": 22.428,
"eval_steps_per_second": 2.831,
"step": 193
}
],
"max_steps": 193,
"num_train_epochs": 1,
"total_flos": 201586802688000.0,
"trial_name": null,
"trial_params": null
}