BookwormBizConsultant / trainer_state.json
theoldmandthesea's picture
Updated model
d146f81
raw
history blame
3.07 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04576659038901602,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00019292929292929293,
"loss": 2.0259,
"step": 10
},
{
"epoch": 0.0,
"learning_rate": 0.00018282828282828283,
"loss": 1.4961,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00017272727272727275,
"loss": 1.3953,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00016262626262626264,
"loss": 1.3234,
"step": 40
},
{
"epoch": 0.01,
"learning_rate": 0.00015252525252525253,
"loss": 1.4601,
"step": 50
},
{
"epoch": 0.01,
"learning_rate": 0.00014242424242424243,
"loss": 1.3362,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 0.00013232323232323235,
"loss": 1.3905,
"step": 70
},
{
"epoch": 0.02,
"learning_rate": 0.00012222222222222224,
"loss": 1.3132,
"step": 80
},
{
"epoch": 0.02,
"learning_rate": 0.00011212121212121212,
"loss": 1.2767,
"step": 90
},
{
"epoch": 0.02,
"learning_rate": 0.00010202020202020202,
"loss": 1.3303,
"step": 100
},
{
"epoch": 0.03,
"learning_rate": 9.191919191919192e-05,
"loss": 1.3685,
"step": 110
},
{
"epoch": 0.03,
"learning_rate": 8.181818181818183e-05,
"loss": 1.335,
"step": 120
},
{
"epoch": 0.03,
"learning_rate": 7.171717171717171e-05,
"loss": 1.3122,
"step": 130
},
{
"epoch": 0.03,
"learning_rate": 6.161616161616162e-05,
"loss": 1.2693,
"step": 140
},
{
"epoch": 0.03,
"learning_rate": 5.151515151515152e-05,
"loss": 1.2069,
"step": 150
},
{
"epoch": 0.04,
"learning_rate": 4.141414141414142e-05,
"loss": 1.2302,
"step": 160
},
{
"epoch": 0.04,
"learning_rate": 3.131313131313132e-05,
"loss": 1.2451,
"step": 170
},
{
"epoch": 0.04,
"learning_rate": 2.1212121212121215e-05,
"loss": 1.2993,
"step": 180
},
{
"epoch": 0.04,
"learning_rate": 1.1111111111111112e-05,
"loss": 1.2968,
"step": 190
},
{
"epoch": 0.05,
"learning_rate": 1.0101010101010103e-06,
"loss": 1.2567,
"step": 200
},
{
"epoch": 0.05,
"step": 200,
"total_flos": 1428558744158208.0,
"train_loss": 1.3583835124969483,
"train_runtime": 1540.4432,
"train_samples_per_second": 0.519,
"train_steps_per_second": 0.13
}
],
"logging_steps": 10,
"max_steps": 200,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 1428558744158208.0,
"trial_name": null,
"trial_params": null
}