{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 100,
"global_step": 360,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06944444444444445,
"grad_norm": 6.3636603355407715,
"learning_rate": 1e-06,
"loss": 2.6727,
"step": 5
},
{
"epoch": 0.1388888888888889,
"grad_norm": 7.486879825592041,
"learning_rate": 1e-06,
"loss": 2.3642,
"step": 10
},
{
"epoch": 0.20833333333333334,
"grad_norm": 6.5991997718811035,
"learning_rate": 1e-06,
"loss": 2.515,
"step": 15
},
{
"epoch": 0.2777777777777778,
"grad_norm": 7.580630779266357,
"learning_rate": 1e-06,
"loss": 2.3997,
"step": 20
},
{
"epoch": 0.3472222222222222,
"grad_norm": 7.5727410316467285,
"learning_rate": 1e-06,
"loss": 2.4551,
"step": 25
},
{
"epoch": 0.4166666666666667,
"grad_norm": 8.835946083068848,
"learning_rate": 1e-06,
"loss": 2.4476,
"step": 30
},
{
"epoch": 0.4861111111111111,
"grad_norm": 7.495606899261475,
"learning_rate": 1e-06,
"loss": 2.4191,
"step": 35
},
{
"epoch": 0.5555555555555556,
"grad_norm": 8.057035446166992,
"learning_rate": 1e-06,
"loss": 2.441,
"step": 40
},
{
"epoch": 0.625,
"grad_norm": 6.828744411468506,
"learning_rate": 1e-06,
"loss": 2.3052,
"step": 45
},
{
"epoch": 0.6944444444444444,
"grad_norm": 7.163251876831055,
"learning_rate": 1e-06,
"loss": 2.1357,
"step": 50
},
{
"epoch": 0.7638888888888888,
"grad_norm": 5.414941787719727,
"learning_rate": 1e-06,
"loss": 2.2248,
"step": 55
},
{
"epoch": 0.8333333333333334,
"grad_norm": 6.0801544189453125,
"learning_rate": 1e-06,
"loss": 2.2934,
"step": 60
},
{
"epoch": 0.9027777777777778,
"grad_norm": 6.054081439971924,
"learning_rate": 1e-06,
"loss": 2.3014,
"step": 65
},
{
"epoch": 0.9722222222222222,
"grad_norm": 5.827741622924805,
"learning_rate": 1e-06,
"loss": 2.2515,
"step": 70
},
{
"epoch": 1.0416666666666667,
"grad_norm": 3.5676162242889404,
"learning_rate": 1e-06,
"loss": 2.0915,
"step": 75
},
{
"epoch": 1.1111111111111112,
"grad_norm": 5.15900993347168,
"learning_rate": 1e-06,
"loss": 2.0749,
"step": 80
},
{
"epoch": 1.1805555555555556,
"grad_norm": 5.206437110900879,
"learning_rate": 1e-06,
"loss": 2.0539,
"step": 85
},
{
"epoch": 1.25,
"grad_norm": 5.990969657897949,
"learning_rate": 1e-06,
"loss": 2.1308,
"step": 90
},
{
"epoch": 1.3194444444444444,
"grad_norm": 6.198008060455322,
"learning_rate": 1e-06,
"loss": 2.3256,
"step": 95
},
{
"epoch": 1.3888888888888888,
"grad_norm": 5.184628486633301,
"learning_rate": 1e-06,
"loss": 2.1566,
"step": 100
},
{
"epoch": 1.3888888888888888,
"eval_loss": 2.0880796909332275,
"eval_runtime": 34.0667,
"eval_samples_per_second": 2.935,
"eval_steps_per_second": 0.734,
"step": 100
},
{
"epoch": 1.4583333333333333,
"grad_norm": 5.412724494934082,
"learning_rate": 1e-06,
"loss": 1.9085,
"step": 105
},
{
"epoch": 1.5277777777777777,
"grad_norm": 3.459959030151367,
"learning_rate": 1e-06,
"loss": 1.9494,
"step": 110
},
{
"epoch": 1.5972222222222223,
"grad_norm": 5.159445762634277,
"learning_rate": 1e-06,
"loss": 1.9334,
"step": 115
},
{
"epoch": 1.6666666666666665,
"grad_norm": 5.133082389831543,
"learning_rate": 1e-06,
"loss": 2.0826,
"step": 120
},
{
"epoch": 1.7361111111111112,
"grad_norm": 4.473026752471924,
"learning_rate": 1e-06,
"loss": 2.0585,
"step": 125
},
{
"epoch": 1.8055555555555556,
"grad_norm": 5.063863754272461,
"learning_rate": 1e-06,
"loss": 2.1289,
"step": 130
},
{
"epoch": 1.875,
"grad_norm": 4.927737236022949,
"learning_rate": 1e-06,
"loss": 1.9872,
"step": 135
},
{
"epoch": 1.9444444444444444,
"grad_norm": 5.563902854919434,
"learning_rate": 1e-06,
"loss": 1.9803,
"step": 140
},
{
"epoch": 2.013888888888889,
"grad_norm": 3.901442050933838,
"learning_rate": 1e-06,
"loss": 1.8309,
"step": 145
},
{
"epoch": 2.0833333333333335,
"grad_norm": 3.771136999130249,
"learning_rate": 1e-06,
"loss": 1.7758,
"step": 150
},
{
"epoch": 2.1527777777777777,
"grad_norm": 4.6159257888793945,
"learning_rate": 1e-06,
"loss": 1.9193,
"step": 155
},
{
"epoch": 2.2222222222222223,
"grad_norm": 3.758843183517456,
"learning_rate": 1e-06,
"loss": 1.9329,
"step": 160
},
{
"epoch": 2.2916666666666665,
"grad_norm": 4.267579078674316,
"learning_rate": 1e-06,
"loss": 2.0399,
"step": 165
},
{
"epoch": 2.361111111111111,
"grad_norm": 3.9819560050964355,
"learning_rate": 1e-06,
"loss": 1.9568,
"step": 170
},
{
"epoch": 2.4305555555555554,
"grad_norm": 3.8918192386627197,
"learning_rate": 1e-06,
"loss": 1.7377,
"step": 175
},
{
"epoch": 2.5,
"grad_norm": 3.9746928215026855,
"learning_rate": 1e-06,
"loss": 1.8949,
"step": 180
},
{
"epoch": 2.5694444444444446,
"grad_norm": 3.328784704208374,
"learning_rate": 1e-06,
"loss": 1.6509,
"step": 185
},
{
"epoch": 2.638888888888889,
"grad_norm": 3.835324287414551,
"learning_rate": 1e-06,
"loss": 1.8321,
"step": 190
},
{
"epoch": 2.7083333333333335,
"grad_norm": 3.3603885173797607,
"learning_rate": 1e-06,
"loss": 1.8628,
"step": 195
},
{
"epoch": 2.7777777777777777,
"grad_norm": 3.7577502727508545,
"learning_rate": 1e-06,
"loss": 1.8447,
"step": 200
},
{
"epoch": 2.7777777777777777,
"eval_loss": 1.8452154397964478,
"eval_runtime": 34.0911,
"eval_samples_per_second": 2.933,
"eval_steps_per_second": 0.733,
"step": 200
},
{
"epoch": 2.8472222222222223,
"grad_norm": 4.379385948181152,
"learning_rate": 1e-06,
"loss": 1.8212,
"step": 205
},
{
"epoch": 2.9166666666666665,
"grad_norm": 3.7095022201538086,
"learning_rate": 1e-06,
"loss": 1.7862,
"step": 210
},
{
"epoch": 2.986111111111111,
"grad_norm": 4.164438724517822,
"learning_rate": 1e-06,
"loss": 1.8046,
"step": 215
},
{
"epoch": 3.0555555555555554,
"grad_norm": 3.6749582290649414,
"learning_rate": 1e-06,
"loss": 1.6358,
"step": 220
},
{
"epoch": 3.125,
"grad_norm": 3.7247958183288574,
"learning_rate": 1e-06,
"loss": 1.791,
"step": 225
},
{
"epoch": 3.1944444444444446,
"grad_norm": 2.9533472061157227,
"learning_rate": 1e-06,
"loss": 1.6251,
"step": 230
},
{
"epoch": 3.263888888888889,
"grad_norm": 4.062502384185791,
"learning_rate": 1e-06,
"loss": 1.6976,
"step": 235
},
{
"epoch": 3.3333333333333335,
"grad_norm": 4.328882217407227,
"learning_rate": 1e-06,
"loss": 1.8438,
"step": 240
},
{
"epoch": 3.4027777777777777,
"grad_norm": 4.158596038818359,
"learning_rate": 1e-06,
"loss": 1.8998,
"step": 245
},
{
"epoch": 3.4722222222222223,
"grad_norm": 5.7752556800842285,
"learning_rate": 1e-06,
"loss": 1.7517,
"step": 250
},
{
"epoch": 3.5416666666666665,
"grad_norm": 4.568635940551758,
"learning_rate": 1e-06,
"loss": 1.6835,
"step": 255
},
{
"epoch": 3.611111111111111,
"grad_norm": 3.6611974239349365,
"learning_rate": 1e-06,
"loss": 1.7852,
"step": 260
},
{
"epoch": 3.6805555555555554,
"grad_norm": 4.026912212371826,
"learning_rate": 1e-06,
"loss": 1.7916,
"step": 265
},
{
"epoch": 3.75,
"grad_norm": 4.750195026397705,
"learning_rate": 1e-06,
"loss": 1.7584,
"step": 270
},
{
"epoch": 3.8194444444444446,
"grad_norm": 3.936798572540283,
"learning_rate": 1e-06,
"loss": 1.5877,
"step": 275
},
{
"epoch": 3.888888888888889,
"grad_norm": 4.1127800941467285,
"learning_rate": 1e-06,
"loss": 1.5392,
"step": 280
},
{
"epoch": 3.9583333333333335,
"grad_norm": 3.6437580585479736,
"learning_rate": 1e-06,
"loss": 1.6125,
"step": 285
},
{
"epoch": 4.027777777777778,
"grad_norm": 3.641177177429199,
"learning_rate": 1e-06,
"loss": 1.687,
"step": 290
},
{
"epoch": 4.097222222222222,
"grad_norm": 3.797327995300293,
"learning_rate": 1e-06,
"loss": 1.7779,
"step": 295
},
{
"epoch": 4.166666666666667,
"grad_norm": 5.071943283081055,
"learning_rate": 1e-06,
"loss": 1.7103,
"step": 300
},
{
"epoch": 4.166666666666667,
"eval_loss": 1.6850143671035767,
"eval_runtime": 34.4694,
"eval_samples_per_second": 2.901,
"eval_steps_per_second": 0.725,
"step": 300
},
{
"epoch": 4.236111111111111,
"grad_norm": 6.09140682220459,
"learning_rate": 1e-06,
"loss": 1.6347,
"step": 305
},
{
"epoch": 4.305555555555555,
"grad_norm": 5.452902317047119,
"learning_rate": 1e-06,
"loss": 1.7689,
"step": 310
},
{
"epoch": 4.375,
"grad_norm": 3.5834009647369385,
"learning_rate": 1e-06,
"loss": 1.6514,
"step": 315
},
{
"epoch": 4.444444444444445,
"grad_norm": 3.288220167160034,
"learning_rate": 1e-06,
"loss": 1.4941,
"step": 320
},
{
"epoch": 4.513888888888889,
"grad_norm": 4.202756404876709,
"learning_rate": 1e-06,
"loss": 1.5374,
"step": 325
},
{
"epoch": 4.583333333333333,
"grad_norm": 3.9757556915283203,
"learning_rate": 1e-06,
"loss": 1.6289,
"step": 330
},
{
"epoch": 4.652777777777778,
"grad_norm": 3.3575947284698486,
"learning_rate": 1e-06,
"loss": 1.5446,
"step": 335
},
{
"epoch": 4.722222222222222,
"grad_norm": 4.207667350769043,
"learning_rate": 1e-06,
"loss": 1.5668,
"step": 340
},
{
"epoch": 4.791666666666667,
"grad_norm": 3.2263221740722656,
"learning_rate": 1e-06,
"loss": 1.4529,
"step": 345
},
{
"epoch": 4.861111111111111,
"grad_norm": 3.272395610809326,
"learning_rate": 1e-06,
"loss": 1.5215,
"step": 350
},
{
"epoch": 4.930555555555555,
"grad_norm": 3.4315106868743896,
"learning_rate": 1e-06,
"loss": 1.5781,
"step": 355
},
{
"epoch": 5.0,
"grad_norm": 3.9581406116485596,
"learning_rate": 1e-06,
"loss": 1.5001,
"step": 360
},
{
"epoch": 5.0,
"step": 360,
"total_flos": 2.3797808143060173e+17,
"train_loss": 1.9143991947174073,
"train_runtime": 6464.4185,
"train_samples_per_second": 0.891,
"train_steps_per_second": 0.056
}
],
"logging_steps": 5,
"max_steps": 360,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 100,
"total_flos": 2.3797808143060173e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}