{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.948453608247423,
"eval_steps": 500,
"global_step": 240,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.020618556701030927,
"grad_norm": 1.546875,
"learning_rate": 8.333333333333333e-08,
"loss": 2.5196,
"step": 1
},
{
"epoch": 0.10309278350515463,
"grad_norm": 1.5390625,
"learning_rate": 4.1666666666666667e-07,
"loss": 2.458,
"step": 5
},
{
"epoch": 0.20618556701030927,
"grad_norm": 1.4375,
"learning_rate": 8.333333333333333e-07,
"loss": 2.4466,
"step": 10
},
{
"epoch": 0.30927835051546393,
"grad_norm": 1.515625,
"learning_rate": 1.2499999999999999e-06,
"loss": 2.4248,
"step": 15
},
{
"epoch": 0.41237113402061853,
"grad_norm": 1.6171875,
"learning_rate": 1.6666666666666667e-06,
"loss": 2.4622,
"step": 20
},
{
"epoch": 0.5154639175257731,
"grad_norm": 1.4921875,
"learning_rate": 1.9998942319271077e-06,
"loss": 2.4147,
"step": 25
},
{
"epoch": 0.6185567010309279,
"grad_norm": 1.46875,
"learning_rate": 1.9961946980917456e-06,
"loss": 2.3747,
"step": 30
},
{
"epoch": 0.7216494845360825,
"grad_norm": 1.453125,
"learning_rate": 1.987229113117374e-06,
"loss": 2.4189,
"step": 35
},
{
"epoch": 0.8247422680412371,
"grad_norm": 1.578125,
"learning_rate": 1.9730448705798236e-06,
"loss": 2.4126,
"step": 40
},
{
"epoch": 0.9278350515463918,
"grad_norm": 1.4765625,
"learning_rate": 1.953716950748227e-06,
"loss": 2.3912,
"step": 45
},
{
"epoch": 0.9896907216494846,
"eval_loss": 2.246396064758301,
"eval_runtime": 1335.3788,
"eval_samples_per_second": 4.423,
"eval_steps_per_second": 0.553,
"step": 48
},
{
"epoch": 1.0309278350515463,
"grad_norm": 1.4453125,
"learning_rate": 1.929347524226822e-06,
"loss": 2.3653,
"step": 50
},
{
"epoch": 1.134020618556701,
"grad_norm": 1.546875,
"learning_rate": 1.900065411864121e-06,
"loss": 2.3567,
"step": 55
},
{
"epoch": 1.2371134020618557,
"grad_norm": 1.5,
"learning_rate": 1.8660254037844386e-06,
"loss": 2.3555,
"step": 60
},
{
"epoch": 1.3402061855670104,
"grad_norm": 1.6015625,
"learning_rate": 1.8274074411415103e-06,
"loss": 2.2988,
"step": 65
},
{
"epoch": 1.443298969072165,
"grad_norm": 1.53125,
"learning_rate": 1.7844156649195757e-06,
"loss": 2.288,
"step": 70
},
{
"epoch": 1.5463917525773194,
"grad_norm": 1.3671875,
"learning_rate": 1.737277336810124e-06,
"loss": 2.2871,
"step": 75
},
{
"epoch": 1.6494845360824741,
"grad_norm": 1.59375,
"learning_rate": 1.6862416378687337e-06,
"loss": 2.2903,
"step": 80
},
{
"epoch": 1.7525773195876289,
"grad_norm": 1.484375,
"learning_rate": 1.6315783513024974e-06,
"loss": 2.2465,
"step": 85
},
{
"epoch": 1.8556701030927836,
"grad_norm": 1.484375,
"learning_rate": 1.573576436351046e-06,
"loss": 2.2434,
"step": 90
},
{
"epoch": 1.9587628865979383,
"grad_norm": 1.421875,
"learning_rate": 1.5125425007998652e-06,
"loss": 2.2442,
"step": 95
},
{
"epoch": 2.0,
"eval_loss": 2.1167430877685547,
"eval_runtime": 1327.822,
"eval_samples_per_second": 4.449,
"eval_steps_per_second": 0.557,
"step": 97
},
{
"epoch": 2.0618556701030926,
"grad_norm": 1.4765625,
"learning_rate": 1.4487991802004622e-06,
"loss": 2.2931,
"step": 100
},
{
"epoch": 2.1649484536082473,
"grad_norm": 1.390625,
"learning_rate": 1.3826834323650898e-06,
"loss": 2.2062,
"step": 105
},
{
"epoch": 2.268041237113402,
"grad_norm": 1.4921875,
"learning_rate": 1.3145447561516136e-06,
"loss": 2.1501,
"step": 110
},
{
"epoch": 2.3711340206185567,
"grad_norm": 1.1875,
"learning_rate": 1.2447433439543238e-06,
"loss": 2.1248,
"step": 115
},
{
"epoch": 2.4742268041237114,
"grad_norm": 1.3984375,
"learning_rate": 1.1736481776669305e-06,
"loss": 2.1624,
"step": 120
},
{
"epoch": 2.5773195876288657,
"grad_norm": 1.2265625,
"learning_rate": 1.101635078182802e-06,
"loss": 2.1479,
"step": 125
},
{
"epoch": 2.680412371134021,
"grad_norm": 1.3359375,
"learning_rate": 1.0290847187431114e-06,
"loss": 2.1655,
"step": 130
},
{
"epoch": 2.783505154639175,
"grad_norm": 1.3203125,
"learning_rate": 9.56380612634664e-07,
"loss": 2.1369,
"step": 135
},
{
"epoch": 2.88659793814433,
"grad_norm": 1.28125,
"learning_rate": 8.839070858747696e-07,
"loss": 2.1003,
"step": 140
},
{
"epoch": 2.9896907216494846,
"grad_norm": 1.2109375,
"learning_rate": 8.120472455998881e-07,
"loss": 2.1047,
"step": 145
},
{
"epoch": 2.9896907216494846,
"eval_loss": 2.0316832065582275,
"eval_runtime": 1327.2975,
"eval_samples_per_second": 4.45,
"eval_steps_per_second": 0.557,
"step": 145
},
{
"epoch": 3.0927835051546393,
"grad_norm": 1.25,
"learning_rate": 7.411809548974791e-07,
"loss": 2.0675,
"step": 150
},
{
"epoch": 3.195876288659794,
"grad_norm": 1.3125,
"learning_rate": 6.71682824786439e-07,
"loss": 2.0934,
"step": 155
},
{
"epoch": 3.2989690721649483,
"grad_norm": 1.0234375,
"learning_rate": 6.039202339608431e-07,
"loss": 2.081,
"step": 160
},
{
"epoch": 3.402061855670103,
"grad_norm": 1.0625,
"learning_rate": 5.382513867649663e-07,
"loss": 2.0219,
"step": 165
},
{
"epoch": 3.5051546391752577,
"grad_norm": 1.3203125,
"learning_rate": 4.750234196654399e-07,
"loss": 2.0877,
"step": 170
},
{
"epoch": 3.6082474226804124,
"grad_norm": 1.2890625,
"learning_rate": 4.1457056623005947e-07,
"loss": 2.1019,
"step": 175
},
{
"epoch": 3.711340206185567,
"grad_norm": 1.1328125,
"learning_rate": 3.5721239031346063e-07,
"loss": 2.0828,
"step": 180
},
{
"epoch": 3.8144329896907214,
"grad_norm": 1.2421875,
"learning_rate": 3.032520967893453e-07,
"loss": 2.09,
"step": 185
},
{
"epoch": 3.917525773195876,
"grad_norm": 1.3125,
"learning_rate": 2.5297492875900415e-07,
"loss": 2.05,
"step": 190
},
{
"epoch": 4.0,
"eval_loss": 2.0067081451416016,
"eval_runtime": 1327.277,
"eval_samples_per_second": 4.45,
"eval_steps_per_second": 0.557,
"step": 194
},
{
"epoch": 4.020618556701031,
"grad_norm": 1.1796875,
"learning_rate": 2.0664665970876495e-07,
"loss": 2.1246,
"step": 195
},
{
"epoch": 4.123711340206185,
"grad_norm": 1.1796875,
"learning_rate": 1.6451218858706372e-07,
"loss": 2.0933,
"step": 200
},
{
"epoch": 4.22680412371134,
"grad_norm": 1.1328125,
"learning_rate": 1.2679424522780425e-07,
"loss": 2.0561,
"step": 205
},
{
"epoch": 4.329896907216495,
"grad_norm": 1.1640625,
"learning_rate": 9.369221296335006e-08,
"loss": 2.0946,
"step": 210
},
{
"epoch": 4.43298969072165,
"grad_norm": 1.125,
"learning_rate": 6.538107465101162e-08,
"loss": 2.0797,
"step": 215
},
{
"epoch": 4.536082474226804,
"grad_norm": 1.125,
"learning_rate": 4.20104876845111e-08,
"loss": 2.0907,
"step": 220
},
{
"epoch": 4.639175257731958,
"grad_norm": 1.171875,
"learning_rate": 2.3703992880066636e-08,
"loss": 2.1295,
"step": 225
},
{
"epoch": 4.742268041237113,
"grad_norm": 1.1640625,
"learning_rate": 1.0558361419055529e-08,
"loss": 2.0247,
"step": 230
},
{
"epoch": 4.845360824742268,
"grad_norm": 1.046875,
"learning_rate": 2.643083299427751e-09,
"loss": 2.0658,
"step": 235
},
{
"epoch": 4.948453608247423,
"grad_norm": 1.046875,
"learning_rate": 0.0,
"loss": 2.0626,
"step": 240
},
{
"epoch": 4.948453608247423,
"eval_loss": 2.0068044662475586,
"eval_runtime": 1327.4294,
"eval_samples_per_second": 4.45,
"eval_steps_per_second": 0.557,
"step": 240
},
{
"epoch": 4.948453608247423,
"step": 240,
"total_flos": 1.0715672433026662e+17,
"train_loss": 2.205865615606308,
"train_runtime": 7479.3037,
"train_samples_per_second": 0.257,
"train_steps_per_second": 0.032
}
],
"logging_steps": 5,
"max_steps": 240,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 1,
"total_flos": 1.0715672433026662e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
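
For reference, a minimal sketch of how this trainer state could be inspected offline, assuming the JSON above is saved locally as trainer_state.json (the path and the script are illustrative additions, not part of the original file). It uses only the Python standard library and the keys visible in log_history: entries carrying "loss" are the per-step training logs, entries carrying "eval_loss" are the per-epoch evaluation logs.

import json

# Load the saved Trainer state (the path is an assumption for this sketch).
with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Split log_history into training-loss entries and evaluation entries.
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print(f"trained for {state['epoch']:.2f} epochs, {state['global_step']} steps")
print(f"final training loss: {train_log[-1]['loss']}")
for e in eval_log:
    print(f"step {e['step']:>3}  epoch {e['epoch']:.2f}  eval_loss {e['eval_loss']:.4f}")

Run against the values above, this prints the eval_loss trajectory (2.2464, 2.1167, 2.0317, 2.0067, 2.0068), showing the evaluation loss flattening out after the fourth epoch.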