bge-tuned / trainer_state.json
minh132's picture
Upload folder using huggingface_hub
0acac25 verified
raw
history blame
16.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.6447368421052633,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01644736842105263,
"grad_norm": NaN,
"learning_rate": 1e-05,
"loss": 0.7345,
"step": 10
},
{
"epoch": 0.03289473684210526,
"grad_norm": NaN,
"learning_rate": 1e-05,
"loss": 0.5846,
"step": 20
},
{
"epoch": 0.049342105263157895,
"grad_norm": 56.480621337890625,
"learning_rate": 9.998355263157894e-06,
"loss": 0.8864,
"step": 30
},
{
"epoch": 0.06578947368421052,
"grad_norm": NaN,
"learning_rate": 9.998355263157894e-06,
"loss": 0.4863,
"step": 40
},
{
"epoch": 0.08223684210526316,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.4998,
"step": 50
},
{
"epoch": 0.09868421052631579,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.7638,
"step": 60
},
{
"epoch": 0.11513157894736842,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.561,
"step": 70
},
{
"epoch": 0.13157894736842105,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.5553,
"step": 80
},
{
"epoch": 0.14802631578947367,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.4243,
"step": 90
},
{
"epoch": 0.16447368421052633,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.5658,
"step": 100
},
{
"epoch": 0.18092105263157895,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.6598,
"step": 110
},
{
"epoch": 0.19736842105263158,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.6398,
"step": 120
},
{
"epoch": 0.2138157894736842,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.5054,
"step": 130
},
{
"epoch": 0.23026315789473684,
"grad_norm": NaN,
"learning_rate": 9.99671052631579e-06,
"loss": 0.5486,
"step": 140
},
{
"epoch": 0.24671052631578946,
"grad_norm": NaN,
"learning_rate": 9.99342105263158e-06,
"loss": 0.361,
"step": 150
},
{
"epoch": 0.2631578947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.6621,
"step": 160
},
{
"epoch": 0.27960526315789475,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.29605263157894735,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.3125,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 190
},
{
"epoch": 0.32894736842105265,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.34539473684210525,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 210
},
{
"epoch": 0.3618421052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.3782894736842105,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 230
},
{
"epoch": 0.39473684210526316,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 240
},
{
"epoch": 0.41118421052631576,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 250
},
{
"epoch": 0.4276315789473684,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 260
},
{
"epoch": 0.4440789473684211,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 270
},
{
"epoch": 0.4605263157894737,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 280
},
{
"epoch": 0.4769736842105263,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 290
},
{
"epoch": 0.4934210526315789,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 300
},
{
"epoch": 0.5098684210526315,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 310
},
{
"epoch": 0.5263157894736842,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 320
},
{
"epoch": 0.5427631578947368,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 330
},
{
"epoch": 0.5592105263157895,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 340
},
{
"epoch": 0.5756578947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 350
},
{
"epoch": 0.5921052631578947,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 360
},
{
"epoch": 0.6085526315789473,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 370
},
{
"epoch": 0.625,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 380
},
{
"epoch": 0.6414473684210527,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 390
},
{
"epoch": 0.6578947368421053,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 400
},
{
"epoch": 0.6743421052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 410
},
{
"epoch": 0.6907894736842105,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 420
},
{
"epoch": 0.7072368421052632,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 430
},
{
"epoch": 0.7236842105263158,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 440
},
{
"epoch": 0.7401315789473685,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 450
},
{
"epoch": 0.756578947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 460
},
{
"epoch": 0.7730263157894737,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 470
},
{
"epoch": 0.7894736842105263,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 480
},
{
"epoch": 0.805921052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 490
},
{
"epoch": 0.8223684210526315,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 500
},
{
"epoch": 0.8388157894736842,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 510
},
{
"epoch": 0.8552631578947368,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 520
},
{
"epoch": 0.8717105263157895,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 530
},
{
"epoch": 0.8881578947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 540
},
{
"epoch": 0.9046052631578947,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 550
},
{
"epoch": 0.9210526315789473,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 560
},
{
"epoch": 0.9375,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 570
},
{
"epoch": 0.9539473684210527,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 580
},
{
"epoch": 0.9703947368421053,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 590
},
{
"epoch": 0.9868421052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 600
},
{
"epoch": 1.0032894736842106,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 610
},
{
"epoch": 1.019736842105263,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 620
},
{
"epoch": 1.0361842105263157,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 630
},
{
"epoch": 1.0526315789473684,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 640
},
{
"epoch": 1.069078947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 650
},
{
"epoch": 1.0855263157894737,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 660
},
{
"epoch": 1.1019736842105263,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 670
},
{
"epoch": 1.118421052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 680
},
{
"epoch": 1.1348684210526316,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 690
},
{
"epoch": 1.1513157894736843,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 700
},
{
"epoch": 1.1677631578947367,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 710
},
{
"epoch": 1.1842105263157894,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 720
},
{
"epoch": 1.200657894736842,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 730
},
{
"epoch": 1.2171052631578947,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 740
},
{
"epoch": 1.2335526315789473,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 750
},
{
"epoch": 1.25,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 760
},
{
"epoch": 1.2664473684210527,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 770
},
{
"epoch": 1.2828947368421053,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 780
},
{
"epoch": 1.299342105263158,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 790
},
{
"epoch": 1.3157894736842106,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 800
},
{
"epoch": 1.3322368421052633,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 810
},
{
"epoch": 1.3486842105263157,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 820
},
{
"epoch": 1.3651315789473684,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 830
},
{
"epoch": 1.381578947368421,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 840
},
{
"epoch": 1.3980263157894737,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 850
},
{
"epoch": 1.4144736842105263,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 860
},
{
"epoch": 1.430921052631579,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 870
},
{
"epoch": 1.4473684210526316,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 880
},
{
"epoch": 1.4638157894736843,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 890
},
{
"epoch": 1.4802631578947367,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 900
},
{
"epoch": 1.4967105263157894,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 910
},
{
"epoch": 1.513157894736842,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 920
},
{
"epoch": 1.5296052631578947,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 930
},
{
"epoch": 1.5460526315789473,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 940
},
{
"epoch": 1.5625,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 950
},
{
"epoch": 1.5789473684210527,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 960
},
{
"epoch": 1.5953947368421053,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 970
},
{
"epoch": 1.611842105263158,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 980
},
{
"epoch": 1.6282894736842106,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 990
},
{
"epoch": 1.6447368421052633,
"grad_norm": NaN,
"learning_rate": 9.991776315789474e-06,
"loss": 0.0,
"step": 1000
}
],
"logging_steps": 10,
"max_steps": 6080,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}